 include/llvm/Target/TargetLowering.h        |  10
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp    |  19
 lib/CodeGen/SelectionDAG/TargetLowering.cpp |  17
 lib/Target/X86/X86ISelLowering.cpp          | 111
 test/CodeGen/X86/dagcombine-buildvector.ll  |  16
 5 files changed, 114 insertions(+), 59 deletions(-)
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 163f4c5..ef166a2 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -825,11 +825,11 @@ public:
virtual bool
isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) const;
- /// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is
- /// loading 'Bytes' bytes from a location that is 'Dist' units away from the
- /// location that the 'Base' load is loading from.
- bool isConsecutiveLoad(SDNode *LD, SDNode *Base, unsigned Bytes, int Dist,
- const MachineFrameInfo *MFI) const;
+ /// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+ /// location that is 'Dist' units away from the location that the 'Base' load
+ /// is loading from.
+ bool isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes,
+ int Dist, const MachineFrameInfo *MFI) const;
/// PerformDAGCombine - This method will be invoked for all target nodes and
/// for any target-independent nodes that the target has registered with
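The signature change above moves the SDNode-to-LoadSDNode cast burden from
TargetLowering into the callers, which already know they are dealing with
loads. Note that 'Dist' is measured in units of 'Bytes'. A minimal caller
sketch (hypothetical names, not part of the patch):

    // Ask whether Hi reads the 4 bytes immediately after Lo:
    // Dist = 1 means "one 4-byte element past the Base load".
    if (TLI.isConsecutiveLoad(Hi, Lo, /*Bytes=*/4, /*Dist=*/1, MFI)) {
      // Hi loads from (address of Lo) + 4; the pair may be fusable.
    }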
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5d1b2a3..609ec82 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3626,30 +3626,29 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) {
assert(N->getOpcode() == ISD::BUILD_PAIR);
- SDNode *LD1 = getBuildPairElt(N, 0);
- if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+ LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+ LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
return SDValue();
MVT LD1VT = LD1->getValueType(0);
- SDNode *LD2 = getBuildPairElt(N, 1);
const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
if (ISD::isNON_EXTLoad(LD2) &&
LD2->hasOneUse() &&
// If both are volatile this would reduce the number of volatile loads.
// If one is volatile it might be ok, but play conservative and bail out.
- !cast<LoadSDNode>(LD1)->isVolatile() &&
- !cast<LoadSDNode>(LD2)->isVolatile() &&
+ !LD1->isVolatile() &&
+ !LD2->isVolatile() &&
TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) {
- LoadSDNode *LD = cast<LoadSDNode>(LD1);
- unsigned Align = LD->getAlignment();
+ unsigned Align = LD1->getAlignment();
unsigned NewAlign = TLI.getTargetData()->
getABITypeAlignment(VT.getTypeForMVT());
if (NewAlign <= Align &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
- return DAG.getLoad(VT, N->getDebugLoc(), LD->getChain(), LD->getBasePtr(),
- LD->getSrcValue(), LD->getSrcValueOffset(),
- false, Align);
+ return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
+ LD1->getBasePtr(), LD1->getSrcValue(),
+ LD1->getSrcValueOffset(), false, Align);
}
return SDValue();
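For reference, the combine above rewrites (build_pair (load p), (load p+4))
into a single load of the wider type when the address is sufficiently
aligned and neither load is volatile. A standalone C++ model of why this is
sound on a little-endian target such as x86 (illustrative sketch, not part
of the patch):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Two consecutive 32-bit loads combined with build_pair...
    uint64_t buildPair(const uint32_t *p) {
      uint32_t lo = p[0], hi = p[1];     // load p, load p+4
      return ((uint64_t)hi << 32) | lo;  // build_pair lo, hi
    }

    // ...yield the same bits as one 64-bit load from the same address.
    uint64_t wideLoad(const uint32_t *p) {
      uint64_t v;
      std::memcpy(&v, p, sizeof v);
      return v;
    }

    int main() {
      uint32_t buf[2] = {0xdeadbeefu, 0xcafef00du};
      assert(buildPair(buf) == wideLoad(buf));
      return 0;
    }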
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 3334e53..ab4cd51 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2070,13 +2070,13 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
}
-/// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is
-/// loading 'Bytes' bytes from a location that is 'Dist' units away from the
-/// location that the 'Base' load is loading from.
-bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base,
- unsigned Bytes, int Dist,
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+/// location that is 'Dist' units away from the location that the 'Base' load
+/// is loading from.
+bool TargetLowering::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
+ unsigned Bytes, int Dist,
const MachineFrameInfo *MFI) const {
- if (LD->getOperand(0).getNode() != Base->getOperand(0).getNode())
+ if (LD->getChain() != Base->getChain())
return false;
MVT VT = LD->getValueType(0);
if (VT.getSizeInBits() / 8 != Bytes)
@@ -2094,6 +2094,11 @@ bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base,
if (FS != BFS || FS != (int)Bytes) return false;
return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
}
+ if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
+ if (V && (V->getSExtValue() == Dist*Bytes))
+ return true;
+ }
GlobalValue *GV1 = NULL;
GlobalValue *GV2 = NULL;
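The new ISD::ADD case above catches the common non-stack shape where the
second load's address is literally (add BaseLoc, Dist*Bytes); the
frame-index branch before it reduces to the same arithmetic. A minimal
standalone model of the offset check (illustrative only):

    #include <cassert>
    #include <cstdint>

    // LD is "consecutive" to Base when it reads Bytes bytes starting
    // exactly Dist * Bytes past Base's address (Dist may be negative).
    static bool isConsecutive(int64_t LDOff, int64_t BaseOff,
                              unsigned Bytes, int Dist) {
      return LDOff == BaseOff + (int64_t)Dist * (int64_t)Bytes;
    }

    int main() {
      assert(isConsecutive(4, 0, 4, 1));    // element right after Base
      assert(!isConsecutive(8, 0, 4, 1));   // one element too far
      assert(isConsecutive(-8, 0, 8, -1));  // element right before Base
      return 0;
    }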
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 924155c..77c9f3d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7675,8 +7675,9 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
if (Elt.getOpcode() == ISD::UNDEF)
continue;
- if (!TLI.isConsecutiveLoad(Elt.getNode(), Base,
- EVT.getSizeInBits()/8, i, MFI))
+ LoadSDNode *LD = cast<LoadSDNode>(Elt);
+ LoadSDNode *LDBase = cast<LoadSDNode>(Base);
+ if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI))
return false;
}
return true;
@@ -7751,44 +7752,82 @@ static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
MVT VT = N->getValueType(0);
MVT EVT = VT.getVectorElementType();
- if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
- // We are looking for load i64 and zero extend. We want to transform
- // it before legalizer has a chance to expand it. Also look for i64
- // BUILD_PAIR bit casted to f64.
- return SDValue();
- // This must be an insertion into a zero vector.
- SDValue HighElt = N->getOperand(1);
- if (!isZeroNode(HighElt))
- return SDValue();
+
+ // Before or during type legalization, we want to try and convert a
+ // build_vector of an i64 load and a zero value into vzext_movl before the
+ // legalizer can break it up.
+ // FIXME: does the case below remove the need to do this?
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) {
+ if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
+ return SDValue();
+
+ // This must be an insertion into a zero vector.
+ SDValue HighElt = N->getOperand(1);
+ if (!isZeroNode(HighElt))
+ return SDValue();
+
+ // Value must be a load.
+ SDNode *Base = N->getOperand(0).getNode();
+ if (!isa<LoadSDNode>(Base)) {
+ if (Base->getOpcode() != ISD::BIT_CONVERT)
+ return SDValue();
+ Base = Base->getOperand(0).getNode();
+ if (!isa<LoadSDNode>(Base))
+ return SDValue();
+ }
+
+ // Transform it into VZEXT_LOAD addr.
+ LoadSDNode *LD = cast<LoadSDNode>(Base);
+
+ // Load must not be an extload.
+ if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return SDValue();
+
+ // Load type should be a legal type so we don't have to legalize it.
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
+ SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+ TargetLowering::TargetLoweringOpt TLO(DAG);
+ TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
+ DCI.CommitTargetLoweringOpt(TLO);
+ return ResNode;
+ }
+
+ // The type legalizer will have broken apart a v2i64 build_vector created
+ // during widening before the code that handles that case runs. Look for a
+ // build_vector of the form (load, load+4, 0/undef, 0/undef).
+ if (VT == MVT::v4i32 || VT == MVT::v4f32) {
+ LoadSDNode *LD0 = dyn_cast<LoadSDNode>(N->getOperand(0));
+ LoadSDNode *LD1 = dyn_cast<LoadSDNode>(N->getOperand(1));
+ if (!LD0 || !LD1)
+ return SDValue();
+ if (LD0->getExtensionType() != ISD::NON_EXTLOAD ||
+ LD1->getExtensionType() != ISD::NON_EXTLOAD)
+ return SDValue();
+ // Make sure the second elt is a consecutive load.
+ if (!TLI.isConsecutiveLoad(LD1, LD0, EVT.getSizeInBits()/8, 1,
+ DAG.getMachineFunction().getFrameInfo()))
+ return SDValue();
- // Value must be a load.
- SDNode *Base = N->getOperand(0).getNode();
- if (!isa<LoadSDNode>(Base)) {
- if (Base->getOpcode() != ISD::BIT_CONVERT)
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ if (!isZeroNode(N2) && N2.getOpcode() != ISD::UNDEF)
return SDValue();
- Base = Base->getOperand(0).getNode();
- if (!isa<LoadSDNode>(Base))
+ if (!isZeroNode(N3) && N3.getOpcode() != ISD::UNDEF)
return SDValue();
+
+ SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+ SDValue Ops[] = { LD0->getChain(), LD0->getBasePtr() };
+ SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+ TargetLowering::TargetLoweringOpt TLO(DAG);
+ TLO.CombineTo(SDValue(LD0, 1), ResNode.getValue(1));
+ DCI.CommitTargetLoweringOpt(TLO);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
}
-
- // Transform it into VZEXT_LOAD addr.
- LoadSDNode *LD = cast<LoadSDNode>(Base);
-
- // Load must not be an extload.
- if (LD->getExtensionType() != ISD::NON_EXTLOAD)
- return SDValue();
-
- // Load type should legal type so we don't have to legalize it.
- if (!TLI.isTypeLegal(VT))
- return SDValue();
-
- SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
- SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
- TargetLowering::TargetLoweringOpt TLO(DAG);
- TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
- DCI.CommitTargetLoweringOpt(TLO);
- return ResNode;
+ return SDValue();
}
/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
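Taken together, PerformBuildVectorCombine now catches the zero-extending
load pattern on both sides of type legalization: before legalization it
matches (build_vector (i64 load), 0), and afterwards it matches the
v4i32/v4f32 shape (load, load+4, 0/undef, 0/undef) that the type legalizer
leaves behind when it splits the i64. Both forms become a single
X86ISD::VZEXT_LOAD, and both use TargetLoweringOpt::CombineTo to rewire the
original load's chain users to the new node. A standalone sketch of the
node's semantics, i.e. what a movq from memory produces (illustrative,
little-endian):

    #include <cstdint>
    #include <cstring>

    struct V128 { uint64_t lo, hi; };  // stand-in for a 128-bit vector

    // Model of X86ISD::VZEXT_LOAD: load 64 bits from memory and
    // zero-extend them into a 128-bit register.
    V128 vzextLoad(const void *p) {
      V128 v = {0, 0};
      std::memcpy(&v.lo, p, 8);  // the i64 (or split i32 pair) load
      return v;                  // upper 64 bits stay zero
    }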
diff --git a/test/CodeGen/X86/dagcombine-buildvector.ll b/test/CodeGen/X86/dagcombine-buildvector.ll
index c89a296..b96fdfc 100644
--- a/test/CodeGen/X86/dagcombine-buildvector.ll
+++ b/test/CodeGen/X86/dagcombine-buildvector.ll
@@ -1,13 +1,25 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn -disable-mmx -o %t -f
; RUN: grep unpcklpd %t | count 1
; RUN: grep movapd %t | count 1
+; RUN: grep movaps %t | count 1
; Shows a dag combine bug that will generate an illegal build vector
; with v2i64 build_vector i32, i32.
-define void @test(<2 x double>* %dst, <4 x double> %src) {
+define void @test(<2 x double>* %dst, <4 x double> %src) nounwind {
entry:
%tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> < i32 0, i32 2 >
store <2 x double> %tmp7.i, <2 x double>* %dst
ret void
}
+
+define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind {
+entry:
+ %tmp1 = load <4 x i16>* %src
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+ %0 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3)
+ store <4 x i32> %0, <4 x i32>* %dest
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
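test2 is the regression case for the post-legalization path: the <4 x i16>
load is widened, which previously left an illegal v2i64 build_vector of two
i32 loads behind; with the combine above it should fold into a single
zero-extended vector load feeding pmovzxwd (hence -mcpu=penryn, which
enables SSE4.1).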