author    Nowar Gu <nowar100@gmail.com>  2011-06-17 14:29:24 +0800
committer Nowar Gu <nowar100@gmail.com>  2011-06-20 15:49:07 +0800
commit    907af0f20f58f2ea26da7ea64e1f094cd6880db7 (patch)
tree      02007757de416c561df174d582205cebfa582801 /lib/CodeGen/SelectionDAG
parent    1d4f9a57447faa0142a1d0301e5ce550cfe60c4f (diff)
parent    ec324e5ae44025c6bdb930b78198f30f807e355b (diff)
Merge upstream to r133240 at Fri. 17th Jun 2011.

Conflicts:
	lib/CodeGen/AsmPrinter/AsmPrinter.cpp
	lib/Target/ARM/ARMCodeEmitter.cpp
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp          | 347
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp             | 396
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp |  41
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp         |  22
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp          | 611
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 225
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp        |  32
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h          |  54
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp |  16
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp  |  65
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp    | 482
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp   |  91
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h     |   8
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp         |  30
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp  | 339
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h    |  12
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp     | 269
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp       | 176
18 files changed, 2137 insertions(+), 1079 deletions(-)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a9afec8..4ac590a 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -138,6 +138,10 @@ namespace {
SDValue PromoteExtend(SDValue Op);
bool PromoteLoad(SDValue Op);
+ void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+ SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+ ISD::NodeType ExtType);
+
/// combine - call the node-specific routine that knows how to fold each
/// particular type of node. If that doesn't do anything, try the
/// target-specific DAG combines.
@@ -165,6 +169,8 @@ namespace {
SDValue visitMULHS(SDNode *N);
SDValue visitSMUL_LOHI(SDNode *N);
SDValue visitUMUL_LOHI(SDNode *N);
+ SDValue visitSMULO(SDNode *N);
+ SDValue visitUMULO(SDNode *N);
SDValue visitSDIVREM(SDNode *N);
SDValue visitUDIVREM(SDNode *N);
SDValue visitAND(SDNode *N);
@@ -529,7 +535,8 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
cast<ConstantSDNode>(N0.getOperand(1)),
cast<ConstantSDNode>(N1));
return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
- } else if (N0.hasOneUse()) {
+ }
+ if (N0.hasOneUse()) {
// reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
N0.getOperand(0), N1);
@@ -546,7 +553,8 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
cast<ConstantSDNode>(N1.getOperand(1)),
cast<ConstantSDNode>(N0));
return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
- } else if (N1.hasOneUse()) {
+ }
+ if (N1.hasOneUse()) {
// reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
N1.getOperand(0), N0);
@@ -990,6 +998,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
dbgs() << "\nWith: ";
RV.getNode()->dump(&DAG);
dbgs() << '\n');
+
+ // Transfer debug value.
+ DAG.TransferDbgValues(SDValue(N, 0), RV);
WorkListRemover DeadNodes(*this);
if (N->getNumValues() == RV.getNode()->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
@@ -1045,6 +1056,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::MULHS: return visitMULHS(N);
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
+ case ISD::SMULO: return visitSMULO(N);
+ case ISD::UMULO: return visitUMULO(N);
case ISD::SDIVREM: return visitSDIVREM(N);
case ISD::UDIVREM: return visitUDIVREM(N);
case ISD::AND: return visitAND(N);
@@ -1566,7 +1579,8 @@ static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT,
SelectionDAG &DAG, bool LegalOperations) {
if (!VT.isVector()) {
return DAG.getConstant(0, VT);
- } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
+ }
+ if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
// Produce a vector of zeros.
SDValue El = DAG.getConstant(0, VT.getVectorElementType());
std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
@@ -2174,6 +2188,26 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSMULO(SDNode *N) {
+ // (smulo x, 2) -> (saddo x, x)
+ if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (C2->getAPIntValue() == 2)
+ return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(),
+ N->getOperand(0), N->getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUMULO(SDNode *N) {
+ // (umulo x, 2) -> (uaddo x, x)
+ if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (C2->getAPIntValue() == 2)
+ return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(),
+ N->getOperand(0), N->getOperand(0));
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
if (Res.getNode()) return Res;
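
[Editor's note] The two new combines rest on the identity that multiplying by 2 overflows exactly when x + x does, so the multiply-with-overflow node can become the cheaper add-with-overflow. A minimal standalone check of that identity (plain C++ using the Clang/GCC overflow builtins, not LLVM code):

    #include <cassert>
    #include <climits>

    int main() {
      for (int x : {0, 1, -1, INT_MAX, INT_MIN, INT_MAX / 2 + 1}) {
        int m, a;
        bool mulOv = __builtin_smul_overflow(x, 2, &m); // smulo x, 2
        bool addOv = __builtin_sadd_overflow(x, x, &a); // saddo x, x
        assert(mulOv == addOv && m == a); // same overflow bit, same wrapped value
      }
    }

The unsigned case (umulo -> uaddo) is analogous.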
@@ -3000,6 +3034,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
+ // fold (shl undef, x) -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
@@ -3062,26 +3099,27 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
}
- // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or
- // (srl (and x, (shl -1, c1)), (sub c1, c2))
+ // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
+ // (and (srl x, (sub c1, c2)), MASK)
if (N1C && N0.getOpcode() == ISD::SRL &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
if (c1 < VT.getSizeInBits()) {
uint64_t c2 = N1C->getZExtValue();
- SDValue HiBitsMask =
- DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
- VT.getSizeInBits() - c1),
- VT);
- SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT,
- N0.getOperand(0),
- HiBitsMask);
- if (c2 > c1)
- return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask,
- DAG.getConstant(c2-c1, N1.getValueType()));
- else
- return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask,
- DAG.getConstant(c1-c2, N1.getValueType()));
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ VT.getSizeInBits() - c1);
+ SDValue Shift;
+ if (c2 > c1) {
+ Mask = Mask.shl(c2-c1);
+ Shift = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c2-c1, N1.getValueType()));
+ } else {
+ Mask = Mask.lshr(c1-c2);
+ Shift = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1-c2, N1.getValueType()));
+ }
+ return DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, Shift,
+ DAG.getConstant(Mask, VT));
}
}
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
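
[Editor's note] The rewritten fold can be brute-forced over i32: MASK is the high (32 - c1) bits, shifted the same way x is, matching the APInt code above. Standalone C++ check (not the DAG API):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x : {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu})
        for (unsigned c1 = 0; c1 < 32; ++c1)
          for (unsigned c2 = 0; c2 < 32; ++c2) {
            uint32_t oldForm = (x >> c1) << c2;
            uint32_t mask = 0xFFFFFFFFu << c1; // high (32 - c1) bits set
            uint32_t newForm = c2 > c1
                ? (x << (c2 - c1)) & (mask << (c2 - c1))
                : (x >> (c1 - c2)) & (mask >> (c1 - c2));
            assert(oldForm == newForm);
          }
    }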
@@ -3323,8 +3361,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return DAG.getUNDEF(VT);
if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
+ uint64_t ShiftAmt = N1C->getZExtValue();
SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
- N0.getOperand(0), N1);
+ N0.getOperand(0),
+ DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
AddToWorkList(SmallShift.getNode());
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
}
@@ -3663,6 +3703,28 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
return true;
}
+void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+ SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+ ISD::NodeType ExtType) {
+ // Extend SetCC uses if necessary.
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+}
+
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -3751,27 +3813,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
-
- // Extend SetCC uses if necessary.
- for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
- SDNode *SetCC = SetCCs[i];
- SmallVector<SDValue, 4> Ops;
-
- for (unsigned j = 0; j != 2; ++j) {
- SDValue SOp = SetCC->getOperand(j);
- if (SOp == Trunc)
- Ops.push_back(ExtLoad);
- else
- Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND,
- N->getDebugLoc(), VT, SOp));
- }
-
- Ops.push_back(SetCC->getOperand(2));
- CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
- SetCC->getValueType(0),
- &Ops[0], Ops.size()));
- }
-
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::SIGN_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -3799,6 +3842,45 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
}
+ // fold (sext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (sextload x), (sext cst))
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+ if (LN0->getExtensionType() != ISD::ZEXTLOAD) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
+ SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(),
+ LN0->getMemoryVT(),
+ LN0->isVolatile(),
+ LN0->isNonTemporal(),
+ LN0->getAlignment());
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.sext(VT.getSizeInBits());
+ SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ExtLoad, DAG.getConstant(Mask, VT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+ N0.getOperand(0).getDebugLoc(),
+ N0.getOperand(0).getValueType(), ExtLoad);
+ CombineTo(N, And);
+ CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::SIGN_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
if (N0.getOpcode() == ISD::SETCC) {
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
// Only do this before legalize for now.
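
[Editor's note] The sext-through-bitwise-op fold above is justified because sign-extension distributes over and/or/xor: the extended bits replicate bit 31 of (x op c), which equals bit31(x) op bit31(c). A standalone check of the identity for the 32-to-64-bit case (plain C++):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t x : {0, 1, -1, INT32_MAX, INT32_MIN})
        for (int32_t c : {0, 0x0F, -256, 0x00FF00FF}) {
          assert((int64_t)(x & c) == ((int64_t)x & (int64_t)c)); // sext(and) == and(sext)
          assert((int64_t)(x | c) == ((int64_t)x | (int64_t)c));
          assert((int64_t)(x ^ c) == ((int64_t)x ^ (int64_t)c));
        }
    }

This is why the constant can simply be re-emitted as Mask.sext(...) against the sextload.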
@@ -3882,7 +3964,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorkList(oye);
}
- return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -3957,27 +4039,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
- // Extend SetCC uses if necessary.
- for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
- SDNode *SetCC = SetCCs[i];
- SmallVector<SDValue, 4> Ops;
-
- for (unsigned j = 0; j != 2; ++j) {
- SDValue SOp = SetCC->getOperand(j);
- if (SOp == Trunc)
- Ops.push_back(ExtLoad);
- else
- Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND,
- N->getDebugLoc(), VT, SOp));
- }
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ZERO_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
- Ops.push_back(SetCC->getOperand(2));
- CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
- SetCC->getValueType(0),
- &Ops[0], Ops.size()));
+ // fold (zext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (zextload x), (zext cst))
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+ if (LN0->getExtensionType() != ISD::SEXTLOAD) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
+ SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(),
+ LN0->getMemoryVT(),
+ LN0->isVolatile(),
+ LN0->isNonTemporal(),
+ LN0->getAlignment());
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ExtLoad, DAG.getConstant(Mask, VT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+ N0.getOperand(0).getDebugLoc(),
+ N0.getOperand(0).getValueType(), ExtLoad);
+ CombineTo(N, And);
+ CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ZERO_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
-
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -4012,7 +4115,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
EVT EltVT = VT.getVectorElementType();
SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
DAG.getConstant(1, EltVT));
- if (VT.getSizeInBits() == N0VT.getSizeInBits()) {
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
// We know that the # elements of the results is the same as the
// # elements of the compare (and the # elements of the compare result
// for that matter). Check to see that they are the same size. If so,
@@ -4024,25 +4127,24 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
cast<CondCodeSDNode>(N0.getOperand(2))->get()),
DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
&OneOps[0], OneOps.size()));
- } else {
- // If the desired elements are smaller or larger than the source
- // elements we can use a matching integer vector type and then
- // truncate/sign extend
- EVT MatchingElementType =
- EVT::getIntegerVT(*DAG.getContext(),
- N0VT.getScalarType().getSizeInBits());
- EVT MatchingVectorType =
- EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
- N0VT.getVectorNumElements());
- SDValue VsetCC =
- DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
- DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT),
- DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
- &OneOps[0], OneOps.size()));
- }
+
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT),
+ DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &OneOps[0], OneOps.size()));
}
// zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
@@ -4110,7 +4212,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorkList(oye);
}
- return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -4166,27 +4268,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
-
- // Extend SetCC uses if necessary.
- for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
- SDNode *SetCC = SetCCs[i];
- SmallVector<SDValue, 4> Ops;
-
- for (unsigned j = 0; j != 2; ++j) {
- SDValue SOp = SetCC->getOperand(j);
- if (SOp == Trunc)
- Ops.push_back(ExtLoad);
- else
- Ops.push_back(DAG.getNode(ISD::ANY_EXTEND,
- N->getDebugLoc(), VT, SOp));
- }
-
- Ops.push_back(SetCC->getOperand(2));
- CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
- SetCC->getValueType(0),
- &Ops[0], Ops.size()));
- }
-
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ANY_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -6265,6 +6348,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
ST->isNonTemporal(), OrigAlign);
}
+ // Turn 'store undef, Ptr' -> nothing.
+ if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
+ return Chain;
+
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
// NOTE: If the original store is volatile, this transform must not increase
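
[Editor's note] The float-store rewrite is purely about bytes: storing the IEEE-754 bit pattern of the constant as an integer writes exactly the same memory as storing the float. Standalone illustration (plain C++; 1.0f is 0x3F800000):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      float f = 1.0f;
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof bits); // reinterpret the FP bit pattern
      assert(bits == 0x3F800000u);         // IEEE-754 single-precision 1.0
      unsigned char viaFloat[4], viaInt[4];
      std::memcpy(viaFloat, &f, 4);        // what 'store float' writes
      std::memcpy(viaInt, &bits, 4);       // what 'store i32' writes
      assert(std::memcmp(viaFloat, viaInt, 4) == 0);
    }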
@@ -6298,8 +6385,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
Ptr, ST->getPointerInfo(), ST->isVolatile(),
ST->isNonTemporal(), ST->getAlignment());
- } else if (!ST->isVolatile() &&
- TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ }
+
+ if (!ST->isVolatile() &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
// Many FP stores are not made apparent until after legalize, e.g. for
// argument passing. Since this is so common, custom legalize the
// 64-bit integer store into two 32-bit stores.
@@ -6393,8 +6482,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
SDValue Shorter =
GetDemandedBits(Value,
- APInt::getLowBitsSet(Value.getValueSizeInBits(),
- ST->getMemoryVT().getSizeInBits()));
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getScalarType().getSizeInBits()));
AddToWorkList(Value.getNode());
if (Shorter.getNode())
return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
@@ -6486,18 +6576,18 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (scalar_to_vector val, 0) -> val
SDValue InVec = N->getOperand(0);
- if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- // Check if the result type doesn't match the inserted element type. A
- // SCALAR_TO_VECTOR may truncate the inserted element and the
- // EXTRACT_VECTOR_ELT may widen the extracted vector.
- SDValue InOp = InVec.getOperand(0);
- EVT NVT = N->getValueType(0);
- if (InOp.getValueType() != NVT) {
- assert(InOp.getValueType().isInteger() && NVT.isInteger());
- return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
- }
- return InOp;
- }
+ if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // Check if the result type doesn't match the inserted element type. A
+ // SCALAR_TO_VECTOR may truncate the inserted element and the
+ // EXTRACT_VECTOR_ELT may widen the extracted vector.
+ SDValue InOp = InVec.getOperand(0);
+ EVT NVT = N->getValueType(0);
+ if (InOp.getValueType() != NVT) {
+ assert(InOp.getValueType().isInteger() && NVT.isInteger());
+ return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
+ }
+ return InOp;
+ }
// Perform only after legalization to ensure build_vector / vector_shuffle
// optimizations have already been done.
@@ -6558,7 +6648,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
}
- if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())
+ if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
return SDValue();
// If Idx was -1 above, Elt is going to be -1, so just return undef.
@@ -7497,18 +7587,17 @@ bool DAGCombiner::FindAliasInfo(SDNode *N,
SrcValueAlign = LD->getOriginalAlignment();
TBAAInfo = LD->getTBAAInfo();
return true;
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ }
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
Size = ST->getMemoryVT().getSizeInBits() >> 3;
SrcValue = ST->getSrcValue();
SrcValueOffset = ST->getSrcValueOffset();
SrcValueAlign = ST->getOriginalAlignment();
TBAAInfo = ST->getTBAAInfo();
- } else {
- llvm_unreachable("FindAliasInfo expected a memory operand");
+ return false;
}
-
- return false;
+ llvm_unreachable("FindAliasInfo expected a memory operand");
}
/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
@@ -7621,13 +7710,13 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
// Accumulate all the aliases to this node.
GatherAllAliases(N, OldChain, Aliases);
- if (Aliases.size() == 0) {
- // If no operands then chain to entry token.
+ // If no operands then chain to entry token.
+ if (Aliases.size() == 0)
return DAG.getEntryNode();
- } else if (Aliases.size() == 1) {
- // If a single operand then chain to it. We don't need to revisit it.
+
+ // If a single operand then chain to it. We don't need to revisit it.
+ if (Aliases.size() == 1)
return Aliases[0];
- }
// Construct a custom tailored token factor.
return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index ea8ace3..797f174 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -43,6 +43,8 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -109,8 +111,8 @@ unsigned FastISel::getRegForValue(const Value *V) {
// of whether FastISel can handle them.
MVT VT = RealVT.getSimpleVT();
if (!TLI.isTypeLegal(VT)) {
- // Promote MVT::i1 to a legal type though, because it's common and easy.
- if (VT == MVT::i1)
+ // Handle integer promotions, though, because they're common and easy.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
else
return 0;
@@ -121,10 +123,9 @@ unsigned FastISel::getRegForValue(const Value *V) {
// only locally. This is because Instructions already have the SSA
// def-dominates-use requirement enforced.
DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
- if (I != FuncInfo.ValueMap.end()) {
- unsigned Reg = I->second;
- return Reg;
- }
+ if (I != FuncInfo.ValueMap.end())
+ return I->second;
+
unsigned Reg = LocalValueMap[V];
if (Reg != 0)
return Reg;
@@ -164,8 +165,12 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
Reg =
getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
} else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
- // Try to emit the constant directly.
- Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+ if (CF->isNullValue()) {
+ Reg = TargetMaterializeFloatZero(CF);
+ } else {
+ // Try to emit the constant directly.
+ Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+ }
if (!Reg) {
// Try to emit the constant by using an integer constant with a cast.
@@ -230,10 +235,10 @@ unsigned FastISel::lookUpRegForValue(const Value *V) {
/// NOTE: This is only necessary because we might select a block that uses
/// a value before we select the block that defines the value. It might be
/// possible to fix this by selecting blocks in reverse postorder.
-unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
+void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
if (!isa<Instruction>(I)) {
LocalValueMap[I] = Reg;
- return Reg;
+ return;
}
unsigned &AssignedReg = FuncInfo.ValueMap[I];
@@ -242,12 +247,11 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
AssignedReg = Reg;
else if (Reg != AssignedReg) {
// Arrange for uses of AssignedReg to be replaced by uses of Reg.
- FuncInfo.RegFixups[AssignedReg] = Reg;
+ for (unsigned i = 0; i < NumRegs; i++)
+ FuncInfo.RegFixups[AssignedReg+i] = Reg+i;
AssignedReg = Reg;
}
-
- return AssignedReg;
}
std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
@@ -330,23 +334,51 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
return false;
}
+ // Check if the first operand is a constant, and handle it as "ri". At -O0,
+ // we don't have anything that canonicalizes operand order.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
+ if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) {
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (Op1 == 0) return false;
+
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1,
+ Op1IsKill, CI->getZExtValue(),
+ VT.getSimpleVT());
+ if (ResultReg == 0) return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+
unsigned Op0 = getRegForValue(I->getOperand(0));
- if (Op0 == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail.
return false;
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
// Check if the second operand is a constant and handle it appropriately.
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
- unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(),
- ISDOpcode, Op0, Op0IsKill,
- CI->getZExtValue());
- if (ResultReg != 0) {
- // We successfully emitted code for the given LLVM Instruction.
- UpdateValueMap(I, ResultReg);
- return true;
+ uint64_t Imm = CI->getZExtValue();
+
+ // Transform "sdiv exact X, 8" -> "sra X, 3".
+ if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) &&
+ cast<BinaryOperator>(I)->isExact() &&
+ isPowerOf2_64(Imm)) {
+ Imm = Log2_64(Imm);
+ ISDOpcode = ISD::SRA;
}
+
+ unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
+ Op0IsKill, Imm, VT.getSimpleVT());
+ if (ResultReg == 0) return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ UpdateValueMap(I, ResultReg);
+ return true;
}
// Check if the second operand is a constant float.
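
[Editor's note] The sdiv-exact transform is sound only because the exact flag promises a zero remainder; for such inputs an arithmetic right shift agrees with signed division even when X is negative. A standalone check (plain C++, assuming arithmetic right shift on signed ints, which C++20 guarantees):

    #include <cassert>

    int main() {
      // Multiples of 8: "sdiv exact X, 8" == "sra X, 3".
      for (int x : {-64, -8, 0, 8, 24, 1 << 20})
        assert(x / 8 == x >> 3);
      // Without exactness the fold would be wrong: -1 / 8 == 0, but -1 >> 3 == -1.
    }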
@@ -454,15 +486,35 @@ bool FastISel::SelectGetElementPtr(const User *I) {
}
bool FastISel::SelectCall(const User *I) {
- const Function *F = cast<CallInst>(I)->getCalledFunction();
+ const CallInst *Call = cast<CallInst>(I);
+
+ // Handle simple inline asms.
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getArgOperand(0))) {
+ // Don't attempt to handle constraints.
+ if (!IA->getConstraintString().empty())
+ return false;
+
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::INLINEASM))
+ .addExternalSymbol(IA->getAsmString().c_str())
+ .addImm(ExtraInfo);
+ return true;
+ }
+
+ const Function *F = Call->getCalledFunction();
if (!F) return false;
// Handle selected intrinsic function calls.
- unsigned IID = F->getIntrinsicID();
- switch (IID) {
+ switch (F->getIntrinsicID()) {
default: break;
case Intrinsic::dbg_declare: {
- const DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
if (!DIVariable(DI->getVariable()).Verify() ||
!FuncInfo.MF->getMMI().hasDebugInfo())
return true;
@@ -494,7 +546,7 @@ bool FastISel::SelectCall(const User *I) {
}
case Intrinsic::dbg_value: {
// This form of DBG_VALUE is target-independent.
- const DbgValueInst *DI = cast<DbgValueInst>(I);
+ const DbgValueInst *DI = cast<DbgValueInst>(Call);
const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
const Value *V = DI->getValue();
if (!V) {
@@ -523,65 +575,68 @@ bool FastISel::SelectCall(const User *I) {
return true;
}
case Intrinsic::eh_exception: {
- EVT VT = TLI.getValueType(I->getType());
- switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
- default: break;
- case TargetLowering::Expand: {
- assert(FuncInfo.MBB->isLandingPad() &&
- "Call to eh.exception not in landing pad!");
- unsigned Reg = TLI.getExceptionAddressRegister();
- const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(Reg);
- UpdateValueMap(I, ResultReg);
- return true;
- }
- }
- break;
+ EVT VT = TLI.getValueType(Call->getType());
+ if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand)
+ break;
+
+ assert(FuncInfo.MBB->isLandingPad() &&
+ "Call to eh.exception not in landing pad!");
+ unsigned Reg = TLI.getExceptionAddressRegister();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Reg);
+ UpdateValueMap(Call, ResultReg);
+ return true;
}
case Intrinsic::eh_selector: {
- EVT VT = TLI.getValueType(I->getType());
- switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) {
- default: break;
- case TargetLowering::Expand: {
- if (FuncInfo.MBB->isLandingPad())
- AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB);
- else {
+ EVT VT = TLI.getValueType(Call->getType());
+ if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand)
+ break;
+ if (FuncInfo.MBB->isLandingPad())
+ AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB);
+ else {
#ifndef NDEBUG
- FuncInfo.CatchInfoLost.insert(cast<CallInst>(I));
+ FuncInfo.CatchInfoLost.insert(Call);
#endif
- // FIXME: Mark exception selector register as live in. Hack for PR1508.
- unsigned Reg = TLI.getExceptionSelectorRegister();
- if (Reg) FuncInfo.MBB->addLiveIn(Reg);
- }
-
+ // FIXME: Mark exception selector register as live in. Hack for PR1508.
unsigned Reg = TLI.getExceptionSelectorRegister();
- EVT SrcVT = TLI.getPointerTy();
- const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(Reg);
-
- bool ResultRegIsKill = hasTrivialKill(I);
-
- // Cast the register to the type of the selector.
- if (SrcVT.bitsGT(MVT::i32))
- ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
- ResultReg, ResultRegIsKill);
- else if (SrcVT.bitsLT(MVT::i32))
- ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
- ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
- if (ResultReg == 0)
- // Unhandled operand. Halt "fast" selection and bail.
- return false;
+ if (Reg) FuncInfo.MBB->addLiveIn(Reg);
+ }
- UpdateValueMap(I, ResultReg);
+ unsigned Reg = TLI.getExceptionSelectorRegister();
+ EVT SrcVT = TLI.getPointerTy();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Reg);
+
+ bool ResultRegIsKill = hasTrivialKill(Call);
+
+ // Cast the register to the type of the selector.
+ if (SrcVT.bitsGT(MVT::i32))
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
+ ResultReg, ResultRegIsKill);
+ else if (SrcVT.bitsLT(MVT::i32))
+ ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
+ ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
+ if (ResultReg == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
- return true;
- }
- }
- break;
+ UpdateValueMap(Call, ResultReg);
+
+ return true;
+ }
+ case Intrinsic::objectsize: {
+ ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1));
+ unsigned long long Res = CI->isZero() ? -1ULL : 0;
+ Constant *ResCI = ConstantInt::get(Call->getType(), Res);
+ unsigned ResultReg = getRegForValue(ResCI);
+ if (ResultReg == 0)
+ return false;
+ UpdateValueMap(Call, ResultReg);
+ return true;
}
}
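
[Editor's note] The objectsize case just materializes the conservative "unknown" answer for llvm.objectsize: -1 when the call asks for the maximum size (min flag false) and 0 when it asks for the minimum. At the C level the intrinsic is normally reached via __builtin_object_size; a hedged illustration (the exact results depend on what the compiler can prove, so the asserts accept either outcome):

    #include <cassert>
    #include <cstddef>

    int main() {
      char *p = new char[16];
      size_t maxSz = __builtin_object_size(p, 0); // max size; -1 if unknown
      size_t minSz = __builtin_object_size(p, 2); // min size; 0 if unknown
      assert(maxSz == (size_t)-1 || maxSz == 16);
      assert(minSz == 0 || minSz == 16);
      delete[] p;
    }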
@@ -598,21 +653,13 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
// Unhandled type. Halt "fast" selection and bail.
return false;
- // Check if the destination type is legal. Or as a special case,
- // it may be i1 if we're doing a truncate because that's
- // easy and somewhat common.
+ // Check if the destination type is legal.
if (!TLI.isTypeLegal(DstVT))
- if (DstVT != MVT::i1 || Opcode != ISD::TRUNCATE)
- // Unhandled type. Halt "fast" selection and bail.
- return false;
+ return false;
- // Check if the source operand is legal. Or as a special case,
- // it may be i1 if we're doing zero-extension because that's
- // easy and somewhat common.
+ // Check if the source operand is legal.
if (!TLI.isTypeLegal(SrcVT))
- if (SrcVT != MVT::i1 || Opcode != ISD::ZERO_EXTEND)
- // Unhandled type. Halt "fast" selection and bail.
- return false;
+ return false;
unsigned InputReg = getRegForValue(I->getOperand(0));
if (!InputReg)
@@ -621,18 +668,6 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
- // If the operand is i1, arrange for the high bits in the register to be zero.
- if (SrcVT == MVT::i1) {
- SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT);
- InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg, InputRegIsKill);
- if (!InputReg)
- return false;
- InputRegIsKill = true;
- }
- // If the result is i1, truncate to the target's type for i1 first.
- if (DstVT == MVT::i1)
- DstVT = TLI.getTypeToTransformTo(I->getContext(), DstVT);
-
unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
DstVT.getSimpleVT(),
Opcode,
@@ -784,6 +819,47 @@ FastISel::SelectFNeg(const User *I) {
}
bool
+FastISel::SelectExtractValue(const User *U) {
+ const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U);
+ if (!EVI)
+ return false;
+
+ // Make sure we only try to handle extracts with a legal result. But also
+ // allow i1 because it's easy.
+ EVT RealVT = TLI.getValueType(EVI->getType(), /*AllowUnknown=*/true);
+ if (!RealVT.isSimple())
+ return false;
+ MVT VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT) && VT != MVT::i1)
+ return false;
+
+ const Value *Op0 = EVI->getOperand(0);
+ const Type *AggTy = Op0->getType();
+
+ // Get the base result register.
+ unsigned ResultReg;
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0);
+ if (I != FuncInfo.ValueMap.end())
+ ResultReg = I->second;
+ else if (isa<Instruction>(Op0))
+ ResultReg = FuncInfo.InitializeRegForValue(Op0);
+ else
+ return false; // fast-isel can't handle aggregate constants at the moment
+
+ // Get the actual result register, which is an offset from the base register.
+ unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->idx_begin(), EVI->idx_end());
+
+ SmallVector<EVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, AggTy, AggValueVTs);
+
+ for (unsigned i = 0; i < VTIndex; i++)
+ ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]);
+
+ UpdateValueMap(EVI, ResultReg);
+ return true;
+}
+
+bool
FastISel::SelectOperator(const User *I, unsigned Opcode) {
switch (Opcode) {
case Instruction::Add:
@@ -887,6 +963,9 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
return true;
}
+ case Instruction::ExtractValue:
+ return SelectExtractValue(I);
+
case Instruction::PHI:
llvm_unreachable("FastISel shouldn't visit PHI nodes!");
@@ -966,59 +1045,33 @@ unsigned FastISel::FastEmit_rri(MVT, MVT,
unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
unsigned Op0, bool Op0IsKill,
uint64_t Imm, MVT ImmType) {
+ // If this is a multiply by a power of two, emit this as a shift left.
+ if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
+ Opcode = ISD::SHL;
+ Imm = Log2_64(Imm);
+ } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) {
+ // div x, 8 -> srl x, 3
+ Opcode = ISD::SRL;
+ Imm = Log2_64(Imm);
+ }
+
+ // Horrible hack (to be removed), check to make sure shift amounts are
+ // in-range.
+ if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) &&
+ Imm >= VT.getSizeInBits())
+ return 0;
+
// First check if immediate type is legal. If not, we can't use the ri form.
unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
if (ResultReg != 0)
return ResultReg;
unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
- if (MaterialReg == 0)
- return 0;
- return FastEmit_rr(VT, VT, Opcode,
- Op0, Op0IsKill,
- MaterialReg, /*Kill=*/true);
-}
-
-/// FastEmit_rf_ - This method is a wrapper of FastEmit_ri. It first tries
-/// to emit an instruction with a floating-point immediate operand using
-/// FastEmit_rf. If that fails, it materializes the immediate into a register
-/// and try FastEmit_rr instead.
-unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode,
- unsigned Op0, bool Op0IsKill,
- const ConstantFP *FPImm, MVT ImmType) {
- // First check if immediate type is legal. If not, we can't use the rf form.
- unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, Op0IsKill, FPImm);
- if (ResultReg != 0)
- return ResultReg;
-
- // Materialize the constant in a register.
- unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm);
if (MaterialReg == 0) {
- // If the target doesn't have a way to directly enter a floating-point
- // value into a register, use an alternate approach.
- // TODO: The current approach only supports floating-point constants
- // that can be constructed by conversion from integer values. This should
- // be replaced by code that creates a load from a constant-pool entry,
- // which will require some target-specific work.
- const APFloat &Flt = FPImm->getValueAPF();
- EVT IntVT = TLI.getPointerTy();
-
- uint64_t x[2];
- uint32_t IntBitWidth = IntVT.getSizeInBits();
- bool isExact;
- (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
- APFloat::rmTowardZero, &isExact);
- if (!isExact)
- return 0;
- APInt IntVal(IntBitWidth, 2, x);
-
- unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(),
- ISD::Constant, IntVal.getZExtValue());
- if (IntegerReg == 0)
- return 0;
- MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT,
- ISD::SINT_TO_FP, IntegerReg, /*Kill=*/true);
- if (MaterialReg == 0)
- return 0;
+ // This is a bit ugly/slow, but failing here means falling out of
+ // fast-isel, which would be very slow.
+ const IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
+ VT.getSizeInBits());
+ MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
}
return FastEmit_rr(VT, VT, Opcode,
Op0, Op0IsKill,
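
[Editor's note] FastEmit_ri_ now strength-reduces multiplies and unsigned divides by powers of two into shifts, and the in-range check is not cosmetic: a shift by the full bit width (or more) is undefined. Standalone check of the identities (plain C++):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x : {0u, 3u, 0x80000001u})
        for (unsigned k = 0; k < 32; ++k) {
          assert(x * (1u << k) == x << k); // mul x, 2^k  -> shl x, k
          assert(x / (1u << k) == x >> k); // udiv x, 2^k -> srl x, k
        }
      // x << 32 on a 32-bit value would be undefined, hence the
      // Imm >= VT.getSizeInBits() bail-out above.
    }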
@@ -1078,6 +1131,30 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
return ResultReg;
}
+unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
@@ -1183,6 +1260,23 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
return ResultReg;
}
+unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm1).addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
unsigned Op0, bool Op0IsKill,
uint32_t Idx) {
@@ -1238,7 +1332,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// Only handle legal types. Two interesting things to note here. First,
// by bailing out early, we may leave behind some dead instructions,
// since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
- // own moves. Second, this check is necessary becuase FastISel doesn't
+ // own moves. Second, this check is necessary because FastISel doesn't
// use CreateRegs to create registers, so it always creates
// exactly one register for each non-void instruction.
EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index d8a5770..d518b5d 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -54,25 +54,6 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
return false;
}
-/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
-/// entry block, return true. This includes arguments used by switches, since
-/// the switch may expand into multiple basic blocks.
-static bool isOnlyUsedInEntryBlock(const Argument *A, bool EnableFastISel) {
- // With FastISel active, we may be splitting blocks, so force creation
- // of virtual registers for all non-dead arguments.
- if (EnableFastISel)
- return A->use_empty();
-
- const BasicBlock *Entry = A->getParent()->begin();
- for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
- UI != E; ++UI) {
- const User *U = *UI;
- if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
- return false; // Use not in entry block.
- }
- return true;
-}
-
FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli)
: TLI(tli) {
}
@@ -86,16 +67,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(Fn->getReturnType(),
Fn->getAttributes().getRetAttributes(), Outs, TLI);
- CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), Fn->isVarArg(),
+ CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF,
+ Fn->isVarArg(),
Outs, Fn->getContext());
- // Create a vreg for each argument register that is not dead and is used
- // outside of the entry block for the function.
- for (Function::const_arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end();
- AI != E; ++AI)
- if (!isOnlyUsedInEntryBlock(AI, EnableFastISel))
- InitializeRegForValue(AI);
-
// Initialize the mapping of values to registers. This is only set up for
// instruction values that are used outside of the block that defines
// them.
@@ -181,6 +156,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
if (PN->use_empty()) continue;
+ // Skip empty types
+ if (PN->getType()->isEmptyTy())
+ continue;
+
DebugLoc DL = PN->getDebugLoc();
unsigned PHIReg = ValueMap[PN];
assert(PHIReg && "PHI node does not have an assigned virtual register!");
@@ -343,7 +322,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
APInt Zero(BitWidth, 0);
DestLOI.KnownZero = Zero;
DestLOI.KnownOne = Zero;
- return;
+ return;
}
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
@@ -375,18 +354,18 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
/// setByValArgumentFrameIndex - Record frame index for the byval
/// argument. This overrides previous frame index entry for this argument,
/// if any.
-void FunctionLoweringInfo::setByValArgumentFrameIndex(const Argument *A,
+void FunctionLoweringInfo::setByValArgumentFrameIndex(const Argument *A,
int FI) {
assert (A->hasByValAttr() && "Argument does not have byval attribute!");
ByValArgFrameIndexMap[A] = FI;
}
-
+
/// getByValArgumentFrameIndex - Get frame index for the byval argument.
/// If the argument does not have any assigned frame index then 0 is
/// returned.
int FunctionLoweringInfo::getByValArgumentFrameIndex(const Argument *A) {
assert (A->hasByValAttr() && "Argument does not have byval attribute!");
- DenseMap<const Argument *, int>::iterator I =
+ DenseMap<const Argument *, int>::iterator I =
ByValArgFrameIndexMap.find(A);
if (I != ByValArgFrameIndexMap.end())
return I->second;
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index e309def..2a65d65 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -76,6 +76,12 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
// the CopyToReg'd destination register instead of creating a new vreg.
bool MatchReg = true;
const TargetRegisterClass *UseRC = NULL;
+ EVT VT = Node->getValueType(ResNo);
+
+ // Stick to the preferred register classes for legal types.
+ if (TLI->isTypeLegal(VT))
+ UseRC = TLI->getRegClassFor(VT);
+
if (!IsClone && !IsCloned)
for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
UI != E; ++UI) {
@@ -121,10 +127,9 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
break;
}
- EVT VT = Node->getValueType(ResNo);
const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
-
+
// Figure out the register class to create for the destreg.
if (VRBase) {
DstRC = MRI->getRegClass(VRBase);
@@ -283,7 +288,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
DstRC = II->OpInfo[IIOpNum].getRegClass(TRI);
assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) &&
"Don't have operand info for this instruction!");
- if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) {
+ if (DstRC && !SrcRC->hasSuperClassEq(DstRC)) {
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
@@ -543,17 +548,18 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
void InstrEmitter::EmitRegSequence(SDNode *Node,
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsClone, bool IsCloned) {
- const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0));
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
unsigned NewVReg = MRI->createVirtualRegister(RC);
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
TII->get(TargetOpcode::REG_SEQUENCE), NewVReg);
unsigned NumOps = Node->getNumOperands();
- assert((NumOps & 1) == 0 &&
- "REG_SEQUENCE must have an even number of operands!");
+ assert((NumOps & 1) == 1 &&
+ "REG_SEQUENCE must have an odd number of operands!");
const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
- for (unsigned i = 0; i != NumOps; ++i) {
+ for (unsigned i = 1; i != NumOps; ++i) {
SDValue Op = Node->getOperand(i);
- if (i & 1) {
+ if ((i & 1) == 0) {
unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index b837261..4b6d3ef 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -14,23 +14,16 @@
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
#include "llvm/LLVMContext.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -57,19 +50,13 @@ class SelectionDAGLegalize {
const TargetMachine &TM;
const TargetLowering &TLI;
SelectionDAG &DAG;
- CodeGenOpt::Level OptLevel;
// Libcall insertion helpers.
- /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
+ /// LastCALLSEQ - This keeps track of the CALLSEQ_END node that has been
/// legalized. We use this to ensure that calls are properly serialized
/// against each other, including inserted libcalls.
- SDValue LastCALLSEQ_END;
-
- /// IsLegalizingCall - This member is used *only* for purposes of providing
- /// helpful assertions that a libcall isn't created while another call is
- /// being legalized (which could lead to non-serialized call sequences).
- bool IsLegalizingCall;
+ SmallVector<SDValue, 8> LastCALLSEQ;
enum LegalizeAction {
Legal, // The target natively supports this operation.
@@ -98,13 +85,13 @@ class SelectionDAGLegalize {
}
public:
- SelectionDAGLegalize(SelectionDAG &DAG, CodeGenOpt::Level ol);
+ explicit SelectionDAGLegalize(SelectionDAG &DAG);
/// getTypeAction - Return how we should legalize values of this type, either
/// it is already legal or we need to expand it into multiple registers of
/// smaller integer type, or we need to promote it to a larger type.
LegalizeAction getTypeAction(EVT VT) const {
- return (LegalizeAction)ValueTypeActions.getTypeAction(VT);
+ return (LegalizeAction)TLI.getTypeAction(*DAG.getContext(), VT);
}
/// isTypeLegal - Return true if this type is legal on this target.
@@ -147,6 +134,9 @@ private:
DebugLoc dl);
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
+ unsigned NumOps, bool isSigned, DebugLoc dl);
+
std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node, bool isSigned);
SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
@@ -158,7 +148,7 @@ private:
RTLIB::Libcall Call_I32,
RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128);
- SDValue ExpandDivRemLibCall(SDNode *Node, bool isSigned, bool isDIV);
+ void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
SDValue ExpandBUILD_VECTOR(SDNode *Node);
@@ -184,6 +174,15 @@ private:
void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ SDValue getLastCALLSEQ() { return LastCALLSEQ.back(); }
+ void setLastCALLSEQ(const SDValue s) { LastCALLSEQ.back() = s; }
+ void pushLastCALLSEQ(SDValue s) {
+ LastCALLSEQ.push_back(s);
+ }
+ void popLastCALLSEQ() {
+ LastCALLSEQ.pop_back();
+ }
};
}
@@ -219,18 +218,16 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
}
-SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
- CodeGenOpt::Level ol)
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
: TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
- DAG(dag), OptLevel(ol),
+ DAG(dag),
ValueTypeActions(TLI.getValueTypeActions()) {
assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
"Too many value types for ValueTypeActions to hold!");
}
void SelectionDAGLegalize::LegalizeDAG() {
- LastCALLSEQ_END = DAG.getEntryNode();
- IsLegalizingCall = false;
+ pushLastCALLSEQ(DAG.getEntryNode());
// The legalize process is inherently a bottom-up recursive process (users
// legalize their uses before themselves). Given infinite stack space, we
@@ -258,14 +255,15 @@ void SelectionDAGLegalize::LegalizeDAG() {
/// FindCallEndFromCallStart - Given a chained node that is part of a call
/// sequence, find the CALLSEQ_END node that terminates the call sequence.
static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
- // Nested CALLSEQ_START/END constructs aren't yet legal,
- // but we can DTRT and handle them correctly here.
+ int next_depth = depth;
if (Node->getOpcode() == ISD::CALLSEQ_START)
- depth++;
- else if (Node->getOpcode() == ISD::CALLSEQ_END) {
- depth--;
- if (depth == 0)
+ next_depth = depth + 1;
+ if (Node->getOpcode() == ISD::CALLSEQ_END) {
+ assert(depth > 0 && "negative depth!");
+ if (depth == 1)
return Node;
+ else
+ next_depth = depth - 1;
}
if (Node->use_empty())
return 0; // No CallSeqEnd
@@ -296,7 +294,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
SDNode *User = *UI;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
if (User->getOperand(i) == TheChain)
- if (SDNode *Result = FindCallEndFromCallStart(User, depth))
+ if (SDNode *Result = FindCallEndFromCallStart(User, next_depth))
return Result;
}
return 0;
@@ -317,6 +315,7 @@ static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
case ISD::CALLSEQ_START:
if (!nested)
return Node;
+ Node = Node->getOperand(0).getNode();
nested--;
break;
case ISD::CALLSEQ_END:
@@ -324,7 +323,7 @@ static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
break;
}
}
- return 0;
+ return (Node->getOpcode() == ISD::CALLSEQ_START) ? Node : 0;
}
/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to
@@ -433,68 +432,67 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
ST->isVolatile(), ST->isNonTemporal(), Alignment);
- } else {
- // Do a (aligned) store to a stack slot, then copy from the stack slot
- // to the final destination using (unaligned) integer loads and stores.
- EVT StoredVT = ST->getMemoryVT();
- EVT RegVT =
- TLI.getRegisterType(*DAG.getContext(),
- EVT::getIntegerVT(*DAG.getContext(),
- StoredVT.getSizeInBits()));
- unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
- unsigned RegBytes = RegVT.getSizeInBits() / 8;
- unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
-
- // Make sure the stack slot is also aligned for the register type.
- SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
-
- // Perform the original store, only redirected to the stack slot.
- SDValue Store = DAG.getTruncStore(Chain, dl,
- Val, StackPtr, MachinePointerInfo(),
- StoredVT, false, false, 0);
- SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
- SmallVector<SDValue, 8> Stores;
- unsigned Offset = 0;
-
- // Do all but one copies using the full register width.
- for (unsigned i = 1; i < NumRegs; i++) {
- // Load one integer register's worth from the stack slot.
- SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
- MachinePointerInfo(),
- false, false, 0);
- // Store it to the final location. Remember the store.
- Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
- ST->getPointerInfo().getWithOffset(Offset),
- ST->isVolatile(), ST->isNonTemporal(),
- MinAlign(ST->getAlignment(), Offset)));
- // Increment the pointers.
- Offset += RegBytes;
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- Increment);
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
- }
+ }
+ // Do a (aligned) store to a stack slot, then copy from the stack slot
+ // to the final destination using (unaligned) integer loads and stores.
+ EVT StoredVT = ST->getMemoryVT();
+ EVT RegVT =
+ TLI.getRegisterType(*DAG.getContext(),
+ EVT::getIntegerVT(*DAG.getContext(),
+ StoredVT.getSizeInBits()));
+ unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
- // The last store may be partial. Do a truncating store. On big-endian
- // machines this requires an extending load from the stack slot to ensure
- // that the bits are in the right place.
- EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
- 8 * (StoredBytes - Offset));
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
- // Load from the stack slot.
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
- MachinePointerInfo(),
- MemVT, false, false, 0);
+ // Perform the original store, only redirected to the stack slot.
+ SDValue Store = DAG.getTruncStore(Chain, dl,
+ Val, StackPtr, MachinePointerInfo(),
+ StoredVT, false, false, 0);
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ unsigned Offset = 0;
- Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
- ST->getPointerInfo()
- .getWithOffset(Offset),
- MemVT, ST->isVolatile(),
- ST->isNonTemporal(),
- MinAlign(ST->getAlignment(), Offset)));
- // The order of the stores doesn't matter - say it with a TokenFactor.
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
- Stores.size());
+ // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the stack slot.
+ SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ // Store it to the final location. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ ST->isVolatile(), ST->isNonTemporal(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // Increment the pointers.
+ Offset += RegBytes;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
}
+
+ // The last store may be partial. Do a truncating store. On big-endian
+ // machines this requires an extending load from the stack slot to ensure
+ // that the bits are in the right place.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (StoredBytes - Offset));
+
+ // Load from the stack slot.
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ MachinePointerInfo(),
+ MemVT, false, false, 0);
+
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo()
+ .getWithOffset(Offset),
+ MemVT, ST->isVolatile(),
+ ST->isNonTemporal(),
+ MinAlign(ST->getAlignment(), Offset)));
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
}
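
The chunking arithmetic the loop above relies on is plain ceiling division, with a possibly partial tail handled by a truncating store. A self-contained sketch with illustrative values:

    #include <cstdio>

    int main() {
      unsigned StoredBytes = 10; // e.g. an 80-bit value
      unsigned RegBytes = 4;     // e.g. 32-bit integer registers
      // All but the last copy move a full register; the last may be partial.
      unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; // ceil -> 3
      unsigned Offset = (NumRegs - 1) * RegBytes;                 // 8
      unsigned TailBits = 8 * (StoredBytes - Offset);             // 16
      printf("NumRegs=%u Offset=%u TailBits=%u\n", NumRegs, Offset, TailBits);
      return 0;
    }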
assert(ST->getMemoryVT().isInteger() &&
!ST->getMemoryVT().isVector() &&
@@ -941,11 +939,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::BR_JT:
case ISD::BR_CC:
case ISD::BRCOND:
- // Branches tweak the chain to include LastCALLSEQ_END
+ assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?");
+ // Branches tweak the chain to include LastCALLSEQ
Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
- LastCALLSEQ_END);
+ getLastCALLSEQ());
Ops[0] = LegalizeOp(Ops[0]);
- LastCALLSEQ_END = DAG.getEntryNode();
+ setLastCALLSEQ(DAG.getEntryNode());
break;
case ISD::SHL:
case ISD::SRL:
@@ -1034,6 +1033,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
break;
case ISD::CALLSEQ_START: {
SDNode *CallEnd = FindCallEndFromCallStart(Node);
+ assert(CallEnd && "didn't find CALLSEQ_END!");
// Recursively Legalize all of the inputs of the call end that do not lead
// to this call start. This ensures that any libcalls that need be inserted
@@ -1050,9 +1050,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Merge in the last call to ensure that this call starts after the last
// call ended.
- if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
+ if (getLastCALLSEQ().getOpcode() != ISD::EntryToken) {
Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- Tmp1, LastCALLSEQ_END);
+ Tmp1, getLastCALLSEQ());
Tmp1 = LegalizeOp(Tmp1);
}
@@ -1073,25 +1073,29 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// sequence have been legalized, legalize the call itself. During this
// process, no libcalls can/will be inserted, guaranteeing that no calls
// can overlap.
- assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
// Note that we are selecting this call!
- LastCALLSEQ_END = SDValue(CallEnd, 0);
- IsLegalizingCall = true;
+ setLastCALLSEQ(SDValue(CallEnd, 0));
// Legalize the call, starting from the CALLSEQ_END.
- LegalizeOp(LastCALLSEQ_END);
- assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
+ LegalizeOp(getLastCALLSEQ());
return Result;
}
case ISD::CALLSEQ_END:
- // If the CALLSEQ_START node hasn't been legalized first, legalize it. This
- // will cause this node to be legalized as well as handling libcalls right.
- if (LastCALLSEQ_END.getNode() != Node) {
- LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0));
- DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
- assert(I != LegalizedNodes.end() &&
- "Legalizing the call start should have legalized this node!");
- return I->second;
+ {
+ SDNode *myCALLSEQ_BEGIN = FindCallStartFromCallEnd(Node);
+
+ // If the CALLSEQ_START node hasn't been legalized first, legalize it.
+ // This will cause this node to be legalized as well as handling libcalls
+ // right.
+ if (getLastCALLSEQ().getNode() != Node) {
+ LegalizeOp(SDValue(myCALLSEQ_BEGIN, 0));
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ assert(I != LegalizedNodes.end() &&
+ "Legalizing the call start should have legalized this node!");
+ return I->second;
+ }
+
+ pushLastCALLSEQ(SDValue(myCALLSEQ_BEGIN, 0));
}
// Otherwise, the call start has been legalized and everything is going
@@ -1119,9 +1123,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Result.getResNo());
}
}
- assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
// This finishes up call legalization.
- IsLegalizingCall = false;
+ popLastCALLSEQ();
// If the CALLSEQ_END node has a flag, remember that we legalized it.
AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
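
The push/pop/get/set accessors used above replace the old single LastCALLSEQ_END value with a stack, so each nested call sequence gets its own slot. A sketch of the implied shape ('Value' stands in for SDValue; this is an assumed interface, not the verbatim header):

    #include <vector>

    struct Value { /* stand-in for SDValue */ };

    struct CallSeqTracker {
      std::vector<Value> LastCALLSEQ; // innermost call sequence at the back

      Value getLastCALLSEQ() const { return LastCALLSEQ.back(); }
      void setLastCALLSEQ(Value V) { LastCALLSEQ.back() = V; }
      void pushLastCALLSEQ(Value V) { LastCALLSEQ.push_back(V); }
      void popLastCALLSEQ() { LastCALLSEQ.pop_back(); }
    };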
@@ -1371,6 +1374,91 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp2 = LegalizeOp(Load.getValue(1));
break;
}
+
+ // If this is a promoted vector load, and the vector element types are
+ // legal, then scalarize it.
+ if (ExtType == ISD::EXTLOAD && SrcVT.isVector() &&
+ isTypeLegal(Node->getValueType(0).getScalarType())) {
+ SmallVector<SDValue, 8> LoadVals;
+ SmallVector<SDValue, 8> LoadChains;
+ unsigned NumElem = SrcVT.getVectorNumElements();
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
+ Node->getValueType(0).getScalarType(),
+ Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ // Bump the pointer so the address matches the Idx * Stride offset above.
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ Node->getValueType(0), &LoadVals[0], LoadVals.size());
+
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
+ break;
+ }
+
+ // If this is a promoted vector load, and the vector element types are
+ // illegal, create the promoted vector from bitcasted segments.
+ if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) {
+ EVT MemElemTy = Node->getValueType(0).getScalarType();
+ EVT SrcSclrTy = SrcVT.getScalarType();
+ unsigned SizeRatio =
+ (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits());
+
+ SmallVector<SDValue, 8> LoadVals;
+ SmallVector<SDValue, 8> LoadChains;
+ unsigned NumElem = SrcVT.getVectorNumElements();
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
+ SrcVT.getScalarType(),
+ Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ // Bump the pointer so the address matches the Idx * Stride offset above.
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+ if (TLI.isBigEndian()) {
+ // MSB (which is garbage, comes first)
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ for (unsigned i = 0; i<SizeRatio-1; ++i)
+ LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
+ } else {
+ // LSB (which is data, comes first)
+ for (unsigned i = 0; i<SizeRatio-1; ++i)
+ LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ }
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(),
+ SrcVT.getScalarType(), NumElem*SizeRatio);
+ SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ TempWideVector, &LoadVals[0], LoadVals.size());
+
+ // Cast to the correct type
+ ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes);
+
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
+ break;
+ }
+
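
In the illegal-element path above, each wide result element is assembled from SizeRatio narrow slots, with the loaded value in exactly one slot and undef padding in the rest. A model of the slot choice as the push order implements it:

    #include <cassert>

    // Slot index of the loaded data within each SizeRatio-wide group:
    // first slot on big-endian targets, last slot on little-endian.
    static unsigned dataSlot(unsigned SizeRatio, bool IsBigEndian) {
      return IsBigEndian ? 0 : SizeRatio - 1;
    }

    int main() {
      assert(dataSlot(2, true) == 0);  // e.g. i32 elements built from i16 slots
      assert(dataSlot(2, false) == 1);
      return 0;
    }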
// FIXME: This does not work for vectors on most targets. Sign- and
// zero-extend operations are currently folded into extending loads,
// whether they are legal or not, and then we end up here without any
@@ -1546,6 +1634,88 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Result = TLI.LowerOperation(Result, DAG);
break;
case Expand:
+
+ EVT WideScalarVT = Tmp3.getValueType().getScalarType();
+ EVT NarrowScalarVT = StVT.getScalarType();
+
+ // The store type is illegal; we must scalarize the vector store.
+ SmallVector<SDValue, 8> Stores;
+ bool ScalarLegal = isTypeLegal(WideScalarVT);
+ if (!isTypeLegal(StVT) && StVT.isVector() && ScalarLegal) {
+ unsigned NumElem = StVT.getVectorNumElements();
+
+ unsigned ScalarSize = StVT.getScalarType().getSizeInBits();
+ // Round non-power-of-two sizes up to the next power of two.
+ if (!isPowerOf2_32(ScalarSize))
+ ScalarSize = NextPowerOf2(ScalarSize);
+ // Types smaller than 8 bits are promoted to 8 bits.
+ ScalarSize = std::max<unsigned>(ScalarSize, 8);
+ // Byte stride between consecutive stored elements.
+ unsigned Stride = ScalarSize/8;
+ assert(isPowerOf2_32(Stride) && "Stride must be a power of two");
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ WideScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize);
+
+ Ex = DAG.getNode(ISD::TRUNCATE, dl, NVT, Ex);
+ SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+ ST->getPointerInfo().getWithOffset(Idx*Stride),
+ isVolatile, isNonTemporal, Alignment);
+ Stores.push_back(Store);
+ // Bump the pointer so the address matches the Idx*Stride offset above.
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+ }
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ break;
+ }
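
The stride rounding in that block, in isolation: non-power-of-two scalar widths round up to the next power of two, and sub-byte widths are promoted to a full byte. A sketch mirroring the isPowerOf2_32/NextPowerOf2 arithmetic:

    #include <cassert>

    static unsigned storeStride(unsigned ScalarBits) {
      unsigned Size = ScalarBits;
      while (Size & (Size - 1))
        Size &= Size - 1;       // keep only the highest set bit...
      if (Size != ScalarBits)
        Size <<= 1;             // ...and round up if any bits were dropped
      if (Size < 8)
        Size = 8;               // types smaller than 8 bits store as a byte
      return Size / 8;          // byte stride between elements
    }

    int main() {
      assert(storeStride(24) == 4 && storeStride(16) == 2 && storeStride(1) == 1);
      return 0;
    }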
+
+ // The store type is illegal and must be scalarized, but the scalar type
+ // is also illegal: bitcast the value to a vector of narrower elements
+ // and store it in smaller parts.
+ if (!isTypeLegal(StVT) && StVT.isVector()) {
+ unsigned WideNumElem = StVT.getVectorNumElements();
+ unsigned Stride = NarrowScalarVT.getSizeInBits()/8;
+
+ unsigned SizeRatio =
+ (WideScalarVT.getSizeInBits() / NarrowScalarVT.getSizeInBits());
+
+ EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), NarrowScalarVT,
+ SizeRatio*WideNumElem);
+
+ // Bitcast the wide-element vector to a longer vector with narrower
+ // elements, e.g. <2 x i64> -> <4 x i32>.
+ Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3);
+
+ for (unsigned Idx=0; Idx<WideNumElem*SizeRatio; Idx++) {
+ // Extract element Idx.
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+
+ // Store only if this element is:
+ // - the first element on big-endian, or
+ // - the last element on little-endian.
+ if (( TLI.isBigEndian() && (Idx%SizeRatio == 0)) ||
+ ((!TLI.isBigEndian() && (Idx%SizeRatio == SizeRatio-1)))) {
+ SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+ ST->getPointerInfo().getWithOffset(Idx*Stride),
+ isVolatile, isNonTemporal, Alignment);
+ Stores.push_back(Store);
+ }
+ // Bump the pointer so the address matches the Idx*Stride offset above.
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+ }
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ break;
+ }
+
// TRUNCSTORE:i16 i32 -> STORE i16
assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
@@ -2007,7 +2177,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
// and leave the Hi part unset.
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
bool isSigned) {
- assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
// The input chain to this libcall is the entry node of the function.
// Legalizing the call will automatically add the previous call to the
// dependence.
@@ -2043,9 +2212,43 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
return DAG.getRoot();
// Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+ return CallInfo.first;
+}
+
+/// ExpandLibCall - Generate a libcall taking the given operands as arguments
+/// and returning a result of type RetVT.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, DebugLoc dl) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue,SDValue> CallInfo =
+ TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+
+ // Legalize the call sequence, starting with the chain. This will advance
// the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
// was added by LowerCallTo (guaranteeing proper serialization of calls).
LegalizeOp(CallInfo.second);
+
return CallInfo.first;
}
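
This overload exists so callers can hand over operands that are already lowered, rather than taking them off a node. The [SU]MULO expansion further down uses it exactly that way (fragment, assuming the lo/hi halves have already been built):

    // Two logical arguments passed as four pre-split halves.
    SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
    SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);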
@@ -2055,7 +2258,6 @@ std::pair<SDValue, SDValue>
SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node,
bool isSigned) {
- assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
SDValue InChain = Node->getOperand(0);
TargetLowering::ArgListTy Args;
@@ -2081,7 +2283,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
Callee, Args, DAG, Node->getDebugLoc());
// Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+ // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
// was added by LowerCallTo (guaranteeing proper serialization of calls).
LegalizeOp(CallInfo.second);
return CallInfo;
@@ -2121,10 +2323,9 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
return ExpandLibCall(LC, Node, isSigned);
}
-/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem
-/// pairs.
-SDValue SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned,
- bool isDIV) {
+/// isDivRemLibcallAvailable - Return true if divmod libcall is available.
+static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
+ const TargetLowering &TLI) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
default: assert(0 && "Unexpected request for libcall!");
@@ -2135,17 +2336,18 @@ SDValue SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned,
case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
}
- if (!TLI.getLibcallName(LC))
- return SDValue();
+ return TLI.getLibcallName(LC) != 0;
+}
- // Only issue divrem libcall if both quotient and remainder are needed.
+/// UseDivRem - Only issue divrem libcall if both quotient and remainder are
+/// needed.
+static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) {
unsigned OtherOpcode = 0;
- if (isSigned) {
+ if (isSigned)
OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV;
- } else {
+ else
OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV;
- }
- SDNode *OtherNode = 0;
+
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
@@ -2155,32 +2357,28 @@ SDValue SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned,
continue;
if (User->getOpcode() == OtherOpcode &&
User->getOperand(0) == Op0 &&
- User->getOperand(1) == Op1) {
- OtherNode = User;
- break;
- }
+ User->getOperand(1) == Op1)
+ return true;
}
- if (!OtherNode)
- return SDValue();
+ return false;
+}
- // If the libcall is already generated, no need to issue it again.
- DenseMap<SDValue, SDValue>::iterator I
- = LegalizedNodes.find(SDValue(OtherNode,0));
- if (I != LegalizedNodes.end()) {
- OtherNode = I->second.getNode();
- SDNode *Chain = OtherNode->getOperand(0).getNode();
- for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
- if (User == OtherNode)
- continue;
- if (isDIV) {
- assert(User->getOpcode() == ISD::CopyFromReg);
- } else {
- assert(User->getOpcode() == ISD::LOAD);
- }
- return SDValue(User, 0);
- }
+/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem
+/// pairs.
+void
+SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = Opcode == ISD::SDIVREM;
+
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: assert(0 && "Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
}
// The input chain to this libcall is the entry node of the function.
@@ -2221,14 +2419,15 @@ SDValue SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned,
/*isReturnValueUsed=*/true, Callee, Args, DAG, dl);
// Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+ // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
// was added by LowerCallTo (guaranteeing proper serialization of calls).
LegalizeOp(CallInfo.second);
// Remainder is loaded back from the stack frame.
- SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr,
+ SDValue Rem = DAG.getLoad(RetVT, dl, getLastCALLSEQ(), FIPtr,
MachinePointerInfo(), false, false, 0);
- return isDIV ? CallInfo.first : Rem;
+ Results.push_back(CallInfo.first);
+ Results.push_back(Rem);
}
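
The libcall produces both results in one call: the quotient comes back as the return value and the remainder through an out-pointer, which is why the code above loads it back from a stack slot. A reference model of the runtime helper's contract (the real i32 routine is compiler-rt's __divmodsi4; this sketch only illustrates its shape):

    extern "C" int divmod_model(int a, int b, int *rem) {
      int quot = a / b;    // quotient returned directly
      *rem = a - quot * b; // remainder written through the pointer
      return quot;
    }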
/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
@@ -2878,7 +3077,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
}
case ISD::FP_ROUND_INREG: {
// The only way we can lower this is to turn it into a TRUNCSTORE,
- // EXTLOAD pair, targetting a temporary location (a stack slot).
+ // EXTLOAD pair, targeting a temporary location (a stack slot).
// NOTE: there is a choice here between constantly creating new stack
// slots and always reusing the same one. We currently always create
@@ -3204,28 +3403,25 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
Tmp2 = Node->getOperand(0);
Tmp3 = Node->getOperand(1);
- if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+ (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ UseDivRem(Node, isSigned, false))) {
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
} else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
// X % Y -> X-X/Y*Y
Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
- } else if (isSigned) {
- Tmp1 = ExpandDivRemLibCall(Node, true, false);
- if (!Tmp1.getNode())
- Tmp1 = ExpandIntLibCall(Node, true,
- RTLIB::SREM_I8,
- RTLIB::SREM_I16, RTLIB::SREM_I32,
- RTLIB::SREM_I64, RTLIB::SREM_I128);
- } else {
- Tmp1 = ExpandDivRemLibCall(Node, false, false);
- if (!Tmp1.getNode())
- Tmp1 = ExpandIntLibCall(Node, false,
- RTLIB::UREM_I8,
- RTLIB::UREM_I16, RTLIB::UREM_I32,
- RTLIB::UREM_I64, RTLIB::UREM_I128);
- }
+ } else if (isSigned)
+ Tmp1 = ExpandIntLibCall(Node, true,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16, RTLIB::SREM_I32,
+ RTLIB::SREM_I64, RTLIB::SREM_I128);
+ else
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16, RTLIB::UREM_I32,
+ RTLIB::UREM_I64, RTLIB::UREM_I128);
Results.push_back(Tmp1);
break;
}
@@ -3235,26 +3431,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
- if (TLI.isOperationLegalOrCustom(DivRemOpc, VT))
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+ (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ UseDivRem(Node, isSigned, true)))
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
Node->getOperand(1));
- else if (isSigned) {
- Tmp1 = ExpandDivRemLibCall(Node, true, true);
- if (!Tmp1.getNode()) {
- Tmp1 = ExpandIntLibCall(Node, true,
- RTLIB::SDIV_I8,
- RTLIB::SDIV_I16, RTLIB::SDIV_I32,
- RTLIB::SDIV_I64, RTLIB::SDIV_I128);
- }
- } else {
- Tmp1 = ExpandDivRemLibCall(Node, false, true);
- if (!Tmp1.getNode()) {
- Tmp1 = ExpandIntLibCall(Node, false,
- RTLIB::UDIV_I8,
- RTLIB::UDIV_I16, RTLIB::UDIV_I32,
- RTLIB::UDIV_I64, RTLIB::UDIV_I128);
- }
- }
+ else if (isSigned)
+ Tmp1 = ExpandIntLibCall(Node, true,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64, RTLIB::SDIV_I128);
+ else
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64, RTLIB::UDIV_I128);
Results.push_back(Tmp1);
break;
}
@@ -3271,6 +3462,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Results.push_back(Tmp1.getValue(1));
break;
}
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ // Expand into a divrem libcall.
+ ExpandDivRemLibCall(Node, Results);
+ break;
case ISD::MUL: {
EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
@@ -3355,6 +3551,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::UMULO:
case ISD::SMULO: {
EVT VT = Node->getValueType(0);
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
SDValue BottomHalf;
@@ -3372,7 +3569,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TopHalf = BottomHalf.getValue(1);
} else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
VT.getSizeInBits() * 2))) {
- EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
@@ -3385,7 +3581,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
// have a libcall big enough.
// Also, we can fall back to a division in some cases, but that's a big
// performance hit in the general case.
- EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (WideVT == MVT::i16)
LC = RTLIB::MUL_I16;
@@ -3396,15 +3591,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
else if (WideVT == MVT::i128)
LC = RTLIB::MUL_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
- LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
- RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
- SDValue Ret = ExpandLibCall(LC, Node, isSigned);
- BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret);
- TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret,
- DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy()));
- TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf);
+ // The high part is obtained by SRA'ing all but one of the bits of the
+ // low part, i.e. it is the sign extension of the low part.
+ unsigned LoSize = VT.getSizeInBits();
+ SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+ SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS,
+ DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+
+ // Here we're passing the two arguments explicitly as four arguments that
+ // are pre-lowered to the correct types. This all depends upon WideVT not
+ // being a legal type for the target, which forces each wide argument to
+ // be split into two registers.
+ SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+ SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+ DAG.getIntPtrConstant(0));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+ DAG.getIntPtrConstant(1));
}
+
if (isSigned) {
Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1,
TLI.getShiftAmountTy(BottomHalf.getValueType()));
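
Shifting right arithmetically by all but one bit replicates the sign bit, so the computed hi half is exactly the sign extension of the lo half. A quick standalone check (assumes the usual arithmetic right shift on signed values):

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t Lo = -7;
      int32_t Hi = Lo >> 31;                             // SRA by LoSize-1
      int64_t Wide = ((int64_t)Hi << 32) | (uint32_t)Lo;
      assert(Wide == -7);  // (Hi,Lo) reassembles to the sign-extended value
      return 0;
    }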
@@ -3486,9 +3693,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Tmp2.getOperand(0), Tmp2.getOperand(1),
Node->getOperand(2));
} else {
+ // We test only the low (i1) bit; skip the AND if the value is UNDEF.
+ Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? Tmp2 :
+ DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getConstant(1, Tmp2.getValueType()));
Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
- DAG.getCondCode(ISD::SETNE), Tmp2,
- DAG.getConstant(0, Tmp2.getValueType()),
+ DAG.getCondCode(ISD::SETNE), Tmp3,
+ DAG.getConstant(0, Tmp3.getValueType()),
Node->getOperand(2));
}
Results.push_back(Tmp1);
@@ -3539,7 +3750,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
Tmp2, Tmp3, Tmp4, dl);
- LastCALLSEQ_END = DAG.getEntryNode();
+ assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?");
+ setLastCALLSEQ(DAG.getEntryNode());
assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
@@ -3697,9 +3909,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
// SelectionDAG::Legalize - This is the entry point for the file.
//
-void SelectionDAG::Legalize(CodeGenOpt::Level OptLevel) {
+void SelectionDAG::Legalize() {
/// run - This is the main entry point to this class.
///
- SelectionDAGLegalize(*this, OptLevel).LegalizeDAG();
+ SelectionDAGLegalize(*this).LegalizeDAG();
}
-
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 935aab0..da75b8a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -73,6 +73,17 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
+ case ISD::EXTRACT_SUBVECTOR:
+ Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::BUILD_VECTOR:
+ Res = PromoteIntRes_BUILD_VECTOR(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
+
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
@@ -174,24 +185,30 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
default:
assert(false && "Unknown type action!");
break;
- case Legal:
+ case TargetLowering::TypeLegal:
break;
- case PromoteInteger:
+ case TargetLowering::TypePromoteInteger:
if (NOutVT.bitsEq(NInVT))
// The input promotes to the same size. Convert the promoted value.
return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
+ if (NInVT.isVector())
+ // Promote vector element via memory load/store.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ CreateStackStoreLoad(InOp, OutVT));
break;
- case SoftenFloat:
+ case TargetLowering::TypeSoftenFloat:
// Promote the integer operand by hand.
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
- case ExpandInteger:
- case ExpandFloat:
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
break;
- case ScalarizeVector:
+ case TargetLowering::TypeScalarizeVector:
// Convert the element to an integer and promote it by hand.
- return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
- BitConvertToInteger(GetScalarizedVector(InOp)));
- case SplitVector: {
+ if (!NOutVT.isVector())
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ BitConvertToInteger(GetScalarizedVector(InOp)));
+ break;
+ case TargetLowering::TypeSplitVector: {
// For example, i32 = BITCAST v2i16 on alpha. Convert the split
// pieces of the input into integers and reassemble in the final type.
SDValue Lo, Hi;
@@ -208,7 +225,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
JoinIntegers(Lo, Hi));
return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
}
- case WidenVector:
+ case TargetLowering::TypeWidenVector:
if (OutVT.bitsEq(NInVT))
// The input is widened to the same size. Convert to the widened value.
return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp));
@@ -342,7 +359,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
DebugLoc dl = N->getDebugLoc();
- if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) {
+ if (getTypeAction(N->getOperand(0).getValueType())
+ == TargetLowering::TypePromoteInteger) {
SDValue Res = GetPromotedInteger(N->getOperand(0));
assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
@@ -507,11 +525,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
switch (getTypeAction(N->getOperand(0).getValueType())) {
default: llvm_unreachable("Unknown type action!");
- case Legal:
- case ExpandInteger:
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypeExpandInteger:
Res = N->getOperand(0);
break;
- case PromoteInteger:
+ case TargetLowering::TypePromoteInteger:
Res = GetPromotedInteger(N->getOperand(0));
break;
}
@@ -557,9 +575,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
DebugLoc DL = N->getDebugLoc();
EVT SmallVT = LHS.getValueType();
- // To determine if the result overflowed in a larger type, we extend the input
- // to the larger type, do the multiply, then check the high bits of the result
- // to see if the overflow happened.
+ // To determine if the result overflowed in a larger type, we extend the
+ // input to the larger type, do the multiply, then check the high bits of
+ // the result to see if the overflow happened.
if (N->getOpcode() == ISD::SMULO) {
LHS = SExtPromotedInteger(LHS);
RHS = SExtPromotedInteger(RHS);
@@ -569,8 +587,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
}
SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
- // Overflow occurred iff the high part of the result does not zero/sign-extend
- // the low part.
+ // Overflow occurred iff the high part of the result does not
+ // zero/sign-extend the low part.
SDValue Overflow;
if (N->getOpcode() == ISD::UMULO) {
// Unsigned overflow occurred iff the high part is non-zero.
@@ -672,6 +690,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break;
case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break;
case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::CONVERT_RNDSAT:
Res = PromoteIntOp_CONVERT_RNDSAT(N); break;
case ISD::INSERT_VECTOR_ELT:
@@ -952,7 +972,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
SDValue Op = GetPromotedInteger(N->getOperand(0));
Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
- return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
+ return DAG.getZeroExtendInReg(Op, dl,
+ N->getOperand(0).getValueType().getScalarType());
}
@@ -1513,7 +1534,8 @@ void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
} else {
// For example, extension of an i48 to an i64. The operand type necessarily
// promotes to the result type, so will end up being expanded too.
- assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
"Only know how to promote this result!");
SDValue Res = GetPromotedInteger(Op);
assert(Res.getValueType() == N->getValueType(0) &&
@@ -2030,7 +2052,8 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
} else {
// For example, extension of an i48 to an i64. The operand type necessarily
// promotes to the result type, so will end up being expanded too.
- assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
"Only know how to promote this result!");
SDValue Res = GetPromotedInteger(Op);
assert(Res.getValueType() == N->getValueType(0) &&
@@ -2178,7 +2201,8 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
} else {
// For example, extension of an i48 to an i64. The operand type necessarily
// promotes to the result type, so will end up being expanded too.
- assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
"Only know how to promote this result!");
SDValue Res = GetPromotedInteger(Op);
assert(Res.getValueType() == N->getValueType(0) &&
@@ -2613,3 +2637,158 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
"Don't know how to expand this UINT_TO_FP!");
return MakeLibCall(LC, DstVT, &Op, 1, true, dl);
}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDValue InOp0 = N->getOperand(0);
+ EVT InVT = InOp0.getValueType();
+ EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned OutNumElems = N->getValueType(0).getVectorNumElements();
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ DebugLoc dl = N->getDebugLoc();
+ SDValue BaseIdx = N->getOperand(1);
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != OutNumElems; ++i) {
+
+ // Extract the element from the original vector.
+ SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(),
+ BaseIdx, DAG.getIntPtrConstant(i));
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ InVT.getVectorElementType(), N->getOperand(0), Index);
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, Ext);
+ // Collect the converted element for the new vector.
+ Ops.push_back(Op);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+}
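
All of these new promotion hooks follow one pattern: pull each lane out, any-extend it to the promoted element type, and rebuild the vector. A scalar model of the lane transform (any-extend modeled as zero-extend here; the high bits are unspecified in the real node):

    #include <array>
    #include <cstddef>
    #include <cstdint>

    // Model: promote a v4i8 value to v4i32 lane by lane.
    static std::array<uint32_t, 4> promoteLanes(const std::array<uint8_t, 4> &In) {
      std::array<uint32_t, 4> Out{};
      for (std::size_t i = 0; i < In.size(); ++i)
        Out[i] = In[i]; // widen each lane to the promoted element type
      return Out;
    }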
+
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
+
+ ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ NewMask.push_back(SV->getMaskElt(i));
+ }
+
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+
+ return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]);
+}
+
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
+
+ SDValue InOp0 = N->getOperand(0);
+ EVT InVT = InOp0.getValueType();
+ EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned NumElems = N->getNumOperands();
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ DebugLoc dl = N->getDebugLoc();
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i));
+ Ops.push_back(Op);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
+
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue InOp0 = N->getOperand(0);
+ EVT InVT = InOp0.getValueType();
+ EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ assert(!InVT.isVector() && "Input must be a scalar");
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0));
+
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
+
+ SDValue InOp0 = N->getOperand(0);
+ EVT InVT = InOp0.getValueType();
+ EVT InElVT = InVT.getVectorElementType();
+ EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp0);
+
+ SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl,
+ NOutVTElem, N->getOperand(1));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,NOutVT,
+ ConvertedVector, ConvElem, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = N->getOperand(1);
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ V0->getValueType(0).getScalarType(), V0, V1);
+
+ return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
+
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT RetSclrTy = N->getValueType(0).getVectorElementType();
+
+ SmallVector<SDValue, 8> NewOps;
+
+ // For each incoming vector
+ for (unsigned VecIdx = 0, E = N->getNumOperands(); VecIdx != E; ++VecIdx) {
+ SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx));
+ EVT SclrTy = Incoming->getValueType(0).getVectorElementType();
+ unsigned NumElem = Incoming->getValueType(0).getVectorNumElements();
+
+ for (unsigned i=0; i<NumElem; ++i) {
+ // Extract element from incoming vector
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy,
+ Incoming, DAG.getIntPtrConstant(i));
+ SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex);
+ NewOps.push_back(Tr);
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0),
+ &NewOps[0], NewOps.size());
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index cedda7e..ba658b0 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -224,38 +224,38 @@ bool DAGTypeLegalizer::run() {
switch (getTypeAction(ResultVT)) {
default:
assert(false && "Unknown action!");
- case Legal:
+ case TargetLowering::TypeLegal:
break;
// The following calls must take care of *all* of the node's results,
// not just the illegal result they were passed (this includes results
// with a legal type). Results can be remapped using ReplaceValueWith,
// or their promoted/expanded/etc values registered in PromotedIntegers,
// ExpandedIntegers etc.
- case PromoteInteger:
+ case TargetLowering::TypePromoteInteger:
PromoteIntegerResult(N, i);
Changed = true;
goto NodeDone;
- case ExpandInteger:
+ case TargetLowering::TypeExpandInteger:
ExpandIntegerResult(N, i);
Changed = true;
goto NodeDone;
- case SoftenFloat:
+ case TargetLowering::TypeSoftenFloat:
SoftenFloatResult(N, i);
Changed = true;
goto NodeDone;
- case ExpandFloat:
+ case TargetLowering::TypeExpandFloat:
ExpandFloatResult(N, i);
Changed = true;
goto NodeDone;
- case ScalarizeVector:
+ case TargetLowering::TypeScalarizeVector:
ScalarizeVectorResult(N, i);
Changed = true;
goto NodeDone;
- case SplitVector:
+ case TargetLowering::TypeSplitVector:
SplitVectorResult(N, i);
Changed = true;
goto NodeDone;
- case WidenVector:
+ case TargetLowering::TypeWidenVector:
WidenVectorResult(N, i);
Changed = true;
goto NodeDone;
@@ -277,36 +277,36 @@ ScanOperands:
switch (getTypeAction(OpVT)) {
default:
assert(false && "Unknown action!");
- case Legal:
+ case TargetLowering::TypeLegal:
continue;
// The following calls must either replace all of the node's results
// using ReplaceValueWith, and return "false"; or update the node's
// operands in place, and return "true".
- case PromoteInteger:
+ case TargetLowering::TypePromoteInteger:
NeedsReanalyzing = PromoteIntegerOperand(N, i);
Changed = true;
break;
- case ExpandInteger:
+ case TargetLowering::TypeExpandInteger:
NeedsReanalyzing = ExpandIntegerOperand(N, i);
Changed = true;
break;
- case SoftenFloat:
+ case TargetLowering::TypeSoftenFloat:
NeedsReanalyzing = SoftenFloatOperand(N, i);
Changed = true;
break;
- case ExpandFloat:
+ case TargetLowering::TypeExpandFloat:
NeedsReanalyzing = ExpandFloatOperand(N, i);
Changed = true;
break;
- case ScalarizeVector:
+ case TargetLowering::TypeScalarizeVector:
NeedsReanalyzing = ScalarizeVectorOperand(N, i);
Changed = true;
break;
- case SplitVector:
+ case TargetLowering::TypeSplitVector:
NeedsReanalyzing = SplitVectorOperand(N, i);
Changed = true;
break;
- case WidenVector:
+ case TargetLowering::TypeWidenVector:
NeedsReanalyzing = WidenVectorOperand(N, i);
Changed = true;
break;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 5409b88..06dc40f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -57,16 +57,6 @@ public:
// 1+ - This is a node which has this many unprocessed operands.
};
private:
- enum LegalizeAction {
- Legal, // The target natively supports this type.
- PromoteInteger, // Replace this integer type with a larger one.
- ExpandInteger, // Split this integer type into two of half the size.
- SoftenFloat, // Convert this float type to a same size integer type.
- ExpandFloat, // Split this float type into two of half the size.
- ScalarizeVector, // Replace this one-element vector with its element type.
- SplitVector, // Split this vector type into two of half the size.
- WidenVector // This vector type should be widened into a larger vector.
- };
/// ValueTypeActions - This is a bitvector that contains two bits for each
/// simple value type, where the two bits correspond to the LegalizeAction
@@ -74,41 +64,13 @@ private:
TargetLowering::ValueTypeActionImpl ValueTypeActions;
/// getTypeAction - Return how we should legalize values of this type.
- LegalizeAction getTypeAction(EVT VT) const {
- switch (ValueTypeActions.getTypeAction(VT)) {
- default:
- assert(false && "Unknown legalize action!");
- case TargetLowering::Legal:
- return Legal;
- case TargetLowering::Promote:
- // Promote can mean
- // 1) For integers, use a larger integer type (e.g. i8 -> i32).
- // 2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32).
- if (!VT.isVector())
- return PromoteInteger;
- return WidenVector;
- case TargetLowering::Expand:
- // Expand can mean
- // 1) split scalar in half, 2) convert a float to an integer,
- // 3) scalarize a single-element vector, 4) split a vector in two.
- if (!VT.isVector()) {
- if (VT.isInteger())
- return ExpandInteger;
- if (VT.getSizeInBits() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits())
- return SoftenFloat;
- return ExpandFloat;
- }
-
- if (VT.getVectorNumElements() == 1)
- return ScalarizeVector;
- return SplitVector;
- }
+ TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const {
+ return TLI.getTypeAction(*DAG.getContext(), VT);
}
/// isTypeLegal - Return true if this type is legal on this target.
bool isTypeLegal(EVT VT) const {
- return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal;
+ return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal;
}
/// IgnoreNodeResults - Pretend all of this node's results are legal.
@@ -239,7 +201,7 @@ private:
EVT OldVT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
Op = GetPromotedInteger(Op);
- return DAG.getZeroExtendInReg(Op, dl, OldVT);
+ return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
}
// Integer Result Promotion.
@@ -248,6 +210,11 @@ private:
SDValue PromoteIntRes_AssertZext(SDNode *N);
SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
SDValue PromoteIntRes_Atomic2(AtomicSDNode *N);
+ SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_BITCAST(SDNode *N);
SDValue PromoteIntRes_BSWAP(SDNode *N);
SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
@@ -289,6 +256,9 @@ private:
SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N);
SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N);
+ SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntOp_MEMBARRIER(SDNode *N);
SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index a75ae87..85ea6b6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -43,36 +43,36 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
switch (getTypeAction(InVT)) {
default:
assert(false && "Unknown type action!");
- case Legal:
- case PromoteInteger:
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
break;
- case SoftenFloat:
+ case TargetLowering::TypeSoftenFloat:
// Convert the integer operand instead.
SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
- case ExpandInteger:
- case ExpandFloat:
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
// Convert the expanded pieces of the input.
GetExpandedOp(InOp, Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
- case SplitVector:
+ case TargetLowering::TypeSplitVector:
GetSplitVector(InOp, Lo, Hi);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
- case ScalarizeVector:
+ case TargetLowering::TypeScalarizeVector:
// Convert the element instead.
SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
- case WidenVector: {
+ case TargetLowering::TypeWidenVector: {
assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
InOp = GetWidenedVector(InOp);
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 0b4dd35..b5698f9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -526,13 +526,13 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
switch (getTypeAction(InVT)) {
default:
assert(false && "Unknown type action!");
- case Legal:
- case PromoteInteger:
- case SoftenFloat:
- case ScalarizeVector:
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypeScalarizeVector:
break;
- case ExpandInteger:
- case ExpandFloat:
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
// A scalar to vector conversion, where the scalar needs expansion.
// If the vector is being split in two then we can just convert the
// expanded pieces.
@@ -545,7 +545,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
return;
}
break;
- case SplitVector:
+ case TargetLowering::TypeSplitVector:
// If the input is a vector that needs to be split, convert each split
// piece of the input now.
GetSplitVector(InOp, Lo, Hi);
@@ -774,7 +774,7 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
EVT InVT = N->getOperand(0).getValueType();
switch (getTypeAction(InVT)) {
default: llvm_unreachable("Unexpected type action!");
- case Legal: {
+ case TargetLowering::TypeLegal: {
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
LoVT.getVectorNumElements());
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
@@ -783,10 +783,21 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
break;
}
- case SplitVector:
+ case TargetLowering::TypePromoteInteger: {
+ SDValue InOp = GetPromotedInteger(N->getOperand(0));
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(),
+ InOp.getValueType().getVectorElementType(),
+ LoVT.getVectorNumElements());
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ break;
+ }
+ case TargetLowering::TypeSplitVector:
GetSplitVector(N->getOperand(0), Lo, Hi);
break;
- case WidenVector: {
+ case TargetLowering::TypeWidenVector: {
// If the result needs to be split and the input needs to be widened,
// the two types must have different lengths. Use the widened result
// and extract from it to do the split.
@@ -1439,7 +1450,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
unsigned Opcode = N->getOpcode();
unsigned InVTNumElts = InVT.getVectorNumElements();
- if (getTypeAction(InVT) == WidenVector) {
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
InVTNumElts = InVT.getVectorNumElements();
@@ -1515,7 +1526,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
SDValue ShOp = N->getOperand(1);
EVT ShVT = ShOp.getValueType();
- if (getTypeAction(ShVT) == WidenVector) {
+ if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) {
ShOp = GetWidenedVector(ShOp);
ShVT = ShOp.getValueType();
}
@@ -1557,9 +1568,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
default:
assert(false && "Unknown type action!");
break;
- case Legal:
+ case TargetLowering::TypeLegal:
break;
- case PromoteInteger:
+ case TargetLowering::TypePromoteInteger:
// If the InOp is promoted to the same size, convert it. Otherwise,
// fall out of the switch and widen the promoted input.
InOp = GetPromotedInteger(InOp);
@@ -1567,13 +1578,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
if (WidenVT.bitsEq(InVT))
return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
break;
- case SoftenFloat:
- case ExpandInteger:
- case ExpandFloat:
- case ScalarizeVector:
- case SplitVector:
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ case TargetLowering::TypeScalarizeVector:
+ case TargetLowering::TypeSplitVector:
break;
- case WidenVector:
+ case TargetLowering::TypeWidenVector:
// If the InOp is widened to the same size, convert it. Otherwise, fall
// out of the switch and widen the widened input.
InOp = GetWidenedVector(InOp);
@@ -1653,7 +1664,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
unsigned NumOperands = N->getNumOperands();
bool InputWidened = false; // Indicates we need to widen the input.
- if (getTypeAction(InVT) != WidenVector) {
+ if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
// Add undef vectors to widen to correct length.
unsigned NumConcat = WidenVT.getVectorNumElements() /
@@ -1732,7 +1743,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
unsigned InVTNumElts = InVT.getVectorNumElements();
- if (getTypeAction(InVT) == WidenVector) {
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(InOp);
InVT = InOp.getValueType();
InVTNumElts = InVT.getVectorNumElements();
@@ -1800,7 +1811,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue Idx = N->getOperand(1);
DebugLoc dl = N->getDebugLoc();
- if (getTypeAction(InOp.getValueType()) == WidenVector)
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
EVT InVT = InOp.getValueType();
@@ -1882,7 +1893,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
EVT CondEltVT = CondVT.getVectorElementType();
EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
CondEltVT, WidenNumElts);
- if (getTypeAction(CondVT) == WidenVector)
+ if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
Cond1 = GetWidenedVector(Cond1);
if (Cond1.getValueType() != CondWidenVT)
@@ -2026,7 +2037,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
unsigned NumElts = VT.getVectorNumElements();
SDValue InOp = N->getOperand(0);
- if (getTypeAction(InOp.getValueType()) == WidenVector)
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
EVT InVT = InOp.getValueType();
EVT InEltVT = InVT.getVectorElementType();
@@ -2081,7 +2092,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
unsigned NumOperands = N->getNumOperands();
for (unsigned i=0; i < NumOperands; ++i) {
SDValue InOp = N->getOperand(i);
- if (getTypeAction(InOp.getValueType()) == WidenVector)
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
for (unsigned j=0; j < NumInElts; ++j)
Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
@@ -2153,6 +2164,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
if (MemVT.getSizeInBits() <= WidenEltWidth)
break;
if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
RetVT = MemVT;
@@ -2168,6 +2180,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
unsigned MemVTWidth = MemVT.getSizeInBits();
if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
(WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
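A worked instance of the new isPowerOf2_32 guard may help (widths hypothetical): divisibility alone is not enough, presumably because the widening code assembles the result by repeated doubling, which cannot build a non-power-of-two number of memory units.

    // Sketch using llvm::isPowerOf2_32 from Support/MathExtras.h.
    unsigned WidenWidth = 96, MemVTWidth = 32;            // hypothetical widths
    bool Divides = (WidenWidth % MemVTWidth) == 0;        // true: 96 = 3 * 32
    bool Pow2 = isPowerOf2_32(WidenWidth / MemVTWidth);   // false: ratio is 3
    // Previously Divides alone admitted the 32-bit unit; with the added
    // guard both must hold, so this candidate is now skipped.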
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index b2e9c15..f09b381 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -71,6 +71,7 @@ static cl::opt<bool> DisableSchedCycles(
cl::desc("Disable cycle-level precision during preRA scheduling"));
// Temporary sched=list-ilp flags until the heuristics are robust.
+// Some options are also available under sched=list-hybrid.
static cl::opt<bool> DisableSchedRegPressure(
"disable-sched-reg-pressure", cl::Hidden, cl::init(false),
cl::desc("Disable regpressure priority in sched=list-ilp"));
@@ -80,6 +81,9 @@ static cl::opt<bool> DisableSchedLiveUses(
static cl::opt<bool> DisableSchedVRegCycle(
"disable-sched-vrcycle", cl::Hidden, cl::init(false),
cl::desc("Disable virtual register cycle interference checks"));
+static cl::opt<bool> DisableSchedPhysRegJoin(
+ "disable-sched-physreg-join", cl::Hidden, cl::init(false),
+ cl::desc("Disable physreg def-use affinity"));
static cl::opt<bool> DisableSchedStalls(
"disable-sched-stalls", cl::Hidden, cl::init(true),
cl::desc("Disable no-stall priority in sched=list-ilp"));
@@ -102,11 +106,11 @@ static cl::opt<unsigned> AvgIPC(
#ifndef NDEBUG
namespace {
// For sched=list-ilp, Count the number of times each factor comes into play.
- enum { FactPressureDiff, FactRegUses, FactHeight, FactDepth, FactStatic,
- FactOther, NumFactors };
+ enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
+ FactStatic, FactOther, NumFactors };
}
static const char *FactorName[NumFactors] =
-{"PressureDiff", "RegUses", "Height", "Depth","Static", "Other"};
+{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
static int FactorCount[NumFactors];
#endif //!NDEBUG
@@ -272,6 +276,33 @@ private:
};
} // end anonymous namespace
+/// GetCostForDef - Looks up the register class and cost for a given definition.
+/// Typically this just means looking up the representative register class,
+/// but for untyped values (MVT::untyped) it means inspecting the node's
+/// opcode to determine what register class is being generated.
+static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
+ const TargetLowering *TLI,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ unsigned &RegClass, unsigned &Cost) {
+ EVT VT = RegDefPos.GetValue();
+
+ // Special handling for untyped values. These values can only come from
+ // the expansion of custom DAG-to-DAG patterns.
+ if (VT == MVT::untyped) {
+ unsigned Opcode = RegDefPos.GetNode()->getMachineOpcode();
+ unsigned Idx = RegDefPos.GetIdx();
+ const TargetInstrDesc &Desc = TII->get(Opcode);
+ const TargetRegisterClass *RC = Desc.getRegClass(Idx, TRI);
+ RegClass = RC->getID();
+ // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
+ // better way to determine it.
+ Cost = 1;
+ } else {
+ RegClass = TLI->getRepRegClassFor(VT)->getID();
+ Cost = TLI->getRepRegClassCostFor(VT);
+ }
+}
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
@@ -463,6 +494,13 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
if (DisableSchedCycles)
return;
+ // FIXME: Nodes such as CopyFromReg probably should not advance the current
+ // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
+ // has predecessors, the cycle will be advanced when they are scheduled.
+ // But given the crude nature of modeling latency through such nodes, we
+ // currently need to treat these nodes like real instructions.
+ // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
+
unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
// Bump CurCycle to account for latency. We assume the latency of other
@@ -533,6 +571,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) {
}
}
+static void resetVRegCycle(SUnit *SU);
+
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
@@ -542,7 +582,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
#ifndef NDEBUG
if (CurCycle < SU->getHeight())
- DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n");
+ DEBUG(dbgs() << " Height [" << SU->getHeight()
+ << "] pipeline stall!\n");
#endif
// FIXME: Do not modify node height. It may interfere with
@@ -559,7 +600,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
AvailableQueue->ScheduledNode(SU);
// If HazardRec is disabled, and each inst counts as one cycle, then
- // advance CurCycle before ReleasePredecessors to avoid useles pushed to
+ // advance CurCycle before ReleasePredecessors to avoid useless pushes to
// PendingQueue for schedulers that implement HasReadyFilter.
if (!HazardRec->isEnabled() && AvgIPC < 2)
AdvanceToCycle(CurCycle + 1);
@@ -580,20 +621,25 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
}
}
+ resetVRegCycle(SU);
+
SU->isScheduled = true;
// Conditions under which the scheduler should eagerly advance the cycle:
// (1) No available instructions
// (2) All pipelines full, so available instructions must have hazards.
//
- // If HazardRec is disabled, the cycle was advanced earlier.
+ // If HazardRec is disabled, the cycle was pre-advanced before calling
+ // ReleasePredecessors. In that case, IssueCount should remain 0.
//
// Check AvailableQueue after ReleasePredecessors in case of zero latency.
- ++IssueCount;
- if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
- || (!HazardRec->isEnabled() && AvgIPC > 1 && IssueCount == AvgIPC)
- || AvailableQueue->empty())
- AdvanceToCycle(CurCycle + 1);
+ if (HazardRec->isEnabled() || AvgIPC > 1) {
+ if (SU->getNode() && SU->getNode()->isMachineOpcode())
+ ++IssueCount;
+ if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
+ || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
+ AdvanceToCycle(CurCycle + 1);
+ }
}
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
@@ -989,14 +1035,15 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
// Check if Ref is live.
- if (!LiveRegDefs[Reg]) continue;
+ if (!LiveRegDefs[*AliasI]) continue;
// Allow multiple uses of the same def.
- if (LiveRegDefs[Reg] == SU) continue;
+ if (LiveRegDefs[*AliasI] == SU) continue;
// Add Reg to the set of interfering live regs.
- if (RegAdded.insert(Reg))
- LRegs.push_back(Reg);
+ if (RegAdded.insert(*AliasI)) {
+ LRegs.push_back(*AliasI);
+ }
}
}
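The indexing fix above is easiest to see with a toy alias pair (register numbers hypothetical, standing in for something like AX overlapping EAX):

    SUnit Conflict;                              // stands in for a live def
    unsigned EAX = 1, AX = 2;                    // hypothetical numbering
    SUnit *LiveRegDefs[3] = { 0, 0, &Conflict }; // def recorded under AX
    // The old code probed LiveRegDefs[Reg] (EAX) on every loop iteration,
    // saw NULL, and missed the interference entirely; probing with *AliasI
    // finds the AX entry and correctly pushes AX onto LRegs.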
@@ -1220,7 +1267,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
// priority. If it is not ready put it back. Schedule the node.
Sequence.reserve(SUnits.size());
while (!AvailableQueue->empty()) {
- DEBUG(dbgs() << "\n*** Examining Available\n";
+ DEBUG(dbgs() << "\nExamining Available:\n";
AvailableQueue->dump(this));
// Pick the best node to schedule taking all constraints into
@@ -1349,6 +1396,21 @@ struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
};
+#ifndef NDEBUG
+template<class SF>
+struct reverse_sort : public queue_sort {
+ SF &SortFunc;
+ reverse_sort(SF &sf) : SortFunc(sf) {}
+ reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}
+
+ bool operator()(SUnit* left, SUnit* right) const {
+ // reverse left/right rather than simply !SortFunc(left, right)
+ // to expose different paths in the comparison logic.
+ return SortFunc(right, left);
+ }
+};
+#endif // NDEBUG
+
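One detail behind the "reverse left/right" comment is worth spelling out: these pickers behave like strict-weak-order predicates, so reversal and negation agree only when the two nodes actually differ; on ties they diverge, which is presumably part of how reversal exercises different comparison paths.

    // For SUnits a, b that the picker considers equal:
    //   SortFunc(a, b) == false and SortFunc(b, a) == false
    //   reverse_sort()(a, b) == SortFunc(b, a) == false  // tie preserved
    //   !SortFunc(a, b) == true                          // would turn the tie
    //                                                    // into a bogus
    //                                                    // strict preference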
/// bu_ls_rr_sort - Priority function for bottom up register pressure
// reduction scheduler.
struct bu_ls_rr_sort : public queue_sort {
@@ -1549,20 +1611,33 @@ protected:
};
template<class SF>
-class RegReductionPriorityQueue : public RegReductionPQBase {
- static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
- std::vector<SUnit *>::iterator Best = Q.begin();
- for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
- E = Q.end(); I != E; ++I)
- if (Picker(*Best, *I))
- Best = I;
- SUnit *V = *Best;
- if (Best != prior(Q.end()))
- std::swap(*Best, Q.back());
- Q.pop_back();
- return V;
+static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
+ std::vector<SUnit *>::iterator Best = Q.begin();
+ for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
+ E = Q.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != prior(Q.end()))
+ std::swap(*Best, Q.back());
+ Q.pop_back();
+ return V;
+}
+
+template<class SF>
+SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
+#ifndef NDEBUG
+ if (DAG->StressSched) {
+ reverse_sort<SF> RPicker(Picker);
+ return popFromQueueImpl(Q, RPicker);
}
+#endif
+ (void)DAG;
+ return popFromQueueImpl(Q, Picker);
+}
+template<class SF>
+class RegReductionPriorityQueue : public RegReductionPQBase {
SF Picker;
public:
@@ -1583,7 +1658,7 @@ public:
SUnit *pop() {
if (Queue.empty()) return NULL;
- SUnit *V = popFromQueue(Queue, Picker);
+ SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
V->NodeQueueId = 0;
return V;
}
@@ -1593,7 +1668,7 @@ public:
std::vector<SUnit*> DumpQueue = Queue;
SF DumpPicker = Picker;
while (!DumpQueue.empty()) {
- SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
+ SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
if (isBottomUp())
dbgs() << "Height " << SU->getHeight() << ": ";
else
@@ -1623,6 +1698,20 @@ ILPBURRPriorityQueue;
// Static Node Priority for Register Pressure Reduction
//===----------------------------------------------------------------------===//
+// Check for special nodes that bypass scheduling heuristics.
+// Currently this pushes TokenFactor nodes down, but may be used for other
+// pseudo-ops as well.
+//
+// Return -1 to schedule right above left, 1 for left above right.
+// Return 0 if no bias exists.
+static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
+ bool LSchedLow = left->isScheduleLow;
+ bool RSchedLow = right->isScheduleLow;
+ if (LSchedLow != RSchedLow)
+ return LSchedLow < RSchedLow ? 1 : -1;
+ return 0;
+}
+
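For reference, this sign convention is consumed by both scheduling directions later in this diff:

    // Bottom-up comparators (bu/src/hybrid/ilp): positive bias wins.
    if (int res = checkSpecialNodes(left, right))
      return res > 0;
    // Top-down comparator: the sign flips with the traversal direction.
    if (int res = checkSpecialNodes(left, right))
      return res < 0;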
/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
@@ -1661,17 +1750,6 @@ void RegReductionPQBase::CalculateSethiUllmanNumbers() {
CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}
-void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
- SUnits = &sunits;
- // Add pseudo dependency edges for two-address nodes.
- AddPseudoTwoAddrDeps();
- // Reroute edges to nodes with multiple uses.
- if (!TracksRegPressure)
- PrescheduleNodesWithMultipleUses();
- // Calculate node priorities.
- CalculateSethiUllmanNumbers();
-}
-
void RegReductionPQBase::addNode(const SUnit *SU) {
unsigned SUSize = SethiUllmanNumbers.size();
if (SUnits->size() > SUSize)
@@ -1710,7 +1788,17 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
// If SU does not have a register def, schedule it close to its uses
// because it does not lengthen any live ranges.
return 0;
+#if 1
return SethiUllmanNumbers[SU->NodeNum];
+#else
+ unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
+ if (SU->isCallOp) {
+ // FIXME: This assumes all of the defs are used as call operands.
+ int NP = (int)Priority - SU->getNode()->getNumValues();
+ return (NP > 0) ? NP : 0;
+ }
+ return Priority;
+#endif
}
//===----------------------------------------------------------------------===//
@@ -1746,8 +1834,10 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
RegDefPos.IsValid(); RegDefPos.Advance()) {
EVT VT = RegDefPos.GetValue();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
+
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+
if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
return true;
}
@@ -1858,9 +1948,10 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) {
RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
if (SkipRegDefs)
continue;
- EVT VT = RegDefPos.GetValue();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+ RegPressure[RCId] += Cost;
break;
}
}
@@ -1873,16 +1964,16 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) {
RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
if (SkipRegDefs > 0)
continue;
- EVT VT = RegDefPos.GetValue();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) {
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+ if (RegPressure[RCId] < Cost) {
// Register pressure tracking is imprecise. This can happen. But we try
// hard not to let it happen because it likely results in poor scheduling.
DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
RegPressure[RCId] = 0;
}
else {
- RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ RegPressure[RCId] -= Cost;
}
}
dumpRegPressure();
@@ -2008,7 +2099,29 @@ static unsigned calcMaxScratches(const SUnit *SU) {
return Scratches;
}
-/// hasOnlyLiveOutUse - Return true if SU has a single value successor that is a
+/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
+/// CopyFromReg from a virtual register.
+static bool hasOnlyLiveInOpers(const SUnit *SU) {
+ bool RetVal = false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *PredSU = I->getSUnit();
+ if (PredSU->getNode() &&
+ PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
/// CopyToReg to a virtual register. This SU def is probably a liveout and
/// it has no other use. It should be scheduled closer to the terminator.
static bool hasOnlyLiveOutUses(const SUnit *SU) {
@@ -2030,62 +2143,71 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) {
return RetVal;
}
-/// UnitsSharePred - Return true if the two scheduling units share a common
-/// data predecessor.
-static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
- SmallSet<const SUnit*, 4> Preds;
- for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
+// Set isVRegCycle for a node with only live-in operands and live-out uses. Also
+// set isVRegCycle for its CopyFromReg operands.
+//
+// This is only relevant for single-block loops, in which case the VRegCycle
+// node is likely an induction variable in which the operand and target virtual
+// registers should be coalesced (e.g. pre/post increment values). Setting the
+// isVRegCycle flag helps the scheduler prioritize other uses of the same
+// CopyFromReg so that this node becomes the virtual register "kill". This
+// avoids interference between the values live in and out of the block and
+// eliminates a copy inside the loop.
+static void initVRegCycle(SUnit *SU) {
+ if (DisableSchedVRegCycle)
+ return;
+
+ if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
+ return;
+
+ DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
+
+ SU->isVRegCycle = true;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
- if (I->isCtrl()) continue; // ignore chain preds
- Preds.insert(I->getSUnit());
+ if (I->isCtrl()) continue;
+ I->getSUnit()->isVRegCycle = true;
}
- for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
+}
+
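A concrete single-block loop makes the flag's purpose clearer; in a C sketch like the one below, the increment of i is the VRegCycle candidate, since its only non-chain operand is a CopyFromReg of the loop-carried virtual register and its result feeds the CopyToReg for the next iteration.

    int sum_array(const int *a, int n) {
      int sum = 0;
      for (int i = 0; i != n; ++i)  // 'i + 1': operand and result share one
        sum += a[i];                // vreg across the back-edge; scheduling
      return sum;                   // it as the vreg's last use (the "kill")
    }                               // lets the coalescer drop an in-loop copy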
+// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
+// CopyFromReg operands. We should no longer penalize other uses of this VReg.
+static void resetVRegCycle(SUnit *SU) {
+ if (!SU->isVRegCycle)
+ return;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
I != E; ++I) {
if (I->isCtrl()) continue; // ignore chain preds
- if (Preds.count(I->getSUnit()))
- return true;
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isVRegCycle) {
+ assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
+ "VRegCycle def must be CopyFromReg");
+ PredSU->isVRegCycle = false;
+ }
}
- return false;
}
-// Return true if the virtual register defined by VRCycleSU may interfere with
-// VRUseSU.
-//
-// Note: We may consider two SU's that use the same value live into a loop as
-// interferng even though the value is not an induction variable. This is an
-// unfortunate consequence of scheduling on the selection DAG.
-static bool checkVRegCycleInterference(const SUnit *VRCycleSU,
- const SUnit *VRUseSU) {
- for (SUnit::const_pred_iterator I = VRCycleSU->Preds.begin(),
- E = VRCycleSU->Preds.end(); I != E; ++I) {
+// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
+// means a node that defines the VRegCycle has not been scheduled yet.
+static bool hasVRegCycleUse(const SUnit *SU) {
+ // If this SU also defines the VReg, don't hoist it as a "use".
+ if (SU->isVRegCycle)
+ return false;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
if (I->isCtrl()) continue; // ignore chain preds
- SDNode *InNode = I->getSUnit()->getNode();
- if (!InNode || InNode->getOpcode() != ISD::CopyFromReg)
- continue;
- for (SUnit::const_pred_iterator II = VRUseSU->Preds.begin(),
- EE = VRUseSU->Preds.end(); II != EE; ++II) {
- if (II->getSUnit() == I->getSUnit())
- return true;
+ if (I->getSUnit()->isVRegCycle &&
+ I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
+ DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
+ return true;
}
}
return false;
}
-// Compare the VRegCycle properties of the nodes.
-// Return -1 if left has higher priority, 1 if right has higher priority.
-// Return 0 if priority is equivalent.
-static int BUCompareVRegCycle(const SUnit *left, const SUnit *right) {
- if (left->isVRegCycle && !right->isVRegCycle) {
- if (checkVRegCycleInterference(left, right))
- return -1;
- }
- else if (!left->isVRegCycle && right->isVRegCycle) {
- if (checkVRegCycleInterference(right, left))
- return 1;
- }
- return 0;
-}
-
// Check for either a dependence (latency) or resource (hazard) stall.
//
// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
@@ -2101,23 +2223,12 @@ static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
// Return 0 if latency-based priority is equivalent.
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
RegReductionPQBase *SPQ) {
- // If the two nodes share an operand and one of them has a single
- // use that is a live out copy, favor the one that is live out. Otherwise
- // it will be difficult to eliminate the copy if the instruction is a
- // loop induction variable update. e.g.
- // BB:
- // sub r1, r3, #1
- // str r0, [r2, r3]
- // mov r3, r1
- // cmp
- // bne BB
- bool SharePred = UnitsSharePred(left, right);
- // FIXME: Only adjust if BB is a loop back edge.
- // FIXME: What's the cost of a copy?
- int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
- int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
- int LHeight = (int)left->getHeight() - LBonus;
- int RHeight = (int)right->getHeight() - RBonus;
+ // Scheduling an instruction that uses a VReg whose postincrement has not yet
+ // been scheduled will induce a copy. Model this as an extra cycle of latency.
+ int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
+ int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
+ int LHeight = (int)left->getHeight() + LPenalty;
+ int RHeight = (int)right->getHeight() + RPenalty;
bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
BUHasStall(left, LHeight, SPQ);
@@ -2128,48 +2239,102 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
// If scheduling either one of the node will cause a pipeline stall, sort
// them according to their height.
if (LStall) {
- if (!RStall)
+ if (!RStall) {
+ DEBUG(++FactorCount[FactStall]);
return 1;
- if (LHeight != RHeight)
+ }
+ if (LHeight != RHeight) {
+ DEBUG(++FactorCount[FactStall]);
return LHeight > RHeight ? 1 : -1;
- } else if (RStall)
+ }
+ } else if (RStall) {
+ DEBUG(++FactorCount[FactStall]);
return -1;
+ }
// If either node is scheduling for latency, sort them by height/depth
// and latency.
if (!checkPref || (left->SchedulingPref == Sched::Latency ||
right->SchedulingPref == Sched::Latency)) {
if (DisableSchedCycles) {
- if (LHeight != RHeight)
+ if (LHeight != RHeight) {
+ DEBUG(++FactorCount[FactHeight]);
return LHeight > RHeight ? 1 : -1;
+ }
}
else {
// If neither instruction stalls (!LStall && !RStall) then
// its height is already covered so only its depth matters. We also reach
// this if both stall but have the same height.
- unsigned LDepth = left->getDepth();
- unsigned RDepth = right->getDepth();
+ int LDepth = left->getDepth() - LPenalty;
+ int RDepth = right->getDepth() - RPenalty;
if (LDepth != RDepth) {
+ DEBUG(++FactorCount[FactDepth]);
DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
<< ") depth " << LDepth << " vs SU (" << right->NodeNum
<< ") depth " << RDepth << "\n");
return LDepth < RDepth ? 1 : -1;
}
}
- if (left->Latency != right->Latency)
+ if (left->Latency != right->Latency) {
+ DEBUG(++FactorCount[FactOther]);
return left->Latency > right->Latency ? 1 : -1;
+ }
}
return 0;
}
static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
+ // Schedule physical register definitions close to their use. This is
+ // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
+ // long as shortening physreg live ranges is generally good, we can defer
+ // creating a subtarget hook.
+ if (!DisableSchedPhysRegJoin) {
+ bool LHasPhysReg = left->hasPhysRegDefs;
+ bool RHasPhysReg = right->hasPhysRegDefs;
+ if (LHasPhysReg != RHasPhysReg) {
+ DEBUG(++FactorCount[FactRegUses]);
+ #ifndef NDEBUG
+ const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
+ #endif
+ DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
+ << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
+ << PhysRegMsg[RHasPhysReg] << "\n");
+ return LHasPhysReg < RHasPhysReg;
+ }
+ }
+
+ // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
unsigned LPriority = SPQ->getNodePriority(left);
unsigned RPriority = SPQ->getNodePriority(right);
+
+ // Be really careful about hoisting call operands above previous calls.
+ // Only allow it if it would reduce register pressure.
+ if (left->isCall && right->isCallOp) {
+ unsigned RNumVals = right->getNode()->getNumValues();
+ RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
+ }
+ if (right->isCall && left->isCallOp) {
+ unsigned LNumVals = left->getNode()->getNumValues();
+ LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
+ }
+
if (LPriority != RPriority) {
DEBUG(++FactorCount[FactStatic]);
return LPriority > RPriority;
}
- DEBUG(++FactorCount[FactOther]);
+
+ // If one or both of the nodes is a call and their Sethi-Ullman numbers are
+ // the same, keep source order.
+ if (left->isCall || right->isCall) {
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+ // Prefer the node with the lower non-zero source order number; an order
+ // number of zero means the node has no source order.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+ }
// Try schedule def + use closer when Sethi-Ullman numbers are the same.
// e.g.
@@ -2190,40 +2355,62 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
// This creates more short live intervals.
unsigned LDist = closestSucc(left);
unsigned RDist = closestSucc(right);
- if (LDist != RDist)
+ if (LDist != RDist) {
+ DEBUG(++FactorCount[FactOther]);
return LDist < RDist;
+ }
// How many registers becomes live when the node is scheduled.
unsigned LScratch = calcMaxScratches(left);
unsigned RScratch = calcMaxScratches(right);
- if (LScratch != RScratch)
+ if (LScratch != RScratch) {
+ DEBUG(++FactorCount[FactOther]);
return LScratch > RScratch;
+ }
- if (!DisableSchedCycles) {
+ // Comparing latency against a call makes little sense unless the node
+ // is register pressure-neutral.
+ if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
+ return (left->NodeQueueId > right->NodeQueueId);
+
+ // Do not compare latencies when one or both of the nodes are calls.
+ if (!DisableSchedCycles &&
+ !(left->isCall || right->isCall)) {
int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
if (result != 0)
return result > 0;
}
else {
- if (left->getHeight() != right->getHeight())
+ if (left->getHeight() != right->getHeight()) {
+ DEBUG(++FactorCount[FactHeight]);
return left->getHeight() > right->getHeight();
+ }
- if (left->getDepth() != right->getDepth())
+ if (left->getDepth() != right->getDepth()) {
+ DEBUG(++FactorCount[FactDepth]);
return left->getDepth() < right->getDepth();
+ }
}
assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
+ DEBUG(++FactorCount[FactOther]);
return (left->NodeQueueId > right->NodeQueueId);
}
// Bottom up
bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
return BURRSort(left, right, SPQ);
}
// Source order, otherwise bottom up.
bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
unsigned LOrder = SPQ->getNodeOrdering(left);
unsigned ROrder = SPQ->getNodeOrdering(right);
@@ -2255,6 +2442,9 @@ bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
// Return true if right should be scheduled with higher priority than left.
bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
if (left->isCall || right->isCall)
// No way to compute latency of calls.
return BURRSort(left, right, SPQ);
@@ -2264,24 +2454,22 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
// Avoid causing spills. If register pressure is high, schedule for
// register pressure reduction.
if (LHigh && !RHigh) {
+ DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
<< right->NodeNum << ")\n");
return true;
}
else if (!LHigh && RHigh) {
+ DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
<< left->NodeNum << ")\n");
return false;
}
- int result = 0;
- if (!DisableSchedVRegCycle) {
- result = BUCompareVRegCycle(left, right);
- }
- if (result == 0 && !LHigh && !RHigh) {
- result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
+ if (!LHigh && !RHigh) {
+ int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
}
- if (result != 0)
- return result > 0;
return BURRSort(left, right, SPQ);
}
@@ -2322,6 +2510,9 @@ static bool canEnableCoalescing(SUnit *SU) {
// list-ilp is currently an experimental scheduler that allows various
// heuristics to be enabled prior to the normal register reduction logic.
bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
if (left->isCall || right->isCall)
// No way to compute latency of calls.
return BURRSort(left, right, SPQ);
@@ -2347,12 +2538,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
if (RReduce && !LReduce) return true;
}
- if (!DisableSchedVRegCycle) {
- int result = BUCompareVRegCycle(left, right);
- if (result != 0)
- return result > 0;
- }
-
if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
<< " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
@@ -2391,6 +2576,24 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
return BURRSort(left, right, SPQ);
}
+void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ // Add pseudo dependency edges for two-address nodes.
+ AddPseudoTwoAddrDeps();
+ // Reroute edges to nodes with multiple uses.
+ if (!TracksRegPressure)
+ PrescheduleNodesWithMultipleUses();
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
+
+ // For single block loops, mark nodes that look like canonical IV increments.
+ if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
+ for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
+ initVRegCycle(&sunits[i]);
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Preschedule for Register Pressure
//===----------------------------------------------------------------------===//
@@ -2668,6 +2871,9 @@ static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res < 0;
+
unsigned LPriority = SPQ->getNodePriority(left);
unsigned RPriority = SPQ->getNodePriority(right);
bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 24a1937..0d656ef 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -83,10 +83,13 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
SU->Latency = Old->Latency;
SU->isVRegCycle = Old->isVRegCycle;
SU->isCall = Old->isCall;
+ SU->isCallOp = Old->isCallOp;
SU->isTwoAddress = Old->isTwoAddress;
SU->isCommutable = Old->isCommutable;
SU->hasPhysRegDefs = Old->hasPhysRegDefs;
SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+ SU->isScheduleHigh = Old->isScheduleHigh;
+ SU->isScheduleLow = Old->isScheduleLow;
SU->SchedulingPref = Old->SchedulingPref;
Old->isCloned = true;
return SU;
@@ -283,6 +286,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
Worklist.push_back(DAG->getRoot().getNode());
Visited.insert(DAG->getRoot().getNode());
+ SmallVector<SUnit*, 8> CallSUnits;
while (!Worklist.empty()) {
SDNode *NI = Worklist.pop_back_val();
@@ -335,6 +339,15 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
if (!HasGlueUse) break;
}
+ if (NodeSUnit->isCall)
+ CallSUnits.push_back(NodeSUnit);
+
+ // Schedule zero-latency TokenFactor below any nodes that may increase the
+ // schedule height. Otherwise, ancestors of the TokenFactor may appear to
+ // have false stalls.
+ if (NI->getOpcode() == ISD::TokenFactor)
+ NodeSUnit->isScheduleLow = true;
+
// If there are glue operands involved, N is now the bottom-most node
// of the sequence of nodes that are glued together.
// Update the SUnit.
@@ -342,16 +355,26 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
- // Set isVRegCycle if the node operands are live into and value is live out
- // of a single block loop.
- InitVRegCycleFlag(NodeSUnit);
-
// Compute NumRegDefsLeft. This must be done before AddSchedEdges.
InitNumRegDefsLeft(NodeSUnit);
// Assign the Latency field of NodeSUnit using target-provided information.
ComputeLatency(NodeSUnit);
}
+
+ // Find all call operands.
+ while (!CallSUnits.empty()) {
+ SUnit *SU = CallSUnits.pop_back_val();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->getOpcode() != ISD::CopyToReg)
+ continue;
+ SDNode *SrcN = SUNode->getOperand(2).getNode();
+ if (isPassiveNode(SrcN)) continue; // Not scheduled.
+ SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
+ SrcSU->isCallOp = true;
+ }
+ }
}
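The walk above leans on the shape of glued call sequences: argument values are moved into ABI registers by CopyToReg nodes glued beneath the call, and operand 2 of a CopyToReg is the value being copied. A hypothetical sequence (node names illustrative):

    // t1 = <compute argument>             ; SrcN: SrcSU->isCallOp = true
    // t2 = CopyToReg t0, Register:R0, t1  ; glued into the call's SUnit
    // t3 = <target call node> t2, ...     ; the SUnit with isCall set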
void ScheduleDAGSDNodes::AddSchedEdges() {
@@ -412,11 +435,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
// it requires a cross class copy (cost < 0). That means we are only
// treating "expensive to copy" register dependency as physical register
// dependency. This may change in the future though.
- if (Cost >= 0)
+ if (Cost >= 0 && !StressSched)
PhysReg = 0;
// If this is a ctrl dep, latency is 1.
unsigned OpLatency = isChain ? 1 : OpSU->Latency;
+ // Special-case TokenFactor chains as zero-latency.
+ if (isChain && OpN->getOpcode() == ISD::TokenFactor)
+ OpLatency = 0;
+
const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
OpLatency, PhysReg);
if (!isChain && !UnitLatencies) {
@@ -512,47 +539,6 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() {
}
}
-// Set isVRegCycle if this node's single use is CopyToReg and its only active
-// data operands are CopyFromReg.
-//
-// This is only relevant for single-block loops, in which case the VRegCycle
-// node is likely an induction variable in which the operand and target virtual
-// registers should be coalesced (e.g. pre/post increment values). Setting the
-// isVRegCycle flag helps the scheduler prioritize other uses of the same
-// CopyFromReg so that this node becomes the virtual register "kill". This
-// avoids interference between the values live in and out of the block and
-// eliminates a copy inside the loop.
-void ScheduleDAGSDNodes::InitVRegCycleFlag(SUnit *SU) {
- if (!BB->isSuccessor(BB))
- return;
-
- SDNode *N = SU->getNode();
- if (N->getGluedNode())
- return;
-
- if (!N->hasOneUse() || N->use_begin()->getOpcode() != ISD::CopyToReg)
- return;
-
- bool FoundLiveIn = false;
- for (SDNode::op_iterator OI = N->op_begin(), E = N->op_end(); OI != E; ++OI) {
- EVT OpVT = OI->getValueType();
- assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
-
- if (OpVT == MVT::Other)
- continue; // ignore chain operands
-
- if (isPassiveNode(OI->getNode()))
- continue; // ignore constants and such
-
- if (OI->getNode()->getOpcode() != ISD::CopyFromReg)
- return;
-
- FoundLiveIn = true;
- }
- if (FoundLiveIn)
- SU->isVRegCycle = true;
-}
-
void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
assert(SU->NumRegDefsLeft == 0 && "expect a new node");
for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
@@ -562,6 +548,16 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
}
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+ SDNode *N = SU->getNode();
+
+ // TokenFactor operands are considered zero latency, and some schedulers
+ // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
+ // whenever node latency is nonzero.
+ if (N && N->getOpcode() == ISD::TokenFactor) {
+ SU->Latency = 0;
+ return;
+ }
+
// Check to see if the scheduler cares about latencies.
if (ForceUnitLatencies()) {
SU->Latency = 1;
@@ -569,7 +565,6 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
}
if (!InstrItins || InstrItins->isEmpty()) {
- SDNode *N = SU->getNode();
if (N && N->isMachineOpcode() &&
TII->isHighLatencyDef(N->getMachineOpcode()))
SU->Latency = HighLatencyCycles;
@@ -641,7 +636,7 @@ namespace {
};
}
-/// ProcessSDDbgValues - Process SDDbgValues assoicated with this node.
+/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
InstrEmitter &Emitter,
SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index b5f68f3..3ad2bd6 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -135,6 +135,14 @@ namespace llvm {
return ValueType;
}
+ const SDNode *GetNode() const {
+ return Node;
+ }
+
+ unsigned GetIdx() const {
+ return DefIdx;
+ }
+
void Advance();
private:
void InitNodeNumDefs();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c2711c8..68eeb60 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2050,14 +2050,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
break;
default:
- // Allow the target to implement this method for its nodes.
- if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+ if (Op.getOpcode() < ISD::BUILTIN_OP_END)
+ break;
+ // Fallthrough
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
- TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this,
- Depth);
- }
+ // Allow the target to implement this method for its nodes.
+ TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this,
+ Depth);
return;
}
}
@@ -2322,6 +2323,13 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
return !C->isZero();
// TODO: Recognize more cases here.
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::OR:
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return !C->isNullValue();
+ break;
+ }
return false;
}
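The new OR case is sound because OR can only set bits; a one-line worked instance:

    // (or %x, 1) has bit 0 set for every %x, so it can never be zero.
    // Only a constant RHS is inspected; anything else still falls
    // through to the final 'return false'.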
@@ -2339,16 +2347,6 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
return false;
}
-bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
- GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
- if (!GA) return false;
- if (GA->getOffset() != 0) return false;
- const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
- if (!GV) return false;
- return MF->getMMI().hasDebugInfo();
-}
-
-
/// getNode - Gets or creates the specified node.
///
SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
@@ -6304,7 +6302,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl,
OperandEltVT,
Operand,
- getConstant(i, MVT::i32));
+ getConstant(i, TLI.getPointerTy()));
} else {
// A scalar operand; just use it as is.
Operands[j] = Operand;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 24c325e..d79a5ae 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -280,12 +280,35 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
}
// Vector/Vector bitcast.
- return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+ if (ValueVT.getSizeInBits() == PartVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
+ "Cannot handle this kind of promotion");
+ // Promoted vector extract
+ bool Smaller = ValueVT.bitsLE(PartVT);
+ return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, ValueVT, Val);
+
}
- assert(ValueVT.getVectorElementType() == PartVT &&
- ValueVT.getVectorNumElements() == 1 &&
+ // Trivial bitcast if the types are the same size and the destination
+ // vector type is legal.
+ if (PartVT.getSizeInBits() == ValueVT.getSizeInBits() &&
+ TLI.isTypeLegal(ValueVT))
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ // Handle cases such as i8 -> <1 x i1>
+ assert(ValueVT.getVectorNumElements() == 1 &&
"Only trivial scalar-to-vector conversions should get here!");
+
+ if (ValueVT.getVectorNumElements() == 1 &&
+ ValueVT.getVectorElementType() != PartVT) {
+ bool Smaller = ValueVT.bitsLE(PartVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, ValueVT.getScalarType(), Val);
+ }
+
return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
}
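Two concrete instances of the new paths, with hypothetical types:

    // Promoted vector extract: same element count, narrower elements.
    //   PartVT = v4i32, ValueVT = v4i16  =>  ValueVT.bitsLE(PartVT),
    //   so the part is TRUNCATEd back down to v4i16.
    // Scalar part to one-element vector with a mismatched element type:
    //   PartVT = i8, ValueVT = v1i1  =>  truncate i8 to i1, then
    //   BUILD_VECTOR produces the v1i1 result.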
@@ -426,7 +449,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (PartVT.isVector() &&
- PartVT.getVectorElementType() == ValueVT.getVectorElementType()&&
+ PartVT.getVectorElementType() == ValueVT.getVectorElementType() &&
PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
EVT ElementVT = PartVT.getVectorElementType();
// Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
@@ -446,13 +469,33 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
//SDValue UndefElts = DAG.getUNDEF(VectorTy);
//Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
- } else {
+ } else if (PartVT.isVector() &&
+ PartVT.getVectorElementType().bitsGE(
+ ValueVT.getVectorElementType()) &&
+ PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
+
+ // Promoted vector extract
+ unsigned NumElts = ValueVT.getVectorNumElements();
+ SmallVector<SDValue, 8> NewOps;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ ValueVT.getScalarType(), Val, DAG.getIntPtrConstant(i));
+ SDValue Cast = DAG.getNode(ISD::ANY_EXTEND,
+ DL, PartVT.getScalarType(), Ext);
+ NewOps.push_back(Cast);
+ }
+ Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT,
+ &NewOps[0], NewOps.size());
+ } else {
// Vector -> scalar conversion.
- assert(ValueVT.getVectorElementType() == PartVT &&
- ValueVT.getVectorNumElements() == 1 &&
+ assert(ValueVT.getVectorNumElements() == 1 &&
"Only trivial vector-to-scalar conversions should get here!");
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
PartVT, Val, DAG.getIntPtrConstant(0));
+
+ bool Smaller = ValueVT.bitsLE(PartVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, PartVT, Val);
}
Parts[0] = Val;
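The copy-to side mirrors the promoted extract above (types again hypothetical): each element is pulled out at the value's scalar type and ANY_EXTENDed to the wider part element before the vector is rebuilt.

    // ValueVT = v4i16, PartVT = v4i32:
    //   e_i = extract_vector_elt %val, i        ; i16
    //   c_i = any_extend e_i to i32
    //   part = build_vector c_0, c_1, c_2, c_3  ; v4i32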
@@ -783,11 +826,20 @@ void SelectionDAGBuilder::clear() {
UnusedArgNodeMap.clear();
PendingLoads.clear();
PendingExports.clear();
- DanglingDebugInfoMap.clear();
CurDebugLoc = DebugLoc();
HasTailCall = false;
}
+/// clearDanglingDebugInfo - Clear the dangling debug information
+/// map. This function is separated from clear() so that debug
+/// information that is dangling in a basic block can be properly
+/// resolved in a different basic block. This allows the
+/// SelectionDAG to resolve dangling debug information attached
+/// to PHI nodes.
+void SelectionDAGBuilder::clearDanglingDebugInfo() {
+ DanglingDebugInfoMap.clear();
+}
+
/// getRoot - Return the current virtual root of the Selection DAG,
/// flushing any PendingLoad items. This must be done before emitting
/// a store or any other node that may need to be ordered after any
@@ -1175,6 +1227,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
/// created for it, emit nodes to copy the value into the virtual
/// registers.
void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ return;
+
DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
assert(!V->use_empty() && "Unused value assigned virtual registers!");
@@ -1223,6 +1279,24 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
return true;
}
+/// Return the edge weight calculated by BranchProbabilityInfo for the IR
+/// blocks underlying Src and Dst, or 0 if no BPI is available.
+uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) {
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (!BPI)
+ return 0;
+ BasicBlock *SrcBB = const_cast<BasicBlock*>(Src->getBasicBlock());
+ BasicBlock *DstBB = const_cast<BasicBlock*>(Dst->getBasicBlock());
+ return BPI->getEdgeWeight(SrcBB, DstBB);
+}
+
+void SelectionDAGBuilder::addSuccessorWithWeight(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) {
+ uint32_t weight = getEdgeWeight(Src, Dst);
+ Src->addSuccessor(Dst, weight);
+}
+
+
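Every addSuccessor call site touched below funnels through this helper. When FuncInfo.BPI is null the weight degrades to 0, which the MachineBasicBlock interface appears to treat as "no weight information" (my reading, not something this patch states):

    // Before: SwitchBB->addSuccessor(CB.TrueBB);
    // After:  addSuccessorWithWeight(SwitchBB, CB.TrueBB); // weight from BPI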
static bool InBlock(const Value *V, const BasicBlock *BB) {
if (const Instruction *I = dyn_cast<Instruction>(V))
return I->getParent() == BB;
@@ -1492,8 +1566,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
}
// Update successor info
- SwitchBB->addSuccessor(CB.TrueBB);
- SwitchBB->addSuccessor(CB.FalseBB);
+ addSuccessorWithWeight(SwitchBB, CB.TrueBB);
+ addSuccessorWithWeight(SwitchBB, CB.FalseBB);
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
@@ -1637,8 +1711,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
- SwitchBB->addSuccessor(B.Default);
- SwitchBB->addSuccessor(MBB);
+ addSuccessorWithWeight(SwitchBB, B.Default);
+ addSuccessorWithWeight(SwitchBB, MBB);
SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
MVT::Other, CopyTo, RangeCmp,
@@ -1683,8 +1757,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
ISD::SETNE);
}
- SwitchBB->addSuccessor(B.TargetBB);
- SwitchBB->addSuccessor(NextMBB);
+ addSuccessorWithWeight(SwitchBB, B.TargetBB);
+ addSuccessorWithWeight(SwitchBB, NextMBB);
SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
MVT::Other, getControlRoot(),
@@ -1924,8 +1998,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
// table.
MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
CurMF->insert(BBI, JumpTableBB);
- CR.CaseBB->addSuccessor(Default);
- CR.CaseBB->addSuccessor(JumpTableBB);
+
+ addSuccessorWithWeight(CR.CaseBB, Default);
+ addSuccessorWithWeight(CR.CaseBB, JumpTableBB);
// Build a vector of destination BBs, corresponding to each target
// of the jump table. If the value of the jump table slot corresponds to
@@ -1952,7 +2027,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
E = DestBBs.end(); I != E; ++I) {
if (!SuccsHandled[(*I)->getNumber()]) {
SuccsHandled[(*I)->getNumber()] = true;
- JumpTableBB->addSuccessor(*I);
+ addSuccessorWithWeight(JumpTableBB, *I);
}
}
@@ -2019,9 +2094,13 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
APInt Range = ComputeRange(LEnd, RBegin);
assert((Range - 2ULL).isNonNegative() &&
"Invalid case distance");
- double LDensity = (double)LSize.roundToDouble() /
+ // Use volatile double here to avoid excess precision issues on some hosts,
+ // e.g. hosts that use 80-bit x87 registers.
+ volatile double LDensity =
+ (double)LSize.roundToDouble() /
(LEnd - First + 1ULL).roundToDouble();
- double RDensity = (double)RSize.roundToDouble() /
+ volatile double RDensity =
+ (double)RSize.roundToDouble() /
(Last - RBegin + 1ULL).roundToDouble();
double Metric = Range.logBase2()*(LDensity+RDensity);
// Should always split in some non-trivial place
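The volatile trick deserves a sketch. On hosts whose FPU evaluates double arithmetic in 80-bit x87 registers, a quotient that stays in a register carries more precision than one spilled to a 64-bit slot, so two textually identical computations can compare differently; storing through a volatile double forces the rounding immediately.

    double Num = 1.0e15, Den = 3.0;   // hypothetical operands
    volatile double D = Num / Den;    // the store rounds to 64 bits here
    double Metric = D * 2.0;          // every later use sees the same value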
@@ -2367,8 +2446,10 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
succs.push_back(I.getSuccessor(i));
array_pod_sort(succs.begin(), succs.end());
succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
- for (unsigned i = 0, e = succs.size(); i != e; ++i)
- IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]);
+ for (unsigned i = 0, e = succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]];
+ addSuccessorWithWeight(IndirectBrMBB, Succ);
+ }
DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
MVT::Other, getControlRoot(),
@@ -2806,16 +2887,18 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
SmallVector<SDValue, 4> Values(NumAggValues);
SDValue Agg = getValue(Op0);
- SDValue Val = getValue(Op1);
unsigned i = 0;
// Copy the beginning value(s) from the original aggregate.
for (; i != LinearIndex; ++i)
Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
SDValue(Agg.getNode(), Agg.getResNo() + i);
// Copy values from the inserted value(s).
- for (; i != LinearIndex + NumValValues; ++i)
- Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
- SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+ if (NumValValues) {
+ SDValue Val = getValue(Op1);
+ for (; i != LinearIndex + NumValValues; ++i)
+ Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+ }
// Copy remaining value(s) from the original aggregate.
for (; i != NumAggValues; ++i)
Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
@@ -2838,6 +2921,13 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
ComputeValueVTs(TLI, ValTy, ValValueVTs);
unsigned NumValValues = ValValueVTs.size();
+
+ // Ignore an extractvalue that produces an empty object.
+ if (!NumValValues) {
+ setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
+ return;
+ }
+
SmallVector<SDValue, 4> Values(NumValValues);
SDValue Agg = getValue(Op0);
@@ -4009,6 +4099,24 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
}
+// getTruncatedArgReg - Find the underlying register used for a truncated
+// argument.
+static unsigned getTruncatedArgReg(const SDValue &N) {
+ if (N.getOpcode() != ISD::TRUNCATE)
+ return 0;
+
+ const SDValue &Ext = N.getOperand(0);
+ if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext) {
+ const SDValue &CFR = Ext.getOperand(0);
+ if (CFR.getOpcode() == ISD::CopyFromReg)
+ return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
+ else if (CFR.getOpcode() == ISD::TRUNCATE)
+ return getTruncatedArgReg(CFR);
+ }
+ return 0;
+}
+
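The chains this helper walks look like the following (virtual register number hypothetical); it peels the TRUNCATE, then an AssertZext/AssertSext inserted by argument lowering, to reach the CopyFromReg:

    // t1 = CopyFromReg %vreg5   ; the underlying argument register
    // t2 = AssertZext t1        ; range annotation from argument lowering
    // t3 = truncate t2          ; the N handed to dbg_value
    // getTruncatedArgReg(t3) == %vreg5; anything else returns 0.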
/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
/// argument, create the corresponding DBG_VALUE machine instruction for it now.
/// At the end of instruction selection, they will be inserted to the entry BB.
@@ -4029,10 +4137,6 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
if (DV.isInlinedFnArgument(MF.getFunction()))
return false;
- MachineBasicBlock *MBB = FuncInfo.MBB;
- if (MBB != &MF.front())
- return false;
-
unsigned Reg = 0;
if (Arg->hasByValAttr()) {
// Byval arguments' frame index is recorded during argument lowering.
@@ -4044,9 +4148,12 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
Reg = 0;
}
- if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) {
- Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (N.getNode()) {
+ if (N.getOpcode() == ISD::CopyFromReg)
+ Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
+ else
+ Reg = getTruncatedArgReg(N);
+ if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
unsigned PR = RegInfo.getLiveInPhysReg(Reg);
if (PR)
@@ -4208,9 +4315,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
0, dl, SDNodeOrder);
else {
- // Can't do anything with other non-AI cases yet. This might be a
- // parameter of a callee function that got inlined, for example.
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ // Address is an argument, so try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ EmitFuncArgumentDbgValue(Address, Variable, 0, N);
return 0;
}
} else if (AI)
@@ -4403,7 +4510,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::eh_sjlj_dispatch_setup: {
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
- getRoot()));
+ getRoot(), getValue(I.getArgOperand(0))));
return 0;
}
@@ -4672,9 +4779,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::flt_rounds:
setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
return 0;
- case Intrinsic::trap:
- DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+ case Intrinsic::trap: {
+ StringRef TrapFuncName = getTrapFunctionName();
+ if (TrapFuncName.empty()) {
+ DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+ return 0;
+ }
+ TargetLowering::ArgListTy Args;
+ std::pair<SDValue, SDValue> Result =
+ TLI.LowerCallTo(getRoot(), I.getType(),
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false, /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
+ Args, DAG, getCurDebugLoc());
+ DAG.setRoot(Result.second);
return 0;
+ }
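With this change llvm.trap can lower to an ordinary C-convention libcall instead of the TRAP node whenever a trap function name is configured, presumably via a -trap-func codegen option (the flag name is my assumption; only getTrapFunctionName() appears here):

    // Conceptually, with a trap function named 'my_trap_handler':
    //   call void @my_trap_handler()   ; emitted in place of a trap instr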
case Intrinsic::uadd_with_overflow:
return implVisitAluOverflow(I, ISD::UADDO);
case Intrinsic::sadd_with_overflow:
@@ -4689,15 +4809,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return implVisitAluOverflow(I, ISD::SMULO);
case Intrinsic::prefetch: {
- SDValue Ops[4];
+ SDValue Ops[5];
unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
Ops[0] = getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = getValue(I.getArgOperand(3));
DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
DAG.getVTList(MVT::Other),
- &Ops[0], 4,
+ &Ops[0], 5,
EVT::getIntegerVT(*Context, 8),
MachinePointerInfo(I.getArgOperand(0)),
0, /* align */
@@ -4784,7 +4905,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
Outs, TLI, &Offsets);
bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
- FTy->isVarArg(), Outs, FTy->getContext());
+ DAG.getMachineFunction(),
+ FTy->isVarArg(), Outs,
+ FTy->getContext());
SDValue DemoteStackSlot;
int DemoteStackIdx = -100;
@@ -4814,8 +4937,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
- SDValue ArgNode = getValue(*i);
- Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
+ const Value *V = *i;
+
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ continue;
+
+ SDValue ArgNode = getValue(V);
+ Entry.Node = ArgNode; Entry.Ty = V->getType();
unsigned attrInd = i - CS.arg_begin() + 1;
Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
@@ -5255,6 +5384,7 @@ public:
const llvm::Type *OpTy = CallOperandVal->getType();
+ // FIXME: code duplicated from TargetLowering::ParseConstraints().
// If this is an indirect operand, the operand is a pointer to the
// accessed type.
if (isIndirect) {
@@ -5264,6 +5394,11 @@ public:
OpTy = PtrTy->getElementType();
}
+ // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
+ if (const StructType *STy = dyn_cast<StructType>(OpTy))
+ if (STy->getNumElements() == 1)
+ OpTy = STy->getElementType(0);
+
// If OpTy is not a single value, it may be a struct/union that we
// can tile with integers.
if (!OpTy->isSingleValueType() && OpTy->isSized()) {
@@ -5315,6 +5450,8 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF,
EVT ThisVT = MVT::Other;
const TargetRegisterClass *RC = *RCI;
+ if (!RC->isAllocatable())
+ continue;
// If none of the value types for this register class are valid, we
// can't use it. For example, 64-bit reg classes on 32-bit targets.
for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
@@ -5336,15 +5473,14 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF,
// frame pointer in functions that need it (due to them not being taken
// out of allocation, because a variable sized allocation hasn't been seen
// yet). This is a slight code pessimization, but should still work.
- for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
- E = RC->allocation_order_end(MF); I != E; ++I)
- if (*I == Reg) {
- // We found a matching register class. Keep looking at others in case
- // we find one with larger registers that this physreg is also in.
- FoundRC = RC;
- FoundVT = ThisVT;
- break;
- }
+ ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(MF);
+ if (std::find(RawOrder.begin(), RawOrder.end(), Reg) != RawOrder.end()) {
+ // We found a matching register class. Keep looking at others in case
+ // we find one with larger registers that this physreg is also in.
+ FoundRC = RC;
+ FoundVT = ThisVT;
+ break;
+ }
}
return FoundRC;
}
@@ -5491,9 +5627,15 @@ static void GetRegistersForValue(SelectionDAG &DAG,
OpInfo.ConstraintVT);
const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+ BitVector Reserved = TRI->getReservedRegs(MF);
unsigned NumAllocated = 0;
for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
unsigned Reg = RegClassRegs[i];
+ // Filter out the reserved registers, but note that reserved registers are
+ // not fully determined at this point. We may still decide we need a frame
+ // pointer.
+ if (Reserved.test(Reg))
+ continue;
// See if this register is available.
if ((isOutReg && OutputRegs.count(Reg)) || // Already used.
(isInReg && InputRegs.count(Reg))) { // Already used.
@@ -5542,7 +5684,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
std::set<unsigned> OutputRegs, InputRegs;
- TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS);
+ TargetLowering::AsmOperandInfoVector
+ TargetConstraints = TLI.ParseConstraints(CS);
+
bool hasMemory = false;
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
@@ -5601,7 +5745,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
hasMemory = true;
else {
for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
- TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[j]);
+ TargetLowering::ConstraintType
+ CType = TLI.getConstraintType(OpInfo.Codes[j]);
if (CType == TargetLowering::C_Memory) {
hasMemory = true;
break;
@@ -5651,12 +5796,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// need to, to provide an address for the memory input.
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
!OpInfo.isIndirect) {
- assert((OpInfo.isMultipleAlternative || (OpInfo.Type == InlineAsm::isInput)) &&
+ assert((OpInfo.isMultipleAlternative ||
+ (OpInfo.Type == InlineAsm::isInput)) &&
"Can only indirectify direct input operands!");
// Memory operands really want the address of the value. If we don't have
// an indirect input, put it in the constpool if we can, otherwise spill
// it to a stack slot.
+ // TODO: This isn't quite right. We need to handle these according to
+ // the addressing mode that the constraint wants. Also, this may take
+ // an additional register for the computation and we don't want that
+ // either.
// If the operand is a float, integer, or vector constant, spill to a
// constant pool entry to get its address.
@@ -5858,7 +6008,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
if (OpInfo.ConstraintType == TargetLowering::C_Other) {
std::vector<SDValue> Ops;
- TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
+ TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
Ops, DAG);
if (Ops.empty())
report_fatal_error("Invalid operand for inline asm constraint '" +
@@ -6067,14 +6217,15 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
Flags.setByVal();
const PointerType *Ty = cast<PointerType>(Args[i].Ty);
const Type *ElementTy = Ty->getElementType();
- unsigned FrameAlign = getByValTypeAlignment(ElementTy);
- unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy);
+ Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy));
// For ByVal, alignment should come from FE. BE will guess if this
// info is not there but there are cases it cannot get right.
+ unsigned FrameAlign;
if (Args[i].Alignment)
FrameAlign = Args[i].Alignment;
+ else
+ FrameAlign = getByValTypeAlignment(ElementTy);
Flags.setByValAlign(FrameAlign);
- Flags.setByValSize(FrameSize);
}
if (Args[i].isNest)
Flags.setNest();
@@ -6180,7 +6331,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
// For a function returning void, there is no return value. We can't create
// such a node, so we just return a null return value in that case. In
- // that case, nothing will actualy look at the value.
+ // that case, nothing will actually look at the value.
if (ReturnValues.empty())
return std::make_pair(SDValue(), Chain);
@@ -6219,6 +6370,25 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
#include "llvm/CodeGen/SelectionDAGISel.h"
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true. Arguments used by a switch do not qualify,
+/// since the switch may expand into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(const Argument *A) {
+ // With FastISel active, we may be splitting blocks, so force creation
+ // of virtual registers for all non-dead arguments.
+ if (EnableFastISel)
+ return A->use_empty();
+
+ const BasicBlock *Entry = A->getParent()->begin();
+ for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
+ return false; // Use not in entry block.
+ }
+ return true;
+}
+
void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// If this is the entry block, emit arguments.
const Function &F = *LLVMBB->getParent();
@@ -6273,14 +6443,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
Flags.setByVal();
const PointerType *Ty = cast<PointerType>(I->getType());
const Type *ElementTy = Ty->getElementType();
- unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy);
- unsigned FrameSize = TD->getTypeAllocSize(ElementTy);
+ Flags.setByValSize(TD->getTypeAllocSize(ElementTy));
// For ByVal, alignment should be passed from FE. BE will guess if
// this info is not there but there are cases it cannot get right.
+ unsigned FrameAlign;
if (F.getParamAlignment(Idx))
FrameAlign = F.getParamAlignment(Idx);
+ else
+ FrameAlign = TLI.getByValTypeAlignment(ElementTy);
Flags.setByValAlign(FrameAlign);
- Flags.setByValSize(FrameSize);
}
if (F.paramHasAttr(Idx, Attribute::Nest))
Flags.setNest();
@@ -6362,8 +6533,8 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
if (I->use_empty() && NumValues)
SDB->setUnusedArgValue(I, InVals[i]);
- for (unsigned Value = 0; Value != NumValues; ++Value) {
- EVT VT = ValueVTs[Value];
+ for (unsigned Val = 0; Val != NumValues; ++Val) {
+ EVT VT = ValueVTs[Val];
EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
@@ -6382,21 +6553,35 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
i += NumParts;
}
+ // We don't need to do anything else for unused arguments.
+ if (ArgValues.empty())
+ continue;
+
// Note down frame index for byval arguments.
- if (I->hasByValAttr() && !ArgValues.empty())
+ if (I->hasByValAttr())
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex());
- if (!I->use_empty()) {
- SDValue Res;
- if (!ArgValues.empty())
- Res = DAG.getMergeValues(&ArgValues[0], NumValues,
- SDB->getCurDebugLoc());
- SDB->setValue(I, Res);
-
- // If this argument is live outside of the entry block, insert a copy from
- // whereever we got it to the vreg that other BB's will reference it as.
+ SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
+ SDB->getCurDebugLoc());
+ SDB->setValue(I, Res);
+
+ // If this argument is live outside of the entry block, insert a copy from
+ // wherever we got it to the vreg that other BB's will reference it as.
+ if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
+ // If we can, though, try to skip creating an unnecessary vreg.
+ // FIXME: This isn't very clean... it would be nice to make this more
+ // general. It's also subtly incompatible with the hacks FastISel
+ // uses with vregs.
+ unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ FuncInfo->ValueMap[I] = Reg;
+ continue;
+ }
+ }
+ if (!isOnlyUsedInEntryBlock(I)) {
+ FuncInfo->InitializeRegForValue(I);
SDB->CopyToExportRegsIfNeeded(I);
}
}
@@ -6442,6 +6627,10 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// Ignore dead phi's.
if (PN->use_empty()) continue;
+ // Skip empty types
+ if (PN->getType()->isEmptyTy())
+ continue;
+
unsigned Reg;
const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index f546612..a1ca891 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -23,7 +23,6 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include <vector>
-#include <set>
namespace llvm {
@@ -333,6 +332,14 @@ public:
/// consumed.
void clear();
+ /// clearDanglingDebugInfo - Clear the dangling debug information
+ /// map. This function is separated from clear() so that debug
+ /// information that is dangling in a basic block can be properly
+ /// resolved in a different basic block. This allows the
+ /// SelectionDAG to resolve dangling debug information attached
+ /// to PHI nodes.
+ void clearDanglingDebugInfo();
+
/// getRoot - Return the current virtual root of the Selection DAG,
/// flushing any PendingLoad items. This must be done before emitting
/// a store or any other node that may need to be ordered after any
@@ -427,6 +434,9 @@ private:
const Value* SV,
MachineBasicBlock* Default,
MachineBasicBlock *SwitchBB);
+
+ uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst);
+ void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst);
public:
void visitSwitchCase(CaseBlock &CB,
MachineBasicBlock *SwitchBB);
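The bodies of these two helpers are not part of this header hunk. A plausible sketch, assuming BranchProbabilityInfo::getEdgeWeight(const BasicBlock*, const BasicBlock*) and the weight-taking MachineBasicBlock::addSuccessor overload:

// Sketch only -- not shown in this patch. FuncInfo.BPI is null at -O0 (see
// the runOnMachineFunction change below), so weights degrade gracefully.
uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src,
                                            MachineBasicBlock *Dst) {
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  if (!BPI)
    return 0;
  const BasicBlock *SrcBB = Src->getBasicBlock();
  const BasicBlock *DstBB = Dst->getBasicBlock();
  return BPI->getEdgeWeight(SrcBB, DstBB);
}

void SelectionDAGBuilder::addSuccessorWithWeight(MachineBasicBlock *Src,
                                                 MachineBasicBlock *Dst) {
  Src->addSuccessor(Dst, getEdgeWeight(Src, Dst));
}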
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 4b6066e..dc8044b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
@@ -55,17 +56,11 @@
using namespace llvm;
STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
-#ifndef NDEBUG
-STATISTIC(NumBBWithOutOfOrderLineInfo,
- "Number of blocks with out of order line number info");
-STATISTIC(NumMBBWithOutOfOrderLineInfo,
- "Number of machine blocks with out of order line number info");
-#endif
-
static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
cl::desc("Enable verbose messages in the \"fast\" "
@@ -74,6 +69,11 @@ static cl::opt<bool>
EnableFastISelAbort("fast-isel-abort", cl::Hidden,
cl::desc("Enable abort calls when \"fast\" instruction fails"));
+static cl::opt<bool>
+UseMBPI("use-mbpi",
+ cl::desc("use Machine Branch Probability Info"),
+ cl::init(true), cl::Hidden);
+
#ifndef NDEBUG
static cl::opt<bool>
ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
@@ -192,6 +192,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
DAGSize(0) {
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
+ initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
}
SelectionDAGISel::~SelectionDAGISel() {
@@ -205,43 +206,11 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<AliasAnalysis>();
AU.addRequired<GCModuleInfo>();
AU.addPreserved<GCModuleInfo>();
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ AU.addRequired<BranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
-/// FunctionCallsSetJmp - Return true if the function has a call to setjmp or
-/// other function that gcc recognizes as "returning twice". This is used to
-/// limit code-gen optimizations on the machine function.
-///
-/// FIXME: Remove after <rdar://problem/8031714> is fixed.
-static bool FunctionCallsSetJmp(const Function *F) {
- const Module *M = F->getParent();
- static const char *ReturnsTwiceFns[] = {
- "_setjmp",
- "setjmp",
- "sigsetjmp",
- "setjmp_syscall",
- "savectx",
- "qsetjmp",
- "vfork",
- "getcontext"
- };
-#define NUM_RETURNS_TWICE_FNS sizeof(ReturnsTwiceFns) / sizeof(const char *)
-
- for (unsigned I = 0; I < NUM_RETURNS_TWICE_FNS; ++I)
- if (const Function *Callee = M->getFunction(ReturnsTwiceFns[I])) {
- if (!Callee->use_empty())
- for (Value::const_use_iterator
- I = Callee->use_begin(), E = Callee->use_end();
- I != E; ++I)
- if (const CallInst *CI = dyn_cast<CallInst>(*I))
- if (CI->getParent()->getParent() == F)
- return true;
- }
-
- return false;
-#undef NUM_RETURNS_TWICE_FNS
-}
-
/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that
/// may trap on it. In this case we have to split the edge so that the path
/// through the predecessor block that doesn't go to the phi block doesn't
@@ -302,6 +271,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
CurDAG->init(*MF);
FuncInfo->set(Fn, *MF);
+
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>();
+ else
+ FuncInfo->BPI = 0;
+
SDB->init(GFI, *AA);
SelectAllBasicBlocks(Fn);
@@ -392,7 +367,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
}
// Determine if there is a call to setjmp in the machine function.
- MF->setCallsSetJmp(FunctionCallsSetJmp(&Fn));
+ MF->setCallsSetJmp(Fn.callsFunctionThatReturnsTwice());
// Replace forward-declared registers with the registers containing
// the desired value.
@@ -421,10 +396,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
return true;
}
-void
-SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
- BasicBlock::const_iterator End,
- bool &HadTailCall) {
+void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End,
+ bool &HadTailCall) {
// Lower all of the non-terminator instructions. If a call is emitted
// as a tail call, cease emitting nodes for this block. Terminators
// are handled below.
@@ -438,7 +412,6 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
// Final step, emit the lowered DAG as machine code.
CodeGenAndEmitDAG();
- return;
}
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
@@ -572,7 +545,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled);
- CurDAG->Legalize(OptLevel);
+ CurDAG->Legalize();
}
DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber
@@ -748,16 +721,49 @@ void SelectionDAGISel::PrepareEHLandingPad() {
-
+/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
+/// load into the specified FoldInst. Note that we could have a sequence where
+/// multiple LLVM IR instructions are folded into the same machineinstr. For
+/// example we could have:
+/// A: x = load i32 *P
+/// B: y = icmp A, 42
+/// C: br y, ...
+///
+/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and
+/// any other folded instructions) because it is between A and C.
+///
+/// If we succeed in folding the load into the operation, return true.
+///
bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
+ const Instruction *FoldInst,
FastISel *FastIS) {
+ // We know that the load has a single use, but don't know what it is. If it
+ // isn't one of the folded instructions, then we can't succeed here. Handle
+ // this by scanning the single-use users of the load until we get to FoldInst.
+ unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
+
+ const Instruction *TheUser = LI->use_back();
+ while (TheUser != FoldInst && // Scan up until we find FoldInst.
+ // Stay in the right block.
+ TheUser->getParent() == FoldInst->getParent() &&
+ --MaxUsers) { // Don't scan too far.
+ // If there are multiple or no uses of this instruction, then bail out.
+ if (!TheUser->hasOneUse())
+ return false;
+
+ TheUser = TheUser->use_back();
+ }
+
// Don't try to fold volatile loads. Target has to deal with alignment
// constraints.
if (LI->isVolatile()) return false;
- // Figure out which vreg this is going into.
+ // Figure out which vreg this is going into. If there is no assigned vreg yet
+ // then there actually was no reference to it. Perhaps the load is referenced
+ // by a dead instruction.
unsigned LoadReg = FastIS->getRegForValue(LI);
- assert(LoadReg && "Load isn't already assigned a vreg? ");
+ if (LoadReg == 0)
+ return false;
// Check to see what the uses of this vreg are. If it has no uses, or more
// than one use (at the machine instr level) then we can't fold it.
@@ -788,48 +794,17 @@ bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI);
}
-#ifndef NDEBUG
-/// CheckLineNumbers - Check if basic block instructions follow source order
-/// or not.
-static void CheckLineNumbers(const BasicBlock *BB) {
- unsigned Line = 0;
- unsigned Col = 0;
- for (BasicBlock::const_iterator BI = BB->begin(),
- BE = BB->end(); BI != BE; ++BI) {
- const DebugLoc DL = BI->getDebugLoc();
- if (DL.isUnknown()) continue;
- unsigned L = DL.getLine();
- unsigned C = DL.getCol();
- if (L < Line || (L == Line && C < Col)) {
- ++NumBBWithOutOfOrderLineInfo;
- return;
- }
- Line = L;
- Col = C;
- }
+/// isFoldedOrDeadInstruction - Return true if the specified instruction is
+/// side-effect free and is either dead or folded into a generated instruction.
+/// Return false if it needs to be emitted.
+static bool isFoldedOrDeadInstruction(const Instruction *I,
+ FunctionLoweringInfo *FuncInfo) {
+ return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
+ !isa<TerminatorInst>(I) && // Terminators aren't folded.
+ !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
+ !FuncInfo->isExportedInst(I); // Exported instrs must be computed.
}
-/// CheckLineNumbers - Check if machine basic block instructions follow source
-/// order or not.
-static void CheckLineNumbers(const MachineBasicBlock *MBB) {
- unsigned Line = 0;
- unsigned Col = 0;
- for (MachineBasicBlock::const_iterator MBI = MBB->begin(),
- MBE = MBB->end(); MBI != MBE; ++MBI) {
- const DebugLoc DL = MBI->getDebugLoc();
- if (DL.isUnknown()) continue;
- unsigned L = DL.getLine();
- unsigned C = DL.getCol();
- if (L < Line || (L == Line && C < Col)) {
- ++NumMBBWithOutOfOrderLineInfo;
- return;
- }
- Line = L;
- Col = C;
- }
-}
-#endif
-
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = 0;
@@ -841,9 +816,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
for (ReversePostOrderTraversal<const Function*>::rpo_iterator
I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
const BasicBlock *LLVMBB = *I;
-#ifndef NDEBUG
- CheckLineNumbers(LLVMBB);
-#endif
if (OptLevel != CodeGenOpt::None) {
bool AllPredsVisited = true;
@@ -856,15 +828,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
if (AllPredsVisited) {
- for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end();
- I != E && isa<PHINode>(I); ++I) {
+ for (BasicBlock::const_iterator I = LLVMBB->begin();
+ isa<PHINode>(I); ++I)
FuncInfo->ComputePHILiveOutRegInfo(cast<PHINode>(I));
- }
} else {
- for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end();
- I != E && isa<PHINode>(I); ++I) {
+ for (BasicBlock::const_iterator I = LLVMBB->begin();
+ isa<PHINode>(I); ++I)
FuncInfo->InvalidatePHILiveOutRegInfo(cast<PHINode>(I));
- }
}
FuncInfo->VisitedBBs.insert(LLVMBB);
@@ -912,10 +882,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
const Instruction *Inst = llvm::prior(BI);
// If we no longer require this instruction, skip it.
- if (!Inst->mayWriteToMemory() &&
- !isa<TerminatorInst>(Inst) &&
- !isa<DbgInfoIntrinsic>(Inst) &&
- !FuncInfo->isExportedInst(Inst))
+ if (isFoldedOrDeadInstruction(Inst, FuncInfo))
continue;
// Bottom-up: reset the insert pos at the top, after any local-value
@@ -924,16 +891,21 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Try to select the instruction with FastISel.
if (FastIS->SelectInstruction(Inst)) {
- // If fast isel succeeded, check to see if there is a single-use
- // non-volatile load right before the selected instruction, and see if
- // the load is used by the instruction. If so, try to fold it.
- const Instruction *BeforeInst = 0;
- if (Inst != Begin)
- BeforeInst = llvm::prior(llvm::prior(BI));
- if (BeforeInst && isa<LoadInst>(BeforeInst) &&
- BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst &&
- TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS))
- --BI; // If we succeeded, don't re-select the load.
+ ++NumFastIselSuccess;
+ // If fast isel succeeded, skip over all the folded instructions, and
+ // then see if there is a load right before the selected instructions.
+ // Try to fold the load if so.
+ const Instruction *BeforeInst = Inst;
+ while (BeforeInst != Begin) {
+ BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst));
+ if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
+ break;
+ }
+ if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
+ BeforeInst->hasOneUse() &&
+ TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS))
+ // If we succeeded, don't re-select the load.
+ BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
continue;
}
@@ -963,9 +935,14 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
continue;
}
- // Otherwise, give up on FastISel for the rest of the block.
- // For now, be a little lenient about non-branch terminators.
- if (!isa<TerminatorInst>(Inst) || isa<BranchInst>(Inst)) {
+ if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) {
+ // Don't abort, and use a different message for terminator misses.
+ ++NumFastIselFailures;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel missed terminator: ";
+ Inst->dump();
+ }
+ } else {
++NumFastIselFailures;
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel miss: ";
@@ -987,22 +964,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
else
++NumFastIselBlocks;
- // Run SelectionDAG instruction selection on the remainder of the block
- // not handled by FastISel. If FastISel is not run, this is the entire
- // block.
- bool HadTailCall;
- SelectBasicBlock(Begin, BI, HadTailCall);
+ if (Begin != BI) {
+ // Run SelectionDAG instruction selection on the remainder of the block
+ // not handled by FastISel. If FastISel is not run, this is the entire
+ // block.
+ bool HadTailCall;
+ SelectBasicBlock(Begin, BI, HadTailCall);
+ }
FinishBasicBlock();
FuncInfo->PHINodesToUpdate.clear();
}
delete FastIS;
-#ifndef NDEBUG
- for (MachineFunction::const_iterator MBI = MF->begin(), MBE = MF->end();
- MBI != MBE; ++MBI)
- CheckLineNumbers(MBI);
-#endif
+ SDB->clearDanglingDebugInfo();
}
void
@@ -2634,11 +2609,45 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// instructions that access memory and for ComplexPatterns that match
// loads.
if (EmitNodeInfo & OPFL_MemRefs) {
+ // Only attach load or store memory operands if the generated
+ // instruction may load or store.
+ const TargetInstrDesc &TID = TM.getInstrInfo()->get(TargetOpc);
+ bool mayLoad = TID.mayLoad();
+ bool mayStore = TID.mayStore();
+
+ unsigned NumMemRefs = 0;
+ for (SmallVector<MachineMemOperand*, 2>::const_iterator I =
+ MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
+ if ((*I)->isLoad()) {
+ if (mayLoad)
+ ++NumMemRefs;
+ } else if ((*I)->isStore()) {
+ if (mayStore)
+ ++NumMemRefs;
+ } else {
+ ++NumMemRefs;
+ }
+ }
+
MachineSDNode::mmo_iterator MemRefs =
- MF->allocateMemRefsArray(MatchedMemRefs.size());
- std::copy(MatchedMemRefs.begin(), MatchedMemRefs.end(), MemRefs);
+ MF->allocateMemRefsArray(NumMemRefs);
+
+ MachineSDNode::mmo_iterator MemRefsPos = MemRefs;
+ for (SmallVector<MachineMemOperand*, 2>::const_iterator I =
+ MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
+ if ((*I)->isLoad()) {
+ if (mayLoad)
+ *MemRefsPos++ = *I;
+ } else if ((*I)->isStore()) {
+ if (mayStore)
+ *MemRefsPos++ = *I;
+ } else {
+ *MemRefsPos++ = *I;
+ }
+ }
+
cast<MachineSDNode>(Res)
- ->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size());
+ ->setMemRefs(MemRefs, MemRefs + NumMemRefs);
}
DEBUG(errs() << " "
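The count-then-copy structure above exists because allocateMemRefsArray needs the final count up front. A toy model of the filtering rule itself, with a hypothetical MemOp standing in for MachineMemOperand:

#include <cassert>
#include <cstddef>
#include <vector>

// MemOp is hypothetical; operands that are neither loads nor stores are
// always kept, matching the fall-through case in the loops above.
struct MemOp { bool IsLoad; bool IsStore; };

static std::vector<MemOp> filterMemRefs(const std::vector<MemOp> &Matched,
                                        bool MayLoad, bool MayStore) {
  std::vector<MemOp> Kept;
  for (size_t i = 0, e = Matched.size(); i != e; ++i) {
    if (Matched[i].IsLoad && !MayLoad)
      continue;                 // instruction can't load; drop stale memref
    if (Matched[i].IsStore && !MayStore)
      continue;                 // likewise for stores
    Kept.push_back(Matched[i]);
  }
  return Kept;
}

int main() {
  MemOp Load = {true, false}, Store = {false, true};
  std::vector<MemOp> M;
  M.push_back(Load);
  M.push_back(Store);
  // A pattern whose matched store folded away keeps only the load memref.
  assert(filterMemRefs(M, /*MayLoad=*/true, /*MayStore=*/false).size() == 1);
  return 0;
}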
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4b0822d..efbfaa4 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -26,11 +26,19 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cctype>
using namespace llvm;
+/// We are in the process of implementing a new TypeLegalization action
+/// - the promotion of vector elements. This feature is disabled by default
+/// and only enabled using this flag.
+static cl::opt<bool>
+AllowPromoteIntElem("promote-elements", cl::Hidden,
+ cl::desc("Allow promotion of integer vector element types"));
+
namespace llvm {
TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) {
bool isLocal = GV->hasLocalLinkage();
@@ -528,7 +536,8 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
/// NOTE: The constructor takes ownership of TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm,
const TargetLoweringObjectFile *tlof)
- : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) {
+ : TM(tm), TD(TM.getTargetData()), TLOF(*tlof),
+ mayPromoteElements(AllowPromoteIntElem) {
// All operations default to being supported.
memset(OpActions, 0, sizeof(OpActions));
memset(LoadExtActions, 0, sizeof(LoadExtActions));
@@ -596,6 +605,8 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
SchedPreferenceInfo = Sched::Latency;
JumpBufSize = 0;
JumpBufAlignment = 0;
+ MinFunctionAlignment = 0;
+ PrefFunctionAlignment = 0;
PrefLoopAlignment = 0;
MinStackArgumentAlignment = 1;
ShouldFoldAtomicFences = false;
@@ -662,10 +673,16 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
NewVT = EltTy;
IntermediateVT = NewVT;
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
EVT DestVT = TLI->getRegisterType(NewVT);
RegisterVT = DestVT;
if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
- return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
// Otherwise, promotion or legal types use the same number of registers as
// the vector decimated to the appropriate level.
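The rounding above matters for odd-width integers: without it, an i33 intermediate would divide its raw 33 bits by the register width and under-count the registers needed. A self-contained sketch of the helper, mirroring NextPowerOf2 from llvm/Support/MathExtras.h (the same fix is applied again in getVectorTypeBreakdown below):

#include <cassert>
#include <stdint.h>

// Round a width up to the next power of two by smearing the top set bit
// down and adding one. Only called when the width is not already a power
// of two (the isPowerOf2_32 check above).
static uint64_t NextPowerOf2(uint64_t A) {
  A |= (A >> 1);
  A |= (A >> 2);
  A |= (A >> 4);
  A |= (A >> 8);
  A |= (A >> 16);
  A |= (A >> 32);
  return A + 1;
}

int main() {
  assert(NextPowerOf2(33) == 64); // an i33 occupies an i64's worth of
  assert(NextPowerOf2(24) == 32); // register space, i24 -> i32, etc.
  return 0;
}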
@@ -747,7 +764,7 @@ void TargetLowering::computeRegisterProperties() {
NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
- ValueTypeActions.setTypeAction(ExpandedVT, Expand);
+ ValueTypeActions.setTypeAction(ExpandedVT, TypeExpandInteger);
}
// Inspect all of the ValueType's smaller than the largest integer
@@ -761,7 +778,7 @@ void TargetLowering::computeRegisterProperties() {
} else {
RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
(MVT::SimpleValueType)LegalIntReg;
- ValueTypeActions.setTypeAction(IVT, Promote);
+ ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
}
}
@@ -770,7 +787,7 @@ void TargetLowering::computeRegisterProperties() {
NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
TransformToType[MVT::ppcf128] = MVT::f64;
- ValueTypeActions.setTypeAction(MVT::ppcf128, Expand);
+ ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
}
// Decide how to handle f64. If the target does not have native f64 support,
@@ -779,7 +796,7 @@ void TargetLowering::computeRegisterProperties() {
NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
TransformToType[MVT::f64] = MVT::i64;
- ValueTypeActions.setTypeAction(MVT::f64, Expand);
+ ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
}
// Decide how to handle f32. If the target does not have native support for
@@ -789,12 +806,12 @@ void TargetLowering::computeRegisterProperties() {
NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
TransformToType[MVT::f32] = MVT::f64;
- ValueTypeActions.setTypeAction(MVT::f32, Promote);
+ ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger);
} else {
NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
TransformToType[MVT::f32] = MVT::i32;
- ValueTypeActions.setTypeAction(MVT::f32, Expand);
+ ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
}
}
@@ -810,6 +827,30 @@ void TargetLowering::computeRegisterProperties() {
unsigned NElts = VT.getVectorNumElements();
if (NElts != 1) {
bool IsLegalWiderType = false;
+ // If we allow the promotion of vector elements using a flag,
+ // then return TypePromoteInteger on vector elements.
+ // First try to promote the elements of integer vectors. If no legal
+ // promotion was found, fall back to the widen-vector method.
+ if (mayPromoteElements)
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ EVT SVT = (MVT::SimpleValueType)nVT;
+ // Promote vectors of integers to vectors with the same number
+ // of elements, with a wider element type.
+ if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
+ && SVT.getVectorNumElements() == NElts &&
+ isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+
+ if (IsLegalWiderType) continue;
+
+ // Try to widen the vector.
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
EVT SVT = (MVT::SimpleValueType)nVT;
if (SVT.getVectorElementType() == EltVT &&
@@ -818,7 +859,7 @@ void TargetLowering::computeRegisterProperties() {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
- ValueTypeActions.setTypeAction(VT, Promote);
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
IsLegalWiderType = true;
break;
}
@@ -838,10 +879,12 @@ void TargetLowering::computeRegisterProperties() {
if (NVT == VT) {
// Type is already a power of 2. The default action is to split.
TransformToType[i] = MVT::Other;
- ValueTypeActions.setTypeAction(VT, Expand);
+ unsigned NumElts = VT.getVectorNumElements();
+ ValueTypeActions.setTypeAction(VT,
+ NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
} else {
TransformToType[i] = NVT;
- ValueTypeActions.setTypeAction(VT, Promote);
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
}
}
@@ -890,7 +933,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
// If there is a wider vector type with the same element type as this one,
// we should widen to that legal vector type. This handles things like
// <2 x float> -> <4 x float>.
- if (NumElts != 1 && getTypeAction(VT) == Promote) {
+ if (NumElts != 1 && getTypeAction(Context, VT) == TypeWidenVector) {
RegisterVT = getTypeToTransformTo(Context, VT);
if (isTypeLegal(RegisterVT)) {
IntermediateVT = RegisterVT;
@@ -928,8 +971,14 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
EVT DestVT = getRegisterType(Context, NewVT);
RegisterVT = DestVT;
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
- return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
// Otherwise, promotion or legal types use the same number of registers as
// the vector decimated to the appropriate level.
@@ -1678,6 +1727,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
if (!ShAmt)
break;
+ SDValue Shift = In.getOperand(1);
+ if (TLO.LegalTypes()) {
+ uint64_t ShVal = ShAmt->getZExtValue();
+ Shift =
+ TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType()));
+ }
+
APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
OperandBitWidth - BitWidth);
HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);
@@ -1691,7 +1747,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
Op.getValueType(),
NewTrunc,
- In.getOperand(1)));
+ Shift));
}
break;
}
@@ -1716,26 +1772,28 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
}
case ISD::BITCAST:
-#if 0
- // If this is an FP->Int bitcast and if the sign bit is the only thing that
- // is demanded, turn this into a FGETSIGN.
- if (NewMask == EVT::getIntegerVTSignBit(Op.getValueType()) &&
- MVT::isFloatingPoint(Op.getOperand(0).getValueType()) &&
- !MVT::isVector(Op.getOperand(0).getValueType())) {
- // Only do this xform if FGETSIGN is valid or if before legalize.
- if (!TLO.AfterLegalize ||
- isOperationLegal(ISD::FGETSIGN, Op.getValueType())) {
+ // If this is an FP->Int bitcast and if the sign bit is the only
+ // thing demanded, turn this into a FGETSIGN.
+ if (!Op.getOperand(0).getValueType().isVector() &&
+ NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
+ Op.getOperand(0).getValueType().isFloatingPoint()) {
+ bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
+ bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
+ if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) {
+ EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32;
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
- SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
- Op.getOperand(0));
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));
+ unsigned OpVTSizeInBits = Op.getValueType().getSizeInBits();
+ if (!OpVTLegal && OpVTSizeInBits > 32)
+ Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign);
unsigned ShVal = Op.getValueType().getSizeInBits()-1;
- SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy());
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(),
+ SDValue ShAmt = TLO.DAG.getConstant(ShVal, Op.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(),
Sign, ShAmt));
}
}
-#endif
break;
case ISD::ADD:
case ISD::MUL:
@@ -1842,7 +1900,6 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, bool foldBooleans,
DAGCombinerInfo &DCI, DebugLoc dl) const {
SelectionDAG &DAG = DCI.DAG;
- LLVMContext &Context = *DAG.getContext();
// These setcc operations always fold.
switch (Cond) {
@@ -1853,12 +1910,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
case ISD::SETTRUE2: return DAG.getConstant(1, VT);
}
- if (isa<ConstantSDNode>(N0.getNode())) {
- // Ensure that the constant occurs on the RHS, and fold constant
- // comparisons.
+ // Ensure that the constant occurs on the RHS, and fold constant
+ // comparisons.
+ if (isa<ConstantSDNode>(N0.getNode()))
return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
- }
-
+
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
const APInt &C1 = N1C->getAPIntValue();
@@ -1911,6 +1967,42 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
}
+ // (zext x) == C --> x == (trunc C)
+ if (DCI.isBeforeLegalize() && N0->hasOneUse() &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ unsigned MinBits = N0.getValueSizeInBits();
+ SDValue PreZExt;
+ if (N0->getOpcode() == ISD::ZERO_EXTEND) {
+ // ZExt
+ MinBits = N0->getOperand(0).getValueSizeInBits();
+ PreZExt = N0->getOperand(0);
+ } else if (N0->getOpcode() == ISD::AND) {
+ // DAGCombine turns costly ZExts into ANDs
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
+ if ((C->getAPIntValue()+1).isPowerOf2()) {
+ MinBits = C->getAPIntValue().countTrailingOnes();
+ PreZExt = N0->getOperand(0);
+ }
+ } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) {
+ // ZEXTLOAD
+ if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
+ MinBits = LN0->getMemoryVT().getSizeInBits();
+ PreZExt = N0;
+ }
+ }
+
+ // Make sure we're not losing bits from the constant.
+ if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) {
+ EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
+ if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
+ // Will get folded away.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt);
+ SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT);
+ return DAG.getSetCC(dl, VT, Trunc, C, Cond);
+ }
+ }
+ }
+
// If the LHS is '(and load, const)', the RHS is 0,
// the test is for equality or unsigned, and all 1 bits of the const are
// in the same partial word, see if we can shorten the load.
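The ISD::AND case in the (zext x) == C fold above works because DAGCombine rewrites a costly zero-extension as a mask of low ones, e.g. (zext i8 x to i32) becomes (and x, 255). A standalone sketch of the two bit tricks used to recover the original width from such a mask:

#include <cassert>
#include <stdint.h>

// A mask of N low ones satisfies mask+1 == 2^N, so (mask+1) & mask == 0.
static bool isLowBitMask(uint64_t C) {
  return C != 0 && ((C + 1) & C) == 0;
}

// Counting the trailing ones recovers N, the pre-extension bit width
// (APInt::countTrailingOnes in the real code).
static unsigned trailingOnes(uint64_t C) {
  unsigned N = 0;
  for (; C & 1; C >>= 1)
    ++N;
  return N;
}

int main() {
  assert(isLowBitMask(0xFF) && trailingOnes(0xFF) == 8);      // zext i8
  assert(isLowBitMask(0xFFFF) && trailingOnes(0xFFFF) == 16); // zext i16
  assert(!isLowBitMask(0xFE));  // not a zero-extension mask
  return 0;
}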
@@ -1949,7 +2041,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
if (bestWidth) {
- EVT newVT = EVT::getIntegerVT(Context, bestWidth);
+ EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
if (newVT.isRound()) {
EVT PtrType = Lod->getOperand(1).getValueType();
SDValue Ptr = Lod->getBasePtr();
@@ -2578,9 +2670,13 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- char ConstraintLetter,
+ std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
+
+ if (Constraint.length() > 1) return;
+
+ char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default: break;
case 'X': // Allows any operand; labels (basic block) use this.
@@ -2769,6 +2865,12 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
report_fatal_error("Indirect operand for inline asm not a pointer!");
OpTy = PtrTy->getElementType();
}
+
+ // Look for a vector wrapped in a struct, e.g. { <16 x i8> }.
+ if (const StructType *STy = dyn_cast<StructType>(OpTy))
+ if (STy->getNumElements() == 1)
+ OpTy = STy->getElementType(0);
+
// If OpTy is not a single value, it may be a struct/union that we
// can tile with integers.
if (!OpTy->isSingleValueType() && OpTy->isSized()) {
@@ -3013,7 +3115,7 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
assert(OpInfo.Codes[i].size() == 1 &&
"Unhandled multi-letter 'other' constraint");
std::vector<SDValue> ResultOps;
- TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0],
+ TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
ResultOps, *DAG);
if (!ResultOps.empty()) {
BestType = CType;