aboutsummaryrefslogtreecommitdiffstats
path: root/lib/CodeGen/SelectionDAG
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
-rw-r--r--lib/CodeGen/SelectionDAG/CMakeLists.txt4
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp518
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp69
-rw-r--r--lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp65
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp115
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp130
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp60
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp17
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp41
-rw-r--r--lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp657
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp62
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h1
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp276
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp98
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp338
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h11
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp45
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp1
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp60
23 files changed, 1971 insertions, 607 deletions
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index 6023326..9a79217 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -10,13 +10,15 @@ add_llvm_library(LLVMSelectionDAG
LegalizeTypesGeneric.cpp
LegalizeVectorOps.cpp
LegalizeVectorTypes.cpp
+ ResourcePriorityQueue.cpp
ScheduleDAGFast.cpp
- ScheduleDAGRRList.cpp
+ ScheduleDAGRRList.cpp
ScheduleDAGSDNodes.cpp
SelectionDAG.cpp
SelectionDAGBuilder.cpp
SelectionDAGISel.cpp
SelectionDAGPrinter.cpp
+ ScheduleDAGVLIW.cpp
TargetLowering.cpp
TargetSelectionDAGInfo.cpp
)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 80cf0a8..1b148ad 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -63,7 +63,24 @@ namespace {
bool LegalTypes;
// Worklist of all of the nodes that need to be simplified.
- std::vector<SDNode*> WorkList;
+ //
+ // This has the semantics that when adding to the worklist,
+ // the item added must be next to be processed. It should
+ // also only appear once. The naive approach to this takes
+ // linear time.
+ //
+ // To reduce the insert/remove time to logarithmic, we use
+ // a set and a vector to maintain our worklist.
+ //
+ // The set contains the items on the worklist, but does not
+ // maintain the order they should be visited.
+ //
+ // The vector maintains the order nodes should be visited, but may
+ // contain duplicate or removed nodes. When choosing a node to
+ // visit, we pop off the order stack until we find an item that is
+ // also in the contents set. All operations are O(log N).
+ SmallPtrSet<SDNode*, 64> WorkListContents;
+ std::vector<SDNode*> WorkListOrder;
// AA - Used for DAG load/store alias analysis.
AliasAnalysis &AA;
@@ -83,18 +100,17 @@ namespace {
SDValue visit(SDNode *N);
public:
- /// AddToWorkList - Add to the work list making sure it's instance is at the
- /// the back (next to be processed.)
+ /// AddToWorkList - Add to the work list making sure its instance is at the
+ /// back (next to be processed.)
void AddToWorkList(SDNode *N) {
- removeFromWorkList(N);
- WorkList.push_back(N);
+ WorkListContents.insert(N);
+ WorkListOrder.push_back(N);
}
/// removeFromWorkList - remove all instances of N from the worklist.
///
void removeFromWorkList(SDNode *N) {
- WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
- WorkList.end());
+ WorkListContents.erase(N);
}
SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
@@ -158,7 +174,9 @@ namespace {
SDValue visitADD(SDNode *N);
SDValue visitSUB(SDNode *N);
SDValue visitADDC(SDNode *N);
+ SDValue visitSUBC(SDNode *N);
SDValue visitADDE(SDNode *N);
+ SDValue visitSUBE(SDNode *N);
SDValue visitMUL(SDNode *N);
SDValue visitSDIV(SDNode *N);
SDValue visitUDIV(SDNode *N);
@@ -957,10 +975,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
LegalTypes = Level >= AfterLegalizeTypes;
// Add all the dag nodes to the worklist.
- WorkList.reserve(DAG.allnodes_size());
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I)
- WorkList.push_back(I);
+ AddToWorkList(I);
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
@@ -971,11 +988,18 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// done. Set it to null to avoid confusion.
DAG.setRoot(SDValue());
- // while the worklist isn't empty, inspect the node on the end of it and
+ // while the worklist isn't empty, find a node and
// try and combine it.
- while (!WorkList.empty()) {
- SDNode *N = WorkList.back();
- WorkList.pop_back();
+ while (!WorkListContents.empty()) {
+ SDNode *N;
+ // The WorkListOrder holds the SDNodes in order, but it may contain duplicates.
+ // In order to avoid a linear scan, we use a set (O(log N)) to hold what the
+ // worklist *should* contain, and check that the node we want to visit should
+ // actually be visited.
+ do {
+ N = WorkListOrder.back();
+ WorkListOrder.pop_back();
+ } while (!WorkListContents.erase(N));
// If N has no uses, it is dead. Make sure to revisit all N's operands once
// N is deleted from the DAG, since they too may now be dead or may have a
@@ -1059,7 +1083,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ADD: return visitADD(N);
case ISD::SUB: return visitSUB(N);
case ISD::ADDC: return visitADDC(N);
+ case ISD::SUBC: return visitSUBC(N);
case ISD::ADDE: return visitADDE(N);
+ case ISD::SUBE: return visitSUBE(N);
case ISD::MUL: return visitMUL(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
@@ -1497,8 +1523,8 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an ADD.
- if (N->hasNUsesOfValue(0, 1))
- return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
N->getDebugLoc(), MVT::Glue));
@@ -1546,7 +1572,7 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
// fold (adde x, y, false) -> (addc x, y)
if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
- return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1);
return SDValue();
}
@@ -1656,6 +1682,51 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSUBC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // If the flag result is dead, turn this into a SUB.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // fold (subc x, x) -> 0 + no borrow
+ if (N0 == N1)
+ return CombineTo(N, DAG.getConstant(0, VT),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // fold (subc x, 0) -> x + no borrow
+ if (N1C && N1C->isNullValue())
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // Canonicalize (subc -1, x) -> ~x, i.e. (xor x, -1) + no borrow
+ if (N0C && N0C->isAllOnesValue())
+ return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // fold (sube x, y, false) -> (subc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2320,6 +2391,88 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
+ // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
+ // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
+ // already be zero by virtue of the width of the base type of the load.
+ //
+ // the 'X' node here can either be nothing or an extract_vector_elt to catch
+ // more cases.
+ if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD) ||
+ N0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
+ N0 : N0.getOperand(0) );
+
+ // Get the constant (if applicable) the zero'th operand is being ANDed with.
+ // This can be a pure constant or a vector splat, in which case we treat the
+ // vector as a scalar and use the splat value.
+ APInt Constant = APInt::getNullValue(1);
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ Constant = C->getAPIntValue();
+ } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
+ SplatBitSize, HasAnyUndefs);
+ if (IsSplat) {
+ // Undef bits can contribute to a possible optimisation if set, so
+ // set them.
+ SplatValue |= SplatUndef;
+
+ // The splat value may be something like "0x00FFFFFF", which means 0 for
+ // the first vector value and FF for the rest, repeating. We need a mask
+ // that will apply equally to all members of the vector, so AND all the
+ // lanes of the constant together.
+ EVT VT = Vector->getValueType(0);
+ unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
+ Constant = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i)
+ Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ }
+ }
+
+ // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
+ // actually legal and isn't going to get expanded, else this is a false
+ // optimisation.
+ bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
+ Load->getMemoryVT());
+
+ // Resize the constant to the same size as the original memory access before
+ // extension. If it is still the AllOnesValue then this AND is completely
+ // unneeded.
+ Constant =
+ Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());
+
+ bool B;
+ switch (Load->getExtensionType()) {
+ default: B = false; break;
+ case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
+ case ISD::ZEXTLOAD:
+ case ISD::NON_EXTLOAD: B = true; break;
+ }
+
+ if (B && Constant.isAllOnesValue()) {
+ // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
+ // preserve semantics once we get rid of the AND.
+ SDValue NewLoad(Load, 0);
+ if (Load->getExtensionType() == ISD::EXTLOAD) {
+ NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
+ Load->getValueType(0), Load->getDebugLoc(),
+ Load->getChain(), Load->getBasePtr(),
+ Load->getOffset(), Load->getMemoryVT(),
+ Load->getMemOperand());
+ // Replace uses of the EXTLOAD with the new ZEXTLOAD.
+ CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ }
+
+ // Fold the AND away, taking care not to fold to the old load node if we
+ // replaced it.
+ CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
// fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
@@ -3331,7 +3484,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
// (and (srl x, (sub c1, c2), MASK)
- if (N1C && N0.getOpcode() == ISD::SRL &&
+ // Only fold this if the inner shift has no other uses -- if it does, folding
+ // this will increase the total number of instructions.
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
if (c1 < VT.getSizeInBits()) {
@@ -4203,6 +4358,29 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
N0.getOperand(0));
+ // fold (zext (truncate x)) -> (zext x) or
+ // (zext (truncate x)) -> (truncate x)
+ // This is valid when the truncated bits of x are already zero.
+ // FIXME: We should extend this to work for vectors too.
+ if (N0.getOpcode() == ISD::TRUNCATE && !VT.isVector()) {
+ SDValue Op = N0.getOperand(0);
+ APInt TruncatedBits
+ = APInt::getBitsSet(Op.getValueSizeInBits(),
+ N0.getValueSizeInBits(),
+ std::min(Op.getValueSizeInBits(),
+ VT.getSizeInBits()));
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(Op, TruncatedBits, KnownZero, KnownOne);
+ if (TruncatedBits == KnownZero) {
+ if (VT.bitsGT(Op.getValueType()))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op);
+ if (VT.bitsLT(Op.getValueType()))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+
+ return Op;
+ }
+ }
+
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
@@ -4883,6 +5061,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ bool isLE = TLI.isLittleEndian();
// noop truncate
if (N0.getValueType() == N->getValueType(0))
@@ -4910,6 +5089,44 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return N0.getOperand(0);
}
+ // Fold extract-and-trunc into a narrow extract. For example:
+ // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
+ // i32 y = TRUNCATE(i64 x)
+ // -- becomes --
+ // v16i8 b = BITCAST (v2i64 val)
+ // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
+ //
+ // Note: We only run this optimization after type legalization (which often
+ // creates this pattern) and before operation legalization after which
+ // we need to be more careful about the vector instructions that we generate.
+ if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ LegalTypes && !LegalOperations && N0->hasOneUse()) {
+
+ EVT VecTy = N0.getOperand(0).getValueType();
+ EVT ExTy = N0.getValueType();
+ EVT TrTy = N->getValueType(0);
+
+ unsigned NumElem = VecTy.getVectorNumElements();
+ unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
+ assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
+
+ SDValue EltNo = N0->getOperand(1);
+ if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+
+ int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
+
+ SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ NVT, N0.getOperand(0));
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ N->getDebugLoc(), TrTy, V,
+ DAG.getConstant(Index, MVT::i32));
+ }
+ }
+
// See if we can simplify the input to this truncate through knowledge that
// only the low bits are being used.
// For example "trunc (or (shl x, 8), y)" // -> trunc y
@@ -5910,6 +6127,44 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
return SDValue();
}
+/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that
+/// uses N as its base pointer and that N may be folded in the load / store
+/// addressing mode. FIXME: This currently only looks for folding of
+/// [reg +/- imm] addressing modes.
+static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = Use->getValueType(0);
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getValue().getValueType();
+ } else
+ return false;
+
+ TargetLowering::AddrMode AM;
+ if (N->getOpcode() == ISD::ADD) {
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ AM.BaseOffs = Offset->getSExtValue();
+ else
+ return false;
+ } else if (N->getOpcode() == ISD::SUB) {
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ AM.BaseOffs = -Offset->getSExtValue();
+ else
+ return false;
+ } else
+ return false;
+
+ return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
+}
+
/// CombineToPreIndexedLoadStore - Try turning a load / store into a
/// pre-indexed load / store when the base pointer is an add or subtract
/// and it has other uses besides the load / store. After the
@@ -5996,10 +6251,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
if (N->hasPredecessorHelper(Use, Visited, Worklist))
return false;
- if (!((Use->getOpcode() == ISD::LOAD &&
- cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
- (Use->getOpcode() == ISD::STORE &&
- cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
+ // If Ptr may be folded into the addressing mode of another use, then it's
+ // not profitable to do this transformation.
+ if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
RealUse = true;
}
@@ -6096,7 +6350,8 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
continue;
// Try turning it into a post-indexed load / store except when
- // 1) All uses are load / store ops that use it as base ptr.
+ // 1) All uses are load / store ops that use it as base ptr (and
+ // it may be folded into the addressing mode).
// 2) Op must be independent of N, i.e. Op is neither a predecessor
// nor a successor of N. Otherwise, if Op is folded that would
// create a cycle.
@@ -6119,10 +6374,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
for (SDNode::use_iterator III = Use->use_begin(),
EEE = Use->use_end(); III != EEE; ++III) {
SDNode *UseUse = *III;
- if (!((UseUse->getOpcode() == ISD::LOAD &&
- cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
- (UseUse->getOpcode() == ISD::STORE &&
- cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
+ if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
RealUse = true;
}
@@ -6189,7 +6441,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
if (!LD->isVolatile()) {
if (N->getValueType(1) == MVT::Other) {
// Unindexed loads.
- if (N->hasNUsesOfValue(0, 0)) {
+ if (!N->hasAnyUseOfValue(0)) {
// It's not safe to use the two value CombineTo variant here. e.g.
// v1, chain2 = load chain1, loc
// v2, chain3 = load chain2, loc
@@ -6214,7 +6466,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
} else {
// Indexed loads.
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
- if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
+ if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
DEBUG(dbgs() << "\nReplacing.7 ";
N->dump(&DAG);
@@ -6873,13 +7125,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (scalar_to_vector val, 0) -> val
SDValue InVec = N->getOperand(0);
+ EVT VT = InVec.getValueType();
+ EVT NVT = N->getValueType(0);
if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
// EXTRACT_VECTOR_ELT may widen the extracted vector.
SDValue InOp = InVec.getOperand(0);
- EVT NVT = N->getValueType(0);
if (InOp.getValueType() != NVT) {
assert(InOp.getValueType().isInteger() && NVT.isInteger());
return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
@@ -6887,6 +7140,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
return InOp;
}
+ SDValue EltNo = N->getOperand(1);
+ bool ConstEltNo = isa<ConstantSDNode>(EltNo);
+
+ // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
+ // We only perform this optimization before the op legalization phase because
+ // we may introduce new vector instructions which are not backed by TD patterns.
+ // For example on AVX, extracting elements from a wide vector without using
+ // extract_subvector.
+ if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
+ && ConstEltNo && !LegalOperations) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int NumElem = VT.getVectorNumElements();
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
+ // Find the new index to extract from.
+ int OrigElt = SVOp->getMaskElt(Elt);
+
+ // Extracting an undef index is undef.
+ if (OrigElt == -1)
+ return DAG.getUNDEF(NVT);
+
+ // Select the right vector half to extract from.
+ if (OrigElt < NumElem) {
+ InVec = InVec->getOperand(0);
+ } else {
+ InVec = InVec->getOperand(1);
+ OrigElt -= NumElem;
+ }
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT,
+ InVec, DAG.getConstant(OrigElt, MVT::i32));
+ }
+
// Perform only after legalization to ensure build_vector / vector_shuffle
// optimizations have already been done.
if (!LegalOperations) return SDValue();
@@ -6894,17 +7179,19 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
- SDValue EltNo = N->getOperand(1);
- if (isa<ConstantSDNode>(EltNo)) {
+ if (ConstEltNo) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
bool NewLoad = false;
bool BCNumEltsChanged = false;
- EVT VT = InVec.getValueType();
EVT ExtVT = VT.getVectorElementType();
EVT LVT = ExtVT;
if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
EVT BCVT = InVec.getOperand(0).getValueType();
if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
return SDValue();
@@ -6922,12 +7209,20 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
} else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
InVec.getOperand(0).getValueType() == ExtVT &&
ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
LN0 = cast<LoadSDNode>(InVec.getOperand(0));
} else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
// (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
// =>
// (load $addr+1*size)
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
// If the bit convert changed the number of elements, it is unsafe
// to examine the mask.
if (BCNumEltsChanged)
@@ -6938,14 +7233,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
- if (InVec.getOpcode() == ISD::BITCAST)
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
InVec = InVec.getOperand(0);
+ }
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
}
}
+ // Make sure we found a non-volatile load and the extractelement is
+ // the only use.
if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
return SDValue();
@@ -6982,6 +7284,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// The replacement we need to do here is a little tricky: we need to
// replace an extractelement of a load with a load.
// Use ReplaceAllUsesOfValuesWith to do the replacement.
+ // Note that this replacement assumes that the extractelement is the only
+ // use of the load; that's okay because we don't want to perform this
+ // transformation in other cases anyway.
SDValue Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
LN0->isVolatile(), LN0->isNonTemporal(),
@@ -7011,11 +7316,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// optimizations. We do not handle sign-extend because we can't fill the sign
// using shuffles.
EVT SourceType = MVT::Other;
- bool allAnyExt = true;
- for (unsigned i = 0; i < NumInScalars; ++i) {
+ bool AllAnyExt = true;
+ bool AllUndef = true;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
// Ignore undef inputs.
if (In.getOpcode() == ISD::UNDEF) continue;
+ AllUndef = false;
bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
@@ -7040,15 +7347,17 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
}
// Check if all of the extends are ANY_EXTENDs.
- allAnyExt &= AnyExt;
+ AllAnyExt &= AnyExt;
}
+ if (AllUndef)
+ return DAG.getUNDEF(VT);
// In order to have valid types, all of the inputs must be extended from the
// same source type and all of the inputs must be any or zero extend.
// Scalar sizes must be a power of two.
EVT OutScalarTy = N->getValueType(0).getScalarType();
- bool validTypes = SourceType != MVT::Other &&
+ bool ValidTypes = SourceType != MVT::Other &&
isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
isPowerOf2_32(SourceType.getSizeInBits());
@@ -7058,11 +7367,12 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// will be type-legalized to complex code sequences.
// We perform this optimization only before the operation legalizer because we
// may introduce illegal operations.
- if (LegalTypes && !LegalOperations && validTypes) {
+ if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) &&
+ ValidTypes) {
bool isLE = TLI.isLittleEndian();
unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
assert(ElemRatio > 1 && "Invalid element size ratio");
- SDValue Filler = allAnyExt ? DAG.getUNDEF(SourceType):
+ SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
DAG.getConstant(0, SourceType);
unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements();
@@ -7117,15 +7427,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
break;
}
- // If the input vector type disagrees with the result of the build_vector,
- // we can't make a shuffle.
+ // We allow up to two distinct input vectors.
SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
- if (ExtractedFromVec.getValueType() != VT) {
- VecIn1 = VecIn2 = SDValue(0, 0);
- break;
- }
-
- // Otherwise, remember this. We allow up to two distinct input vectors.
if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
continue;
@@ -7140,7 +7443,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
}
}
- // If everything is good, we can make a shuffle operation.
+ // If everything is good, we can make a shuffle operation.
if (VecIn1.getNode()) {
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != NumInScalars; ++i) {
@@ -7166,14 +7469,39 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
Mask.push_back(Idx+NumInScalars);
}
- // Add count and size info.
+ // We can't generate a shuffle node with mismatched input and output types.
+ // Attempt to transform a single input vector to the correct type.
+ if ((VT != VecIn1.getValueType())) {
+ // We don't support shuffling between TWO values of different types.
+ if (VecIn2.getNode() != 0)
+ return SDValue();
+
+ // We only support widening of vectors which are half the size of the
+ // output registers. For example XMM->YMM widening on X86 with AVX.
+ if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
+ return SDValue();
+
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ }
+
+ // If VecIn2 is unused then change it to undef.
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+
+ // Check that we were able to transform all incoming values to the same type.
+ if (VecIn2.getValueType() != VecIn1.getValueType() ||
+ VecIn1.getValueType() != VT)
+ return SDValue();
+
+ // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
if (!isTypeLegal(VT))
return SDValue();
// Return the new VECTOR_SHUFFLE node.
SDValue Ops[2];
Ops[0] = VecIn1;
- Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ Ops[1] = VecIn2;
return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
}
@@ -7232,15 +7560,63 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
assert(N0.getValueType().getVectorNumElements() == NumElts &&
"Vector shuffle must be normalized in DAG");
- // FIXME: implement canonicalizations from DAG.getVectorShuffle()
+ // Canonicalize shuffle undef, undef -> undef
+ if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(VT);
+
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+
+ // Canonicalize shuffle v, v -> v, undef
+ if (N0 == N1) {
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) Idx -= NumElts;
+ NewMask.push_back(Idx);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT),
+ &NewMask[0]);
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N0.getOpcode() == ISD::UNDEF) {
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx < 0)
+ NewMask.push_back(Idx);
+ else if (Idx < (int)NumElts)
+ NewMask.push_back(Idx + NumElts);
+ else
+ NewMask.push_back(Idx - NumElts);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT),
+ &NewMask[0]);
+ }
+
+ // Remove references to rhs if it is undef
+ if (N1.getOpcode() == ISD::UNDEF) {
+ bool Changed = false;
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) {
+ Idx = -1;
+ Changed = true;
+ }
+ NewMask.push_back(Idx);
+ }
+ if (Changed)
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]);
+ }
// If it is a splat, check if the argument vector is another splat or a
// build_vector with all scalar elements the same.
- ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
SDNode *V = N0.getNode();
@@ -8029,30 +8405,20 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
- SDValue &Ptr, int64_t &Size,
- const Value *&SrcValue,
- int &SrcValueOffset,
- unsigned &SrcValueAlign,
- const MDNode *&TBAAInfo) const {
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
- Ptr = LD->getBasePtr();
- Size = LD->getMemoryVT().getSizeInBits() >> 3;
- SrcValue = LD->getSrcValue();
- SrcValueOffset = LD->getSrcValueOffset();
- SrcValueAlign = LD->getOriginalAlignment();
- TBAAInfo = LD->getTBAAInfo();
- return true;
- }
- if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
- Ptr = ST->getBasePtr();
- Size = ST->getMemoryVT().getSizeInBits() >> 3;
- SrcValue = ST->getSrcValue();
- SrcValueOffset = ST->getSrcValueOffset();
- SrcValueAlign = ST->getOriginalAlignment();
- TBAAInfo = ST->getTBAAInfo();
- return false;
- }
- llvm_unreachable("FindAliasInfo expected a memory operand");
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue,
+ int &SrcValueOffset,
+ unsigned &SrcValueAlign,
+ const MDNode *&TBAAInfo) const {
+ LSBaseSDNode *LS = cast<LSBaseSDNode>(N);
+
+ Ptr = LS->getBasePtr();
+ Size = LS->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = LS->getSrcValue();
+ SrcValueOffset = LS->getSrcValueOffset();
+ SrcValueAlign = LS->getOriginalAlignment();
+ TBAAInfo = LS->getTBAAInfo();
+ return isa<LoadSDNode>(LS);
}
/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index b4946ec..fd8ce78 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -561,12 +561,19 @@ bool FastISel::SelectCall(const User *I) {
return true;
}
+ MachineModuleInfo &MMI = FuncInfo.MF->getMMI();
+ ComputeUsesVAFloatArgument(*Call, &MMI);
+
const Function *F = Call->getCalledFunction();
if (!F) return false;
// Handle selected intrinsic function calls.
switch (F->getIntrinsicID()) {
default: break;
+ // At -O0 we don't care about the lifetime intrinsics.
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ return true;
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
if (!DIVariable(DI->getVariable()).Verify() ||
@@ -630,60 +637,6 @@ bool FastISel::SelectCall(const User *I) {
}
return true;
}
- case Intrinsic::eh_exception: {
- EVT VT = TLI.getValueType(Call->getType());
- if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand)
- break;
-
- assert(FuncInfo.MBB->isLandingPad() &&
- "Call to eh.exception not in landing pad!");
- unsigned Reg = TLI.getExceptionAddressRegister();
- const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(Reg);
- UpdateValueMap(Call, ResultReg);
- return true;
- }
- case Intrinsic::eh_selector: {
- EVT VT = TLI.getValueType(Call->getType());
- if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand)
- break;
- if (FuncInfo.MBB->isLandingPad())
- AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB);
- else {
-#ifndef NDEBUG
- FuncInfo.CatchInfoLost.insert(Call);
-#endif
- // FIXME: Mark exception selector register as live in. Hack for PR1508.
- unsigned Reg = TLI.getExceptionSelectorRegister();
- if (Reg) FuncInfo.MBB->addLiveIn(Reg);
- }
-
- unsigned Reg = TLI.getExceptionSelectorRegister();
- EVT SrcVT = TLI.getPointerTy();
- const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(Reg);
-
- bool ResultRegIsKill = hasTrivialKill(Call);
-
- // Cast the register to the type of the selector.
- if (SrcVT.bitsGT(MVT::i32))
- ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
- ResultReg, ResultRegIsKill);
- else if (SrcVT.bitsLT(MVT::i32))
- ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
- ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
- if (ResultReg == 0)
- // Unhandled operand. Halt "fast" selection and bail.
- return false;
-
- UpdateValueMap(Call, ResultReg);
-
- return true;
- }
case Intrinsic::objectsize: {
ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1));
unsigned long long Res = CI->isZero() ? -1ULL : 0;
@@ -775,8 +728,8 @@ bool FastISel::SelectBitCast(const User *I) {
// First, try to perform the bitcast by inserting a reg-reg copy.
unsigned ResultReg = 0;
if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
- TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
- TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
+ const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
+ const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
// Don't attempt a cross-class copy. It will likely fail.
if (SrcClass == DstClass) {
ResultReg = createResultReg(DstClass);
@@ -1419,8 +1372,8 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// exactly one register for each non-void instruction.
EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
- // Promote MVT::i1.
- if (VT == MVT::i1)
+ // Handle integer promotions, though, because they're common and easy.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
else {
FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 90d35cc..8dde919 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "function-lowering-info"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -68,7 +69,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
GetReturnInfo(Fn->getReturnType(),
Fn->getAttributes().getRetAttributes(), Outs, TLI);
CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF,
- Fn->isVarArg(),
+ Fn->isVarArg(),
Outs, Fn->getContext());
// Initialize the mapping of values to registers. This is only set up for
@@ -95,11 +96,13 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
(TySize >= 8 && isa<ArrayType>(Ty) &&
cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8)));
StaticAllocaMap[AI] =
- MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP);
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false,
+ MayNeedSP);
}
for (; BB != EB; ++BB)
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
// Mark values used outside their block as exported, by allocating
// a virtual register for them.
if (isUsedOutsideOfDefiningBlock(I))
@@ -355,7 +358,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
/// argument. This overrides previous frame index entry for this argument,
/// if any.
void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A,
- int FI) {
+ int FI) {
ByValArgFrameIndexMap[A] = FI;
}
@@ -367,10 +370,34 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
ByValArgFrameIndexMap.find(A);
if (I != ByValArgFrameIndexMap.end())
return I->second;
- DEBUG(dbgs() << "Argument does not have assigned frame index!");
+ DEBUG(dbgs() << "Argument does not have assigned frame index!\n");
return 0;
}
+/// ComputeUsesVAFloatArgument - Determine if any floating-point values are
+/// being passed to this variadic function, and set the MachineModuleInfo's
+/// usesVAFloatArgument flag if so. This flag is used to emit an undefined
+/// reference to _fltused on Windows, which will link in MSVCRT's
+/// floating-point support.
+void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
+ MachineModuleInfo *MMI)
+{
+ FunctionType *FT = cast<FunctionType>(
+ I.getCalledValue()->getType()->getContainedType(0));
+ if (FT->isVarArg() && !MMI->usesVAFloatArgument()) {
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ Type* T = I.getArgOperand(i)->getType();
+ for (po_iterator<Type*> i = po_begin(T), e = po_end(T);
+ i != e; ++i) {
+ if (i->isFloatingPointTy()) {
+ MMI->setUsesVAFloatArgument(true);
+ return;
+ }
+ }
+ }
+ }
+}
+
/// AddCatchInfo - Extract the personality and type infos from an eh.selector
/// call, and add them to the specified machine basic block.
void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
@@ -425,34 +452,6 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
}
}
-void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad,
- MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
- SmallPtrSet<const BasicBlock*, 4> Visited;
-
- // The 'eh.selector' call may not be in the direct successor of a basic block,
- // but could be several successors deeper. If we don't find it, try going one
- // level further. <rdar://problem/8824861>
- while (Visited.insert(SuccBB)) {
- for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end();
- I != E; ++I)
- if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
- // Apply the catch info to LPad.
- AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]);
-#ifndef NDEBUG
- if (!FLI.MBBMap[SuccBB]->isLandingPad())
- FLI.CatchInfoFound.insert(EHSel);
-#endif
- return;
- }
-
- const BranchInst *Br = dyn_cast<BranchInst>(SuccBB->getTerminator());
- if (Br && Br->isUnconditional())
- SuccBB = Br->getSuccessor(0);
- else
- break;
- }
-}
-
/// AddLandingPadInfo - Extract the exception handling information from the
/// landingpad instruction and add them to the specified machine module info.
void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index cb6fd53..1467d88 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -351,6 +351,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
MI->addOperand(MachineOperand::CreateFPImm(CFP));
} else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
+ } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask()));
} else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(),
TGA->getTargetFlags()));
@@ -574,14 +576,19 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
for (unsigned i = 1; i != NumOps; ++i) {
SDValue Op = Node->getOperand(i);
if ((i & 1) == 0) {
- unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
- unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
- const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
- const TargetRegisterClass *SRC =
+ RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1));
+ // Skip physical registers as they don't have a vreg to get and we'll
+ // insert copies for them in TwoAddressInstructionPass anyway.
+ if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
+ unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
- if (SRC && SRC != RC) {
- MRI->setRegClass(NewVReg, SRC);
- RC = SRC;
+ if (SRC && SRC != RC) {
+ MRI->setRegClass(NewVReg, SRC);
+ RC = SRC;
+ }
}
}
AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
@@ -700,33 +707,6 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Create the new machine instruction.
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
- // The MachineInstr constructor adds implicit-def operands. Scan through
- // these to determine which are dead.
- if (MI->getNumOperands() != 0 &&
- Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
- // First, collect all used registers.
- SmallVector<unsigned, 8> UsedRegs;
- for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser())
- if (F->getOpcode() == ISD::CopyFromReg)
- UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
- else {
- // Collect declared implicit uses.
- const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
- UsedRegs.append(MCID.getImplicitUses(),
- MCID.getImplicitUses() + MCID.getNumImplicitUses());
- // In addition to declared implicit uses, we must also check for
- // direct RegisterSDNode operands.
- for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
- if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
- unsigned Reg = R->getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- UsedRegs.push_back(Reg);
- }
- }
- // Then mark unused registers as dead.
- MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
- }
-
// Add result register values for things that are defined by this
// instruction.
if (NumResults)
@@ -751,30 +731,63 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// hook knows where in the block to insert the replacement code.
MBB->insert(InsertPos, MI);
+ // The MachineInstr may also define physregs instead of virtregs. These
+ // physreg values can reach other instructions in different ways:
+ //
+ // 1. When there is a use of a Node value beyond the explicitly defined
+ // virtual registers, we emit a CopyFromReg for one of the implicitly
+ // defined physregs. This only happens when HasPhysRegOuts is true.
+ //
+ // 2. A CopyFromReg reading a physreg may be glued to this instruction.
+ //
+ // 3. A glued instruction may implicitly use a physreg.
+ //
+ // 4. A glued instruction may use a RegisterSDNode operand.
+ //
+ // Collect all the used physreg defs, and make sure that any unused physreg
+ // defs are marked as dead.
+ SmallVector<unsigned, 8> UsedRegs;
+
// Additional results must be physical register defs.
if (HasPhysRegOuts) {
for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
- if (Node->hasAnyUseOfValue(i))
- EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
- // If there are no uses, mark the register as dead now, so that
- // MachineLICM/Sink can see that it's dead. Don't do this if the
- // node has a Glue value, for the benefit of targets still using
- // Glue for values in physregs.
- else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
- MI->addRegisterDead(Reg, TRI);
+ if (!Node->hasAnyUseOfValue(i))
+ continue;
+ // This implicitly defined physreg has a use.
+ UsedRegs.push_back(Reg);
+ EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
}
}
- // If the instruction has implicit defs and the node doesn't, mark the
- // implicit def as dead. If the node has any glue outputs, we don't do this
- // because we don't know what implicit defs are being used by glued nodes.
- if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
- if (const unsigned *IDList = II.getImplicitDefs()) {
- for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs();
- i != e; ++i)
- MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI);
+ // Scan the glue chain for any used physregs.
+ if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
+ for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
+ if (F->getOpcode() == ISD::CopyFromReg) {
+ UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+ continue;
+ } else if (F->getOpcode() == ISD::CopyToReg) {
+ // Skip CopyToReg nodes that are internal to the glue chain.
+ continue;
+ }
+ // Collect declared implicit uses.
+ const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
+ UsedRegs.append(MCID.getImplicitUses(),
+ MCID.getImplicitUses() + MCID.getNumImplicitUses());
+ // In addition to declared implicit uses, we must also check for
+ // direct RegisterSDNode operands.
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+ unsigned Reg = R->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ UsedRegs.push_back(Reg);
+ }
}
+ }
+
+ // Finally mark unused registers as dead.
+ if (!UsedRegs.empty() || II.getImplicitDefs())
+ MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
// Run post-isel target hook to adjust this instruction if needed.
#ifdef NDEBUG
@@ -794,10 +807,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
Node->dump();
#endif
llvm_unreachable("This target-independent node should have been selected!");
- break;
case ISD::EntryToken:
llvm_unreachable("EntryToken should have been excluded from the schedule!");
- break;
case ISD::MERGE_VALUES:
case ISD::TokenFactor: // fall thru
break;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 75f5761..31df458 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -85,7 +85,7 @@ private:
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
SDValue N1, SDValue N2,
- SmallVectorImpl<int> &Mask) const;
+ ArrayRef<int> Mask) const;
void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
DebugLoc dl);
@@ -177,7 +177,7 @@ public:
SDValue
SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
SDValue N1, SDValue N2,
- SmallVectorImpl<int> &Mask) const {
+ ArrayRef<int> Mask) const {
unsigned NumMaskElts = VT.getVectorNumElements();
unsigned NumDestElts = NVT.getVectorNumElements();
unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
@@ -893,7 +893,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Node->dump( &DAG);
dbgs() << "\n";
#endif
- assert(0 && "Do not know how to legalize this operator!");
+ llvm_unreachable("Do not know how to legalize this operator!");
case ISD::CALLSEQ_START:
case ISD::CALLSEQ_END:
@@ -910,7 +910,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Tmp4 = SDValue(Node, 1);
switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned load and the target doesn't support it,
// expand it.
@@ -1079,7 +1079,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Tmp2 = Ch;
} else {
switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Custom:
isCustom = true;
// FALLTHROUGH
@@ -1185,7 +1185,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Tmp3 = ST->getValue();
EVT VT = Tmp3.getValueType();
switch (TLI.getOperationAction(ISD::STORE, VT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned store and the target doesn't support it,
// expand it.
@@ -1290,7 +1290,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
ReplaceNode(SDValue(Node, 0), Result);
} else {
switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned store and the target doesn't support it,
// expand it.
@@ -1556,7 +1556,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
EVT OpVT = LHS.getValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
- default: assert(0 && "Unknown condition code action!");
+ default: llvm_unreachable("Unknown condition code action!");
case TargetLowering::Legal:
// Nothing to do.
break;
@@ -1564,7 +1564,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
- default: assert(0 && "Don't know how to expand this condition!");
+ default: llvm_unreachable("Don't know how to expand this condition!");
case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break;
case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break;
case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break;
@@ -1699,7 +1699,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
// If all elements are constants, create a load from the constant pool.
if (isConstant) {
- std::vector<Constant*> CV;
+ SmallVector<Constant*, 16> CV;
for (unsigned i = 0, e = NumElems; i != e; ++i) {
if (ConstantFPSDNode *V =
dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
@@ -1788,7 +1788,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), isTailCall,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
if (!CallInfo.second.getNode())
@@ -1821,7 +1821,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
std::pair<SDValue,SDValue> CallInfo =
TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
false, 0, TLI.getLibcallCallingConv(LC), false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, dl);
return CallInfo.first;
@@ -1853,7 +1853,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
return CallInfo;
@@ -1866,7 +1866,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
RTLIB::Libcall Call_PPCF128) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::f32: LC = Call_F32; break;
case MVT::f64: LC = Call_F64; break;
case MVT::f80: LC = Call_F80; break;
@@ -1883,7 +1883,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
RTLIB::Libcall Call_I128) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC = Call_I8; break;
case MVT::i16: LC = Call_I16; break;
case MVT::i32: LC = Call_I32; break;
@@ -1898,7 +1898,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
const TargetLowering &TLI) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
@@ -1943,7 +1943,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
@@ -1985,7 +1985,8 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*isReturnValueUsed=*/true, Callee, Args, DAG, dl);
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
// Remainder is loaded back from the stack frame.
SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr,
@@ -2160,7 +2161,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
// offset depending on the data type.
uint64_t FF;
switch (Op0.getValueType().getSimpleVT().SimpleTy) {
- default: assert(0 && "Unsupported integer type!");
+ default: llvm_unreachable("Unsupported integer type!");
case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
@@ -2282,7 +2283,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
EVT SHVT = TLI.getShiftAmountTy(VT);
SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
switch (VT.getSimpleVT().SimpleTy) {
- default: assert(0 && "Unhandled Expand type in BSWAP!");
+ default: llvm_unreachable("Unhandled Expand type in BSWAP!");
case MVT::i16:
Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
@@ -2339,7 +2340,7 @@ static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
DebugLoc dl) {
switch (Opc) {
- default: assert(0 && "Cannot expand this yet!");
+ default: llvm_unreachable("Cannot expand this yet!");
case ISD::CTPOP: {
EVT VT = Op.getValueType();
EVT ShVT = TLI.getShiftAmountTy(VT);
@@ -2438,7 +2439,6 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
switch (Opc) {
default:
llvm_unreachable("Unhandled atomic intrinsic Expand!");
- break;
case ISD::ATOMIC_SWAP:
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type for atomic!");
@@ -2564,7 +2564,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C,
/*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__sync_synchronize",
TLI.getPointerTy()),
Args, DAG, dl);
@@ -2641,7 +2641,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C,
/*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("abort", TLI.getPointerTy()),
Args, DAG, dl);
Results.push_back(CallResult.second);
@@ -2795,15 +2795,57 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Node->getOperand(2), dl));
break;
case ISD::VECTOR_SHUFFLE: {
- SmallVector<int, 8> Mask;
- cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+ SmallVector<int, 32> NewMask;
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
EVT VT = Node->getValueType(0);
EVT EltVT = VT.getVectorElementType();
- if (!TLI.isTypeLegal(EltVT))
- EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ if (!TLI.isTypeLegal(EltVT)) {
+
+ EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+
+ // BUILD_VECTOR operands are allowed to be wider than the element type.
+ // But if NewEltVT is smaller than EltVT the BUILD_VECTOR does not accept it.
+ if (NewEltVT.bitsLT(EltVT)) {
+
+ // Convert shuffle node.
+ // If original node was v4i64 and the new EltVT is i32,
+ // cast operands to v8i32 and re-build the mask.
+
+ // Calculate the new VT; its size must be equal to that of the original VT.
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT,
+ VT.getSizeInBits()/NewEltVT.getSizeInBits());
+ assert(NewVT.bitsEq(VT));
+
+ // cast operands to new VT
+ Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1);
+
+ // Convert the shuffle mask
+ unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements();
+
+ // EltVT gets smaller
+ assert(factor > 0);
+
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
+ if (Mask[i] < 0) {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]);
+ }
+ else {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]*factor+fi);
+ }
+ }
+ Mask = NewMask;
+ VT = NewVT;
+ }
+ EltVT = NewEltVT;
+ }
unsigned NumElems = VT.getVectorNumElements();
- SmallVector<SDValue, 8> Ops;
+ SmallVector<SDValue, 16> Ops;
for (unsigned i = 0; i != NumElems; ++i) {
if (Mask[i] < 0) {
Ops.push_back(DAG.getUNDEF(EltVT));
@@ -2812,14 +2854,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
unsigned Idx = Mask[i];
if (Idx < NumElems)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Node->getOperand(0),
+ Op0,
DAG.getIntPtrConstant(Idx)));
else
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Node->getOperand(1),
+ Op1,
DAG.getIntPtrConstant(Idx - NumElems)));
}
+
Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ // We may have changed the BUILD_VECTOR type. Cast it back to the Node type.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1);
Results.push_back(Tmp1);
break;
}
@@ -2980,7 +3025,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::EXCEPTIONADDR: {
- unsigned Reg = TLI.getExceptionAddressRegister();
+ unsigned Reg = TLI.getExceptionPointerRegister();
assert(Reg && "Can't expand to unknown register!");
Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
Node->getValueType(0)));
@@ -3520,8 +3565,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
break;
}
case ISD::VECTOR_SHUFFLE: {
- SmallVector<int, 8> Mask;
- cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
// Cast the two input vectors.
Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0));
@@ -3546,6 +3590,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp1, Tmp2, Node->getOperand(2)));
break;
}
+ case ISD::FPOW: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(ISD::FPOW, dl, NVT, Tmp1, Tmp2);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp3, DAG.getIntPtrConstant(0)));
+ break;
+ }
+ case ISD::FLOG2:
+ case ISD::FEXP2:
+ case ISD::FLOG:
+ case ISD::FEXP: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp2, DAG.getIntPtrConstant(0)));
+ break;
+ }
}
// Replace the original node with the legalized result.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 6732d37..e393896 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -672,7 +672,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
case ISD::SETUEQ:
LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
break;
- default: assert(false && "Do not know how to soften this setcc!");
+ default: llvm_unreachable("Do not know how to soften this setcc!");
}
}
@@ -1212,7 +1212,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
switch (SrcVT.getSimpleVT().SimpleTy) {
default:
- assert(false && "Unsupported UINT_TO_FP!");
+ llvm_unreachable("Unsupported UINT_TO_FP!");
case MVT::i32:
Parts = TwoE32;
break;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 1c02c4f..41506d1 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -212,9 +212,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
switch (getTypeAction(InVT)) {
- default:
- assert(false && "Unknown type action!");
- break;
case TargetLowering::TypeLegal:
break;
case TargetLowering::TypePromoteInteger:
@@ -252,9 +249,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
}
case TargetLowering::TypeWidenVector:
- if (OutVT.bitsEq(NInVT))
- // The input is widened to the same size. Convert to the widened value.
- return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp));
+ // The input is widened to the same size. Convert to the widened value.
+ // Make sure that the outgoing value is not a vector, because this would
+ // make us bitcast between two vectors which are legalized in different ways.
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector())
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
}
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
@@ -489,7 +488,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
- SDValue Mask = GetPromotedInteger(N->getOperand(0));
+ SDValue Mask = N->getOperand(0);
+ EVT OpTy = N->getOperand(1).getValueType();
+
+ // Promote all the way up to the canonical SetCC type.
+ Mask = PromoteTargetBoolean(Mask, TLI.getSetCCResultType(OpTy));
SDValue LHS = GetPromotedInteger(N->getOperand(1));
SDValue RHS = GetPromotedInteger(N->getOperand(2));
return DAG.getNode(ISD::VSELECT, N->getDebugLoc(),
@@ -1176,7 +1179,6 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
switch (Opc) {
default:
llvm_unreachable("Unhandled atomic intrinsic Expand!");
- break;
case ISD::ATOMIC_SWAP:
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type for atomic!");
@@ -1395,15 +1397,15 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
}
}
-#if 0
- // FIXME: This code is broken for shifts with a zero amount!
// If we know that all of the high bits of the shift amount are zero, then we
// can do this as a couple of simple shifts.
if ((KnownZero & HighBitMask) == HighBitMask) {
- // Compute 32-amt.
- SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy,
- DAG.getConstant(NVTBits, ShTy),
- Amt);
+ // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined
+ // shift if x is zero. We can use XOR here because x is known to be smaller
+ // than 32.
+ SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt,
+ DAG.getConstant(NVTBits-1, ShTy));
+
unsigned Op1, Op2;
switch (N->getOpcode()) {
default: llvm_unreachable("Unknown shift");
@@ -1412,13 +1414,23 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break;
}
- Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt);
- Hi = DAG.getNode(ISD::OR, NVT,
- DAG.getNode(Op1, NVT, InH, Amt),
- DAG.getNode(Op2, NVT, InL, Amt2));
+ // When shifting right the arithmetic for Lo and Hi is swapped.
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(InL, InH);
+
+ // Use a little trick to get the bits that move from Lo to Hi. First
+ // shift by one bit.
+ SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, ShTy));
+ // Then compute the remaining shift with amount-1.
+ SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt);
+ Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2);
+
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(Hi, Lo);
return true;
}
-#endif
return false;
}
@@ -1498,8 +1510,6 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
return true;
}
-
- return false;
}
void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
@@ -2311,8 +2321,10 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(Chain, RetTy, true, false, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
- true, Func, Args, DAG, dl);
+ 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Func, Args, DAG, dl);
SplitInteger(CallInfo.first, Lo, Hi);
SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
@@ -2787,7 +2799,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
else if (SrcVT == MVT::i128)
FF = APInt(32, F32TwoE128);
else
- assert(false && "Unsupported UINT_TO_FP!");
+ llvm_unreachable("Unsupported UINT_TO_FP!");
// Check whether the sign bit is set.
SDValue Lo, Hi;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 04a6a4a..439aa4d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -222,8 +222,6 @@ bool DAGTypeLegalizer::run() {
for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
EVT ResultVT = N->getValueType(i);
switch (getTypeAction(ResultVT)) {
- default:
- assert(false && "Unknown action!");
case TargetLowering::TypeLegal:
break;
// The following calls must take care of *all* of the node's results,
@@ -275,8 +273,6 @@ ScanOperands:
EVT OpVT = N->getOperand(i).getValueType();
switch (getTypeAction(OpVT)) {
- default:
- assert(false && "Unknown action!");
case TargetLowering::TypeLegal:
continue;
// The following calls must either replace all of the node's results
@@ -752,7 +748,11 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
}
void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
- assert(Result.getValueType() == Op.getValueType().getVectorElementType() &&
+ // Note that in some cases vector operation operands may be greater than
+ // the vector element type. For example BUILD_VECTOR of type <1 x i1> with
+ // a constant i8 operand.
+ assert(Result.getValueType().getSizeInBits() >=
+ Op.getValueType().getVectorElementType().getSizeInBits() &&
"Invalid type for scalarized vector");
AnalyzeNewValue(Result);
@@ -1056,8 +1056,9 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
std::pair<SDValue,SDValue> CallInfo =
TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
- false, 0, TLI.getLibcallCallingConv(LC), false,
- /*isReturnValueUsed=*/true,
+ false, 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, dl);
return CallInfo.first;
}
@@ -1088,7 +1089,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
return CallInfo;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 7c5472b..a8ff7c6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -45,8 +45,6 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Handle some special cases efficiently.
switch (getTypeAction(InVT)) {
- default:
- assert(false && "Unknown type action!");
case TargetLowering::TypeLegal:
case TargetLowering::TypePromoteInteger:
break;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 4696c0d..3ae8345 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -141,7 +141,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
EVT ValVT = ST->getValue().getValueType();
if (StVT.isVector() && ST->isTruncatingStore())
switch (TLI.getTruncStoreAction(ValVT, StVT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
return TranslateLegalizeResults(Op, Result);
case TargetLowering::Custom:
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7ca0d1e..a8aee12 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -404,6 +404,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
N->dump(&DAG);
dbgs() << "\n");
SDValue Lo, Hi;
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
switch (N->getOpcode()) {
default:
@@ -1565,12 +1569,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
switch (getTypeAction(InVT)) {
- default:
- assert(false && "Unknown type action!");
- break;
case TargetLowering::TypeLegal:
break;
case TargetLowering::TypePromoteInteger:
+ // If the incoming type is a vector that is being promoted, then
+ // we know that the elements are arranged differently and that we
+ // must perform the conversion using a stack slot.
+ if (InVT.isVector())
+ break;
+
// If the InOp is promoted to the same size, convert it. Otherwise,
// fall out of the switch and widen the promoted input.
InOp = GetPromotedInteger(InOp);
@@ -2326,19 +2333,37 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getIntPtrConstant(Increment));
+ SDValue L;
if (LdWidth < NewVTWidth) {
// Our current type we are using is too large, find a better size
NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
NewVTWidth = NewVT.getSizeInBits();
- }
-
- SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset),
isVolatile,
isNonTemporal, isInvariant,
MinAlign(Align, Increment));
- LdChain.push_back(LdOp.getValue(1));
- LdOps.push_back(LdOp);
+ LdChain.push_back(L.getValue(1));
+ if (L->getValueType(0).isVector()) {
+ SmallVector<SDValue, 16> Loads;
+ Loads.push_back(L);
+ unsigned size = L->getValueSizeInBits(0);
+ while (size < LdOp->getValueSizeInBits(0)) {
+ Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
+ size += L->getValueSizeInBits(0);
+ }
+ L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0),
+ &Loads[0], Loads.size());
+ }
+ } else {
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), isVolatile,
+ isNonTemporal, isInvariant, MinAlign(Align, Increment));
+ LdChain.push_back(L.getValue(1));
+ }
+
+ LdOps.push_back(L);
+
LdWidth -= NewVTWidth;
}
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
new file mode 100644
index 0000000..1a27f3f
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -0,0 +1,657 @@
+//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ResourcePriorityQueue class, which is a
+// SchedulingPriorityQueue that prioritizes instructions using DFA state to
+// reduce the length of the critical path through the basic block
+// on VLIW platforms.
+// The scheduler is basically a top-down adaptable list scheduler with DFA
+// resource tracking added to the cost function.
+// DFA is queried as a state machine to model "packets/bundles" during
+// schedule. Currently packets/bundles are discarded at the end of
+// scheduling, affecting only order of instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scheduler"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLowering.h"
+
+using namespace llvm;
+
+// Hidden command-line switch (default: false). When set, pop() skips the
+// DFA-based cost function and selects nodes with the resource_sort comparator.
+static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable use of DFA during scheduling"));
+
+// Hidden command-line threshold (default: 5). SUSchedulingCost() compares
+// HorizontalVerticalBalance against it to choose between its two heuristics.
+static cl::opt<signed> RegPressureThreshold(
+ "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
+ cl::desc("Track reg pressure and switch priority to in-depth"));
+
+
+/// Set up the queue for the given instruction selector: cache the target
+/// hooks, create the DFA resource model and initialise the per-register-class
+/// limit and pressure tables.
+ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) :
+  Picker(this),
+  InstrItins(IS->getTargetLowering().getTargetMachine().getInstrItineraryData())
+{
+  TLI = &IS->getTargetLowering();
+  TII = IS->getTargetLowering().getTargetMachine().getInstrInfo();
+  TRI = IS->getTargetLowering().getTargetMachine().getRegisterInfo();
+
+  const TargetMachine &tm = (*IS->MF).getTarget();
+  ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL);
+  // This hard requirement could be relaxed, but for now
+  // do not let it proceed.
+  assert (ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+  // One zero-initialised slot per register class in both tables.
+  unsigned NumRC = TRI->getNumRegClasses();
+  RegLimit.assign(NumRC, 0);
+  RegPressure.assign(NumRC, 0);
+
+  // Record the target-reported pressure limit of every register class.
+  for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
+         RCE = TRI->regclass_end(); RCI != RCE; ++RCI) {
+    RegLimit[(*RCI)->getID()] = TRI->getRegPressureLimit(*RCI, *IS->MF);
+  }
+
+  ParallelLiveRanges = 0;
+  HorizontalVerticalBalance = 0;
+}
+
+/// Count the data predecessors of SU that are CopyFromReg nodes or machine
+/// nodes with at least one legal result in register class RCId. Control
+/// edges and predecessors without an SDNode are ignored.
+unsigned
+ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
+  unsigned Count = 0;
+  for (SUnit::pred_iterator PI = SU->Preds.begin(), PE = SU->Preds.end();
+       PI != PE; ++PI) {
+    if (PI->isCtrl())
+      continue;
+
+    const SDNode *PredN = PI->getSUnit()->getNode();
+    if (!PredN)
+      continue;
+
+    // Among the non-machine opcodes only CopyFromReg contributes.
+    if (PredN->getOpcode() == ISD::CopyFromReg)
+      ++Count;
+
+    if (!PredN->isMachineOpcode())
+      continue;
+
+    // A machine node counts once, as soon as one of its results is a legal
+    // type that lives in the requested register class.
+    const unsigned NumVals = PredN->getNumValues();
+    for (unsigned Idx = 0; Idx != NumVals; ++Idx) {
+      EVT VT = PredN->getValueType(Idx);
+      if (!TLI->isTypeLegal(VT))
+        continue;
+      if (TLI->getRegClassFor(VT)->getID() == RCId) {
+        ++Count;
+        break;
+      }
+    }
+  }
+  return Count;
+}
+
+/// Count the data successors of SU that are CopyToReg nodes or machine
+/// nodes with at least one operand of a legal type in register class RCId.
+/// Control edges and successors without an SDNode are ignored.
+unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
+                                                    unsigned RCId) {
+  unsigned Count = 0;
+  for (SUnit::const_succ_iterator SI = SU->Succs.begin(),
+         SE = SU->Succs.end(); SI != SE; ++SI) {
+    if (SI->isCtrl())
+      continue;
+
+    const SDNode *SuccN = SI->getSUnit()->getNode();
+    if (!SuccN)
+      continue;
+
+    // If value is passed to CopyToReg, it is probably live outside BB;
+    // no other non-machine opcode contributes.
+    if (SuccN->getOpcode() == ISD::CopyToReg)
+      ++Count;
+
+    if (!SuccN->isMachineOpcode())
+      continue;
+
+    // A machine node counts once, as soon as one of its operands is a legal
+    // type that lives in the requested register class.
+    const unsigned NumOps = SuccN->getNumOperands();
+    for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
+      const SDValue &Op = SuccN->getOperand(Idx);
+      EVT VT = Op.getNode()->getValueType(Op.getResNo());
+      if (!TLI->isTypeLegal(VT))
+        continue;
+      if (TLI->getRegClassFor(VT)->getID() == RCId) {
+        ++Count;
+        break;
+      }
+    }
+  }
+  return Count;
+}
+
+/// Return how many successor edges of SU are control dependencies.
+static unsigned numberCtrlDepsInSU(SUnit *SU) {
+  unsigned CtrlEdges = 0;
+  for (SUnit::const_succ_iterator SI = SU->Succs.begin(),
+         SE = SU->Succs.end(); SI != SE; ++SI) {
+    if (!SI->isCtrl())
+      continue;
+    ++CtrlEdges;
+  }
+  return CtrlEdges;
+}
+
+/// Return how many predecessor edges of SU are control dependencies.
+static unsigned numberCtrlPredInSU(SUnit *SU) {
+  unsigned CtrlEdges = 0;
+  for (SUnit::pred_iterator PI = SU->Preds.begin(), PE = SU->Preds.end();
+       PI != PE; ++PI) {
+    if (!PI->isCtrl())
+      continue;
+    ++CtrlEdges;
+  }
+  return CtrlEdges;
+}
+
+/// Initialize nodes: remember the scheduling-unit vector, size the
+/// "solely blocking" table to match it, and reset the per-unit state this
+/// queue maintains (NumRegDefsLeft and NodeQueueId).
+void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
+  SUnits = &sunits;
+  NumNodesSolelyBlocking.resize(SUnits->size(), 0);
+
+  for (std::vector<SUnit>::iterator UI = sunits.begin(), UE = sunits.end();
+       UI != UE; ++UI) {
+    SUnit &Unit = *UI;
+    initNumRegDefsLeft(&Unit);
+    Unit.NodeQueueId = 0;
+  }
+}
+
+/// Comparator used when DFA scheduling is not desired for some VLIW
+/// platform. Returns true when RHS should be preferred over LHS; pop()
+/// replaces its current best candidate with RHS in that case.
+bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+  // The isScheduleHigh flag allows nodes with wraparound dependencies that
+  // cannot easily be modeled as edges with latencies to be scheduled as
+  // soon as possible in a top-down schedule. When exactly one side carries
+  // the flag, that side wins.
+  if (LHS->isScheduleHigh != RHS->isScheduleHigh)
+    return RHS->isScheduleHigh;
+
+  const unsigned LNum = LHS->NodeNum;
+  const unsigned RNum = RHS->NodeNum;
+
+  // The most important heuristic is scheduling the critical path.
+  const unsigned LLatency = PQ->getLatency(LNum);
+  const unsigned RLatency = PQ->getLatency(RNum);
+  if (LLatency != RLatency)
+    return LLatency < RLatency;
+
+  // With identical latencies, look at which node unblocks more other nodes.
+  const unsigned LBlocked = PQ->getNumSolelyBlockNodes(LNum);
+  const unsigned RBlocked = PQ->getNumSolelyBlockNodes(RNum);
+  if (LBlocked != RBlocked)
+    return LBlocked < RBlocked;
+
+  // Finally, just to provide a stable ordering, fall back to the node number.
+  return LNum < RNum;
+}
+
+
+/// getSingleUnscheduledPred - Return the one predecessor of SU that has not
+/// been scheduled yet. Returns null when there is no such predecessor or
+/// when two different predecessors are still unscheduled.
+SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+  SUnit *Candidate = 0;
+  for (SUnit::const_pred_iterator PI = SU->Preds.begin(),
+         PE = SU->Preds.end(); PI != PE; ++PI) {
+    SUnit *Pred = PI->getSUnit();
+    if (Pred->isScheduled)
+      continue;
+
+    // A second, distinct unscheduled predecessor means there is no single
+    // one to report. The same predecessor may appear on several edges.
+    if (Candidate && Candidate != Pred)
+      return 0;
+    Candidate = Pred;
+  }
+  return Candidate;
+}
+
+/// Add SU to the queue, first recording for how many of its successors SU
+/// is the only predecessor that is still unscheduled.
+void ResourcePriorityQueue::push(SUnit *SU) {
+  unsigned SoleBlockerCount = 0;
+  for (SUnit::const_succ_iterator SI = SU->Succs.begin(),
+         SE = SU->Succs.end(); SI != SE; ++SI) {
+    SUnit *Succ = SI->getSUnit();
+    if (getSingleUnscheduledPred(Succ) == SU)
+      ++SoleBlockerCount;
+  }
+
+  NumNodesSolelyBlocking[SU->NodeNum] = SoleBlockerCount;
+  Queue.push_back(SU);
+}
+
+/// Check if scheduling of this SU is possible in the current packet.
+/// Returns false for a null SU or an SU without an SDNode.
+bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
+  if (!SU || !SU->getNode())
+    return false;
+
+  const SDNode *Node = SU->getNode();
+
+  // If this is a compound instruction,
+  // it is likely to be a call. Do not delay it.
+  if (Node->getGluedNode())
+    return true;
+
+  // First see if the pipeline could receive this instruction
+  // in the current cycle.
+  if (Node->isMachineOpcode()) {
+    const unsigned Opc = Node->getMachineOpcode();
+    switch (Opc) {
+    // These target-independent pseudo opcodes are not checked against the
+    // DFA.
+    case TargetOpcode::EXTRACT_SUBREG:
+    case TargetOpcode::INSERT_SUBREG:
+    case TargetOpcode::SUBREG_TO_REG:
+    case TargetOpcode::REG_SEQUENCE:
+    case TargetOpcode::IMPLICIT_DEF:
+      break;
+    default:
+      if (!ResourcesModel->canReserveResources(&TII->get(Opc)))
+        return false;
+      break;
+    }
+  }
+
+  // Now see if there are no other dependencies
+  // to instructions already in the packet.
+  for (unsigned Idx = 0, NumInPacket = Packet.size(); Idx != NumInPacket;
+       ++Idx) {
+    for (SUnit::const_succ_iterator SI = Packet[Idx]->Succs.begin(),
+           SE = Packet[Idx]->Succs.end(); SI != SE; ++SI) {
+      // Since we do not add pseudos to packets, might as well
+      // ignore order deps.
+      if (!SI->isCtrl() && SI->getSUnit() == SU)
+        return false;
+    }
+  }
+
+  return true;
+}
+
+/// Keep track of available resources: place SU into the current packet,
+/// starting a new packet whenever SU does not fit, is glued, is not a
+/// machine node, or fills the packet up to the issue width.
+void ResourcePriorityQueue::reserveResources(SUnit *SU) {
+  // If this SU does not fit in the packet
+  // start a new one.
+  if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
+    ResourcesModel->clearResources();
+    Packet.clear();
+  }
+
+  const bool IsMachineNode = SU->getNode() && SU->getNode()->isMachineOpcode();
+  if (!IsMachineNode) {
+    // Forcefully end packet for PseudoOps.
+    ResourcesModel->clearResources();
+    Packet.clear();
+  }
+  else {
+    const unsigned Opc = SU->getNode()->getMachineOpcode();
+    switch (Opc) {
+    // These opcodes join the packet without reserving any DFA resources.
+    case TargetOpcode::EXTRACT_SUBREG:
+    case TargetOpcode::INSERT_SUBREG:
+    case TargetOpcode::SUBREG_TO_REG:
+    case TargetOpcode::REG_SEQUENCE:
+    case TargetOpcode::IMPLICIT_DEF:
+      break;
+    default:
+      ResourcesModel->reserveResources(&TII->get(Opc));
+      break;
+    }
+    Packet.push_back(SU);
+  }
+
+  // If packet is now full, reset the state so in the next cycle
+  // we start fresh.
+  if (Packet.size() >= InstrItins->IssueWidth) {
+    ResourcesModel->clearResources();
+    Packet.clear();
+  }
+}
+
+/// Raw def/use balance of SU for register class RCId. Every result of SU in
+/// that class adds numberRCValSuccInSU(SU, RCId); every non-constant operand
+/// in that class subtracts numberRCValPredInSU(SU, RCId). Returns 0 for a
+/// null SU or one whose node is missing or is not a machine node.
+signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
+  signed Balance = 0;
+
+  if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+    return Balance;
+
+  const SDNode *Node = SU->getNode();
+
+  // Gen estimate.
+  for (unsigned Idx = 0, NumVals = Node->getNumValues(); Idx != NumVals;
+       ++Idx) {
+    EVT VT = Node->getValueType(Idx);
+    if (!TLI->isTypeLegal(VT))
+      continue;
+    const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+    if (RC && RC->getID() == RCId)
+      Balance += numberRCValSuccInSU(SU, RCId);
+  }
+
+  // Kill estimate.
+  for (unsigned Idx = 0, NumOps = Node->getNumOperands(); Idx != NumOps;
+       ++Idx) {
+    const SDValue &Op = Node->getOperand(Idx);
+    // Constant operands are skipped.
+    if (isa<ConstantSDNode>(Op.getNode()))
+      continue;
+
+    EVT VT = Op.getNode()->getValueType(Op.getResNo());
+    if (!TLI->isTypeLegal(VT))
+      continue;
+    const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+    if (RC && RC->getID() == RCId)
+      Balance -= numberRCValPredInSU(SU, RCId);
+  }
+  return Balance;
+}
+
+/// Estimates change in reg pressure from this SU.
+/// It is achieved by trivial tracking of defined
+/// and used vregs in dependent instructions.
+/// The RawPressure flag makes this function ignore
+/// existing reg file sizes, and report the raw def/use
+/// balance summed over all register classes. Without the flag, a class
+/// only contributes when the tracked pressure plus its delta is positive
+/// and reaches that class's limit.
+/// Returns 0 for a null SU or one whose node is missing or is not a
+/// machine node.
+signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
+  signed RegBalance = 0;
+
+  if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+    return RegBalance;
+
+  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+         E = TRI->regclass_end(); I != E; ++I) {
+    const unsigned RCId = (*I)->getID();
+    // rawRegPressureDelta() walks SU's values, operands and dependency
+    // edges; evaluate it once per class instead of once per use.
+    const signed Delta = rawRegPressureDelta(SU, RCId);
+
+    if (RawPressure ||
+        ((RegPressure[RCId] + Delta > 0) &&
+         (RegPressure[RCId] + Delta >= RegLimit[RCId])))
+      RegBalance += Delta;
+  }
+
+  return RegBalance;
+}
+
+// Constants used to denote relative importance of
+// heuristic components for cost computation.
+// As used by SUSchedulingCost():
+//  - PriorityOne is added for units flagged isScheduleHigh.
+//  - PriorityThree is the base bonus for call instructions; ScaleThree
+//    weights the call's number of values on top of it.
+//  - PriorityFour is added for INLINEASM nodes, PriorityFive for
+//    TokenFactor, CopyFromReg and CopyToReg nodes.
+//  - ScaleOne weights the raw register pressure delta; ScaleTwo weights the
+//    node height, the solely-blocked node count and the non-raw register
+//    pressure delta.
+//  - FactorOne is the left-shift amount applied to the cost when resources
+//    are available.
+// PriorityTwo is not referenced in the code visible here.
+static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 100;
+static const unsigned PriorityThree = 50;
+static const unsigned PriorityFour = 15;
+static const unsigned PriorityFive = 5;
+static const unsigned ScaleOne = 20;
+static const unsigned ScaleTwo = 10;
+static const unsigned ScaleThree = 5;
+static const unsigned FactorOne = 2;
+
+/// Returns a single number reflecting the benefit of scheduling SU in the
+/// current cycle; larger values are preferred by pop(). An already
+/// scheduled SU gets the trivial cost of 1.
+signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
+  // Initial trivial priority.
+  signed ResCount = 1;
+
+  // Do not waste time on a node that is already scheduled.
+  if (SU->isScheduled)
+    return ResCount;
+
+  // Forced priority is high.
+  if (SU->isScheduleHigh)
+    ResCount += PriorityOne;
+
+  // Adaptable scheduling: a small, but very parallel region, where reg
+  // pressure is an issue, uses the pressure-driven variant. Otherwise the
+  // default greedy, critical-path-driven heuristic applies.
+  const bool PressureDriven =
+    HorizontalVerticalBalance > RegPressureThreshold;
+
+  // Critical path first, in both variants.
+  ResCount += (SU->getHeight() * ScaleTwo);
+
+  // The default heuristic also rewards the number of instructions that are
+  // blocked solely by this SU.
+  if (!PressureDriven)
+    ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
+
+  // If resources are available for it, multiply the
+  // chance of scheduling.
+  if (isResourceAvailable(SU))
+    ResCount <<= FactorOne;
+
+  // Consider change to reg pressure from scheduling this SU: the raw balance
+  // in the pressure-driven variant, the limit-aware one otherwise.
+  if (PressureDriven)
+    ResCount -= (regPressureDelta(SU,true) * ScaleOne);
+  else
+    ResCount -= (regPressureDelta(SU) * ScaleTwo);
+
+  // These are platform specific things.
+  // Will need to go into the back end
+  // and accessed from here via a hook.
+  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+    if (N->isMachineOpcode()) {
+      const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+      if (TID.isCall())
+        ResCount += (PriorityThree + (ScaleThree*N->getNumValues()));
+      continue;
+    }
+
+    switch (N->getOpcode()) {
+    case ISD::INLINEASM:
+      ResCount += PriorityFour;
+      break;
+
+    case ISD::TokenFactor:
+    case ISD::CopyFromReg:
+    case ISD::CopyToReg:
+      ResCount += PriorityFive;
+      break;
+
+    default:
+      break;
+    }
+  }
+  return ResCount;
+}
+
+
+/// Main resource tracking point. Called with the unit that was just
+/// scheduled; a null SU is an event marker that only resets the DFA state
+/// and the current packet.
+/// For a machine node this updates the per-class register pressure and the
+/// NumRegDefsLeft counters of its data predecessors. For every SU it then
+/// reserves resources, re-prioritises predecessors of SU's successors, and
+/// updates ParallelLiveRanges and HorizontalVerticalBalance.
+void ResourcePriorityQueue::ScheduledNode(SUnit *SU) {
+  // Use NULL entry as an event marker to reset
+  // the DFA state.
+  if (!SU) {
+    ResourcesModel->clearResources();
+    Packet.clear();
+    return;
+  }
+
+  const SDNode *ScegN = SU->getNode();
+  // Update reg pressure tracking.
+  // First update current node. An SU may carry no SDNode at all (the other
+  // resource helpers in this file check for that), so guard the dereference.
+  if (ScegN && ScegN->isMachineOpcode()) {
+    // Estimate generated regs.
+    for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+      EVT VT = ScegN->getValueType(i);
+
+      if (TLI->isTypeLegal(VT)) {
+        const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+        if (RC)
+          RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
+      }
+    }
+    // Estimate killed regs.
+    for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+      const SDValue &Op = ScegN->getOperand(i);
+      EVT VT = Op.getNode()->getValueType(Op.getResNo());
+
+      if (TLI->isTypeLegal(VT)) {
+        const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+        if (RC) {
+          const unsigned RCId = RC->getID();
+          // Walk the predecessor list once instead of twice.
+          const unsigned Killed = numberRCValPredInSU(SU, RCId);
+          // Saturate at zero rather than letting the counter wrap.
+          if (RegPressure[RCId] > Killed)
+            RegPressure[RCId] -= Killed;
+          else RegPressure[RCId] = 0;
+        }
+      }
+    }
+    // Each data predecessor that still has defs outstanding loses one.
+    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0))
+        continue;
+      --I->getSUnit()->NumRegDefsLeft;
+    }
+  }
+
+  // Reserve resources for this SU.
+  reserveResources(SU);
+
+  // Adjust number of parallel live ranges.
+  // Heuristic is simple - node with no data successors reduces
+  // number of live ranges. All others, increase it.
+  unsigned NumberNonControlDeps = 0;
+
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    adjustPriorityOfUnscheduledPreds(I->getSUnit());
+    if (!I->isCtrl())
+      NumberNonControlDeps++;
+  }
+
+  if (!NumberNonControlDeps) {
+    if (ParallelLiveRanges >= SU->NumPreds)
+      ParallelLiveRanges -= SU->NumPreds;
+    else
+      ParallelLiveRanges = 0;
+
+  }
+  else
+    ParallelLiveRanges += SU->NumRegDefsLeft;
+
+  // Track parallel live chains.
+  HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
+  HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
+}
+
+/// Compute SU->NumRegDefsLeft by walking SU's node and the nodes glued to it.
+/// A machine node overwrites the count with min(number of values, number of
+/// defs in its instruction description); CopyFromReg and INLINEASM nodes add
+/// one each; an IMPLICIT_DEF resets the count to zero and ends the walk.
+void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
+  unsigned DefCount = 0;
+  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+    if (!N->isMachineOpcode()) {
+      switch(N->getOpcode()) {
+      case ISD::CopyFromReg:
+      case ISD::INLINEASM:
+        ++DefCount;
+        break;
+      default:
+        break;
+      }
+      continue;
+    }
+
+    const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+    // No register need be allocated for this.
+    if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+      DefCount = 0;
+      break;
+    }
+    DefCount = std::min(N->getNumValues(), TID.getNumDefs());
+  }
+
+  SU->NumRegDefsLeft = DefCount;
+}
+
+/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is still not available, at least one of its predecessors
+/// has not been scheduled yet. When exactly ONE such predecessor remains and
+/// it is itself available, it is removed from the queue and pushed again, so
+/// that push() recomputes its NumNodesSolelyBlocking value.
+void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
+  // All preds scheduled.
+  if (SU->isAvailable)
+    return;
+
+  SUnit *SolePred = getSingleUnscheduledPred(SU);
+  if (!SolePred || !SolePred->isAvailable)
+    return;
+
+  // The single remaining predecessor is available but not scheduled, so it
+  // must be in the priority queue. Take it out and reinsert it.
+  remove(SolePred);
+  push(SolePred);
+}
+
+
+/// Main access point - returns the next instruction to be placed in the
+/// scheduling sequence and removes it from the queue. Returns null when the
+/// queue is empty.
+/// Unless DFA scheduling is disabled, the entry with the highest
+/// SUSchedulingCost() wins, and the earliest entry wins ties. Otherwise the
+/// resource_sort comparator picks the entry.
+SUnit *ResourcePriorityQueue::pop() {
+  if (empty())
+    return 0;
+
+  std::vector<SUnit *>::iterator Best = Queue.begin();
+  if (!DisableDFASched) {
+    signed BestCost = SUSchedulingCost(*Best);
+    for (std::vector<SUnit *>::iterator I = Queue.begin(),
+           E = Queue.end(); I != E; ++I) {
+      if (*I == *Best)
+        continue;
+
+      // The cost function walks dependency edges and register classes;
+      // evaluate it once per candidate and reuse the result.
+      const signed Cost = SUSchedulingCost(*I);
+      if (Cost > BestCost) {
+        BestCost = Cost;
+        Best = I;
+      }
+    }
+  }
+  // Use default TD scheduling mechanism.
+  else {
+    for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+           E = Queue.end(); I != E; ++I)
+      if (Picker(*Best, *I))
+        Best = I;
+  }
+
+  // Move the winner to the back so it can be removed without shifting the
+  // rest of the vector.
+  SUnit *V = *Best;
+  if (Best != prior(Queue.end()))
+    std::swap(*Best, Queue.back());
+
+  Queue.pop_back();
+
+  return V;
+}
+
+
+/// Remove SU from the queue. SU must currently be in the queue; the order
+/// of the remaining entries is not preserved (the last entry takes SU's
+/// slot).
+void ResourcePriorityQueue::remove(SUnit *SU) {
+  assert(!Queue.empty() && "Queue is empty!");
+  std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+  // Without this check a missing SU would make the code below dereference
+  // end() and then pop an unrelated element.
+  assert(I != Queue.end() && "SUnit is not in the queue!");
+  if (I != prior(Queue.end()))
+    std::swap(*I, Queue.back());
+
+  Queue.pop_back();
+}
+
+
+#ifdef NDEBUG
+/// Release builds (NDEBUG defined): dumping is a no-op.
+void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {}
+#else
+/// Print every queued unit to dbgs(), in the order pop() would return them,
+/// each prefixed with its height. Works on a copy of the queue object.
+void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {
+  for (ResourcePriorityQueue Copy = *this; !Copy.empty(); ) {
+    SUnit *Next = Copy.pop();
+    dbgs() << "Height " << Next->getHeight() << ": ";
+    Next->dump(DAG);
+  }
+}
+#endif
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index b275c63..34ee1f3 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -447,7 +447,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
Added = true;
}
}
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
if (RegAdded.insert(*Alias)) {
LRegs.push_back(*Alias);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 80162d7..1017d36 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1175,7 +1175,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
SmallSet<unsigned, 4> &RegAdded,
SmallVector<unsigned, 4> &LRegs,
const TargetRegisterInfo *TRI) {
- for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
+ for (const uint16_t *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
// Check if Ref is live.
if (!LiveRegDefs[*AliasI]) continue;
@@ -1190,6 +1190,31 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
}
}
+/// CheckForLiveRegDefMasked - Append to LRegs every physical register that
+/// has a live definition from a unit other than SU and that is clobbered by
+/// RegMask. RegAdded keeps a register from being recorded twice.
+static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
+                                     std::vector<SUnit*> &LiveRegDefs,
+                                     SmallSet<unsigned, 4> &RegAdded,
+                                     SmallVector<unsigned, 4> &LRegs) {
+  // Look at all live registers. Skip Reg0 and the special CallResource.
+  const unsigned LastIdx = LiveRegDefs.size()-1;
+  for (unsigned Reg = 1; Reg != LastIdx; ++Reg) {
+    SUnit *Def = LiveRegDefs[Reg];
+    const bool LiveElsewhere = Def && Def != SU;
+    // The insert() must stay last: it is only attempted for registers that
+    // are both live elsewhere and clobbered by the mask.
+    if (LiveElsewhere && MachineOperand::clobbersPhysReg(RegMask, Reg) &&
+        RegAdded.insert(Reg))
+      LRegs.push_back(Reg);
+  }
+}
+
+/// getNodeRegMask - Scan the operands of N in order and return the register
+/// mask of the first RegisterMaskSDNode operand, or NULL if there is none.
+static const uint32_t *getNodeRegMask(const SDNode *N) {
+  const unsigned NumOps = N->getNumOperands();
+  for (unsigned OpNo = 0; OpNo != NumOps; ++OpNo) {
+    const RegisterMaskSDNode *MaskOp =
+      dyn_cast<RegisterMaskSDNode>(N->getOperand(OpNo).getNode());
+    if (!MaskOp)
+      continue;
+    return MaskOp->getRegMask();
+  }
+  return NULL;
+}
+
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
@@ -1255,6 +1280,9 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
LRegs.push_back(CallResource);
}
}
+ if (const uint32_t *RegMask = getNodeRegMask(Node))
+ CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs);
+
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
if (!MCID.ImplicitDefs)
continue;
@@ -2635,7 +2663,8 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
const TargetRegisterInfo *TRI) {
const unsigned *ImpDefs
= TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
- if(!ImpDefs)
+ const uint32_t *RegMask = getNodeRegMask(SU->getNode());
+ if(!ImpDefs && !RegMask)
return false;
for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
@@ -2646,14 +2675,18 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
if (!PI->isAssignedRegDep())
continue;
- for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) {
- // Return true if SU clobbers this physical register use and the
- // definition of the register reaches from DepSU. IsReachable queries a
- // topological forward sort of the DAG (following the successors).
- if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
- scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
- return true;
- }
+ if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) &&
+ scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+ return true;
+
+ if (ImpDefs)
+ for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
+ // Return true if SU clobbers this physical register use and the
+ // definition of the register reaches from DepSU. IsReachable queries
+ // a topological forward sort of the DAG (following the successors).
+ if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
+ scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+ return true;
}
}
return false;
@@ -2674,8 +2707,9 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
continue;
const unsigned *SUImpDefs =
TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
- if (!SUImpDefs)
- return false;
+ const uint32_t *SURegMask = getNodeRegMask(SUNode);
+ if (!SUImpDefs && !SURegMask)
+ continue;
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
if (VT == MVT::Glue || VT == MVT::Other)
@@ -2683,6 +2717,10 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
if (!N->hasAnyUseOfValue(i))
continue;
unsigned Reg = ImpDefs[i - NumDefs];
+ if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
+ return true;
+ if (!SUImpDefs)
+ continue;
for (;*SUImpDefs; ++SUImpDefs) {
unsigned SUReg = *SUImpDefs;
if (TRI->regsOverlap(Reg, SUReg))
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 9c27b2e..17b4901 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -53,6 +53,7 @@ namespace llvm {
if (isa<ConstantSDNode>(Node)) return true;
if (isa<ConstantFPSDNode>(Node)) return true;
if (isa<RegisterSDNode>(Node)) return true;
+ if (isa<RegisterMaskSDNode>(Node)) return true;
if (isa<GlobalAddressSDNode>(Node)) return true;
if (isa<BasicBlockSDNode>(Node)) return true;
if (isa<FrameIndexSDNode>(Node)) return true;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
new file mode 100644
index 0000000..7d12509
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -0,0 +1,276 @@
+//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include <climits>
+using namespace llvm;
+
+// Statistics counters incremented by listScheduleTopDown(): NumNoops when a
+// noop is emitted, NumStalls when the cycle is advanced with nothing issued.
+STATISTIC(NumNoops , "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+// Registers this scheduler under the name "vliw-td", with
+// createVLIWDAGScheduler as the function that constructs it.
+static RegisterScheduler
+ VLIWScheduler("vliw-td", "VLIW scheduler",
+ createVLIWDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This
+/// supports top-down scheduling.
+///
+class ScheduleDAGVLIW : public ScheduleDAGSDNodes {
+private:
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  /// Owned by this object and deleted in the destructor.
+  SchedulingPriorityQueue *AvailableQueue;
+
+  /// PendingQueue - This contains all of the instructions whose operands have
+  /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation). Once the operands become available, the instruction is
+  /// added to the AvailableQueue.
+  std::vector<SUnit*> PendingQueue;
+
+  /// HazardRec - The hazard recognizer to use. Created in the constructor
+  /// and deleted in the destructor.
+  ScheduleHazardRecognizer *HazardRec;
+
+  /// AA - AliasAnalysis for making memory reference queries.
+  AliasAnalysis *AA;
+
+  // This class deletes HazardRec and AvailableQueue in its destructor, so a
+  // copy would delete both objects twice. Copying is therefore disabled.
+  ScheduleDAGVLIW(const ScheduleDAGVLIW &); // DO NOT IMPLEMENT
+  void operator=(const ScheduleDAGVLIW &);  // DO NOT IMPLEMENT
+
+public:
+  /// Takes ownership of availqueue and asks the target's instruction info for
+  /// a hazard recognizer bound to this scheduler.
+  ScheduleDAGVLIW(MachineFunction &mf,
+                  AliasAnalysis *aa,
+                  SchedulingPriorityQueue *availqueue)
+    : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
+
+    const TargetMachine &tm = mf.getTarget();
+    HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+  }
+
+  ~ScheduleDAGVLIW() {
+    delete HazardRec;
+    delete AvailableQueue;
+  }
+
+  void Schedule();
+
+private:
+  void releaseSucc(SUnit *SU, const SDep &D);
+  void releaseSuccessors(SUnit *SU);
+  void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+  void listScheduleTopDown();
+};
+} // end anonymous namespace
+
+/// Schedule - Schedule the DAG using list scheduling.
+/// Builds the scheduling graph, initializes the available queue with the
+/// resulting SUnits, runs the top-down list scheduling loop and finally lets
+/// the queue release its state.
+void ScheduleDAGVLIW::Schedule() {
+ DEBUG(dbgs()
+ << "********** List Scheduling BB#" << BB->getNumber()
+ << " '" << BB->getName() << "' **********\n");
+
+ // Build the scheduling graph.
+ BuildSchedGraph(AA);
+
+ // Hand the freshly built SUnits to the priority queue before scheduling.
+ AvailableQueue->initNodes(SUnits);
+
+ listScheduleTopDown();
+
+ // Scheduling is finished; let the queue release its state.
+ AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// releaseSucc - Record that SU, a predecessor of the successor on edge D,
+/// has been scheduled: decrement the successor's NumPredsLeft, raise its
+/// depth to at least SU's depth plus the edge latency, and put it on the
+/// PendingQueue once the count reaches zero.
+void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
+  SUnit *Succ = D.getSUnit();
+
+#ifndef NDEBUG
+  if (Succ->NumPredsLeft == 0) {
+    dbgs() << "*** Scheduling failed! ***\n";
+    Succ->dump(this);
+    dbgs() << " has been released too many times!\n";
+    llvm_unreachable(0);
+  }
+#endif
+  --Succ->NumPredsLeft;
+
+  Succ->setDepthToAtLeast(SU->getDepth() + D.getLatency());
+
+  // Some predecessors are still unscheduled: the node is not ready yet.
+  if (Succ->NumPredsLeft != 0)
+    return;
+
+  // Ignore the special ExitSU node; it is never queued.
+  if (Succ == &ExitSU)
+    return;
+
+  PendingQueue.push_back(Succ);
+}
+
+/// releaseSuccessors - Call releaseSucc() for every successor edge of SU.
+/// Assigned physical-register dependencies are not supported and trigger an
+/// assertion.
+void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) {
+  // Top down: release successors.
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    // This scheduler is registered as "vliw-td"; name it correctly here.
+    assert(!I->isAssignedRegDep() &&
+           "The vliw-td scheduler doesn't yet support physreg dependencies!");
+
+    releaseSucc(SU, *I);
+  }
+}
+
+/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the PendingQueue (this happens inside releaseSucc); the main loop later
+/// moves it to the Available queue.
+void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ // Record the cycle the node was actually issued in as its depth.
+ SU->setDepthToAtLeast(CurCycle);
+
+ releaseSuccessors(SU);
+ SU->isScheduled = true;
+ // Tell the priority queue which node was just scheduled.
+ AvailableQueue->ScheduledNode(SU);
+}
+
+/// listScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+/// Each iteration first moves pending nodes whose depth equals the current
+/// cycle into the available queue, then pops candidates until the hazard
+/// recognizer accepts one. Depending on the outcome it schedules that node,
+/// records a stall, or emits a noop (a null entry in Sequence).
+void ScheduleDAGVLIW::listScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // Release any successors of the special Entry node.
+ releaseSuccessors(&EntrySU);
+
+ // All leaves to AvailableQueue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.empty()) {
+ AvailableQueue->push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // While AvailableQueue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ // NotReady collects the candidates rejected by the hazard recognizer during
+ // one iteration so they can be pushed back afterwards.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() == CurCycle) {
+ AvailableQueue->push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
+ // Remove slot i by overwriting it with the last element.
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ // Revisit slot i (it now holds a different node) and shrink the bound.
+ --i; --e;
+ }
+ else {
+ assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
+ }
+ }
+
+ // If there are no instructions available, don't try to issue anything, and
+ // don't advance the hazard recognizer.
+ if (AvailableQueue->empty()) {
+ // Reset DFA state.
+ AvailableQueue->ScheduledNode(0);
+ ++CurCycle;
+ continue;
+ }
+
+ SUnit *FoundSUnit = 0;
+
+ bool HasNoopHazards = false;
+ // Pop candidates in priority order until one has no hazard.
+ while (!AvailableQueue->empty()) {
+ SUnit *CurSUnit = AvailableQueue->pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue->push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule, do it now.
+ if (FoundSUnit) {
+ scheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+
+ // If this is a pseudo-op node, we don't want to increment the current
+ // cycle.
+ if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
+ ++CurCycle;
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall, but no other problem, just advance
+ // the current cycle and try again.
+ DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ ++CurCycle;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ DEBUG(dbgs() << "*** Emitting noop\n");
+ HazardRec->EmitNoop();
+ Sequence.push_back(0); // NULL here means noop
+ ++NumNoops;
+ ++CurCycle;
+ }
+ }
+
+#ifndef NDEBUG
+ // Debug builds check the finished schedule as a top-down one.
+ VerifySchedule(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// createVLIWDAGScheduler - Build a ScheduleDAGVLIW for IS's machine
+/// function, backed by a newly allocated ResourcePriorityQueue. The
+/// optimization-level argument is not used.
+ScheduleDAGSDNodes *
+llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+  SchedulingPriorityQueue *AvailQueue = new ResourcePriorityQueue(IS);
+  return new ScheduleDAGVLIW(*IS->MF, IS->AA, AvailQueue);
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index dd626e2..796abf4 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -383,7 +383,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
case ISD::Register:
ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
break;
-
+ case ISD::RegisterMask:
+ ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask());
+ break;
case ISD::SRCVALUE:
ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
break;
@@ -1037,10 +1039,8 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
&ignored);
return getConstantFP(apf, VT, isTarget);
- } else {
- assert(0 && "Unsupported type in getConstantFP");
- return SDValue();
- }
+ } else
+ llvm_unreachable("Unsupported type in getConstantFP");
}
SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL,
@@ -1375,6 +1375,20 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
return SDValue(N, 0);
}
+/// getRegisterMask - Return the RegisterMask node for RegMask, reusing an
+/// existing node from the CSE map when one was already created for the same
+/// mask pointer.
+SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
+  // The CSE key is the node header followed by the mask pointer itself.
+  FoldingSetNodeID Key;
+  AddNodeIDNode(Key, ISD::RegisterMask, getVTList(MVT::Untyped), 0, 0);
+  Key.AddPointer(RegMask);
+
+  void *InsertPos = 0;
+  SDNode *Node = CSEMap.FindNodeOrInsertPos(Key, InsertPos);
+  if (!Node) {
+    // Nothing cached yet: allocate the node and register it.
+    Node = new (NodeAllocator) RegisterMaskSDNode(RegMask);
+    CSEMap.InsertNode(Node, InsertPos);
+    AllNodes.push_back(Node);
+  }
+  return SDValue(Node, 0);
+}
+
SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
FoldingSetNodeID ID;
SDValue Ops[] = { Root };
@@ -2229,8 +2243,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
if (Tmp2 == 1) return 1;
- return std::min(Tmp, Tmp2)-1;
- break;
+ return std::min(Tmp, Tmp2)-1;
case ISD::SUB:
Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
@@ -2259,8 +2272,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
// is, at worst, one more bit than the inputs.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp == 1) return 1; // Early out.
- return std::min(Tmp, Tmp2)-1;
- break;
+ return std::min(Tmp, Tmp2)-1;
case ISD::TRUNCATE:
// FIXME: it's tricky to do anything useful for this, but it is an important
// case for targets like X86.
@@ -2571,17 +2583,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
"Vector element count mismatch!");
if (OpOpcode == ISD::TRUNCATE)
return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
- else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
- OpOpcode == ISD::ANY_EXTEND) {
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND) {
// If the source is smaller than the dest, we still need an extend.
if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
.bitsLT(VT.getScalarType()))
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
- else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
+ if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
- else
- return Operand.getNode()->getOperand(0);
+ return Operand.getNode()->getOperand(0);
}
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
break;
case ISD::BITCAST:
// Basic sanity checking.
@@ -3143,16 +3156,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::SELECT:
if (N1C) {
if (N1C->getZExtValue())
- return N2; // select true, X, Y -> X
- else
- return N3; // select false, X, Y -> Y
+ return N2; // select true, X, Y -> X
+ return N3; // select false, X, Y -> Y
}
if (N2 == N3) return N2; // select C, X, X -> X
break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
- break;
case ISD::INSERT_SUBVECTOR: {
SDValue Index = N3;
if (VT.isSimple() && N1.getValueType().isSimple()
@@ -3285,8 +3296,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
/// used when a memcpy is turned into a memset when the source is a constant
/// string ptr.
static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
- const TargetLowering &TLI,
- std::string &Str, unsigned Offset) {
+ const TargetLowering &TLI, StringRef Str) {
// Handle vector with all elements zero.
if (Str.empty()) {
if (VT.isInteger())
@@ -3304,15 +3314,18 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
}
assert(!VT.isVector() && "Can't handle vector type here!");
- unsigned NumBits = VT.getSizeInBits();
- unsigned MSB = NumBits / 8;
+ unsigned NumVTBytes = VT.getSizeInBits() / 8;
+ unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size()));
+
uint64_t Val = 0;
- if (TLI.isLittleEndian())
- Offset = Offset + MSB - 1;
- for (unsigned i = 0; i != MSB; ++i) {
- Val = (Val << 8) | (unsigned char)Str[Offset];
- Offset += TLI.isLittleEndian() ? -1 : 1;
+ if (TLI.isLittleEndian()) {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Str[i] << i*8;
+ } else {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;
}
+
return DAG.getConstant(Val, VT);
}
@@ -3327,7 +3340,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
/// isMemSrcFromString - Returns true if memcpy source is a string constant.
///
-static bool isMemSrcFromString(SDValue Src, std::string &Str) {
+static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
unsigned SrcDelta = 0;
GlobalAddressSDNode *G = NULL;
if (Src.getOpcode() == ISD::GlobalAddress)
@@ -3341,11 +3354,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) {
if (!G)
return false;
- const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
- if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false))
- return true;
-
- return false;
+ return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false);
}
/// FindOptimalMemOpLowering - Determines the optimial series memory ops
@@ -3448,7 +3457,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
SrcAlign = Align;
- std::string Str;
+ StringRef Str;
bool CopyFromStr = isMemSrcFromString(Src, Str);
bool isZeroStr = CopyFromStr && Str.empty();
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
@@ -3485,7 +3494,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// We only handle zero vectors here.
// FIXME: Handle other cases where store of vector immediate is done in
// a single instruction.
- Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
+ Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff));
Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
DstPtrInfo.getWithOffset(DstOff), isVol,
@@ -3727,8 +3736,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
std::pair<SDValue,SDValue> CallResult =
TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
- TLI.getLibcallCallingConv(RTLIB::MEMCPY), false,
- /*isReturnValueUsed=*/false,
+ TLI.getLibcallCallingConv(RTLIB::MEMCPY),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY),
TLI.getPointerTy()),
Args, *this, dl);
@@ -3779,8 +3789,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
std::pair<SDValue,SDValue> CallResult =
TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
- TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false,
- /*isReturnValueUsed=*/false,
+ TLI.getLibcallCallingConv(RTLIB::MEMMOVE),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE),
TLI.getPointerTy()),
Args, *this, dl);
@@ -3839,8 +3850,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
std::pair<SDValue,SDValue> CallResult =
TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
- TLI.getLibcallCallingConv(RTLIB::MEMSET), false,
- /*isReturnValueUsed=*/false,
+ TLI.getLibcallCallingConv(RTLIB::MEMSET),
+ /*isTailCall=*/false,
+ /*doesNotReturn*/false, /*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
TLI.getPointerTy()),
Args, *this, dl);
@@ -5901,7 +5913,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
if (G)
if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
if (getMachineOpcode() < TII->getNumOpcodes())
- return TII->get(getMachineOpcode()).getName();
+ return TII->getName(getMachineOpcode());
return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
}
if (G) {
@@ -5945,7 +5957,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::BasicBlock: return "BasicBlock";
case ISD::VALUETYPE: return "ValueType";
case ISD::Register: return "Register";
-
+ case ISD::RegisterMask: return "RegisterMask";
case ISD::Constant: return "Constant";
case ISD::ConstantFP: return "ConstantFP";
case ISD::GlobalAddress: return "GlobalAddress";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 68c9514..4e4aa11 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -197,7 +197,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
// FP_ROUND's are always exact here.
if (ValueVT.bitsLT(Val.getValueType()))
return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
- DAG.getIntPtrConstant(1));
+ DAG.getTargetConstant(1, TLI.getPointerTy()));
return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
@@ -206,7 +206,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
llvm_unreachable("Unknown mismatch!");
- return SDValue();
}
/// getCopyFromParts - Create a value that contains the specified legal parts
@@ -353,10 +352,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
assert(NumParts == 1 && "Do not know what to promote to!");
Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
} else {
- assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
"Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
} else if (PartBits == ValueVT.getSizeInBits()) {
// Different types of the same size.
@@ -364,10 +366,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
// If the parts cover less bits than value has, truncate the value.
- assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
"Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
// The value may have changed - recompute ValueVT.
@@ -966,7 +971,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
DAG.AddDbgValue(SDV, Val.getNode(), false);
}
} else
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
DanglingDebugInfoMap[V] = DanglingDebugInfo();
}
}
@@ -1056,6 +1061,23 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
return DAG.getMergeValues(&Constants[0], Constants.size(),
getCurDebugLoc());
}
+
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(C)) {
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
+ // Add each leaf value from the operand to the Constants list
+ // to form a flattened list of all the values.
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Ops.push_back(SDValue(Val, i));
+ }
+
+ if (isa<ArrayType>(CDS->getType()))
+ return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc());
+ return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size());
+ }
if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
@@ -1090,9 +1112,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
// Now that we know the number and type of the elements, get that number of
// elements into the Ops array based on what kind of constant it is.
SmallVector<SDValue, 16> Ops;
- if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
for (unsigned i = 0; i != NumElements; ++i)
- Ops.push_back(getValue(CP->getOperand(i)));
+ Ops.push_back(getValue(CV->getOperand(i)));
} else {
assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
EVT EltVT = TLI.getValueType(VecTy->getElementType());
@@ -1128,7 +1150,6 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
}
llvm_unreachable("Can't get register for value!");
- return SDValue();
}
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
@@ -1287,8 +1308,8 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
}
/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
-uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src,
- MachineBasicBlock *Dst) {
+uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
if (!BPI)
return 0;
@@ -1824,9 +1845,6 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
DAG.getBasicBlock(Return)));
}
-void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) {
-}
-
void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
}
@@ -1839,6 +1857,12 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
AddLandingPadInfo(LP, MMI, MBB);
+ // If there aren't registers to copy the values into (e.g., during SjLj
+ // exceptions), then don't bother to create these DAG nodes.
+ if (TLI.getExceptionPointerRegister() == 0 &&
+ TLI.getExceptionSelectorRegister() == 0)
+ return;
+
SmallVector<EVT, 2> ValueVTs;
ComputeValueVTs(TLI, LP.getType(), ValueVTs);
@@ -2194,7 +2218,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
CaseRange LHSR(CR.Range.first, Pivot);
CaseRange RHSR(Pivot, CR.Range.second);
- Constant *C = Pivot->Low;
+ const Constant *C = Pivot->Low;
MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
// We know that we branch to the LHS if the Value being switched on is
@@ -2387,14 +2411,14 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
BranchProbabilityInfo *BPI = FuncInfo.BPI;
// Start with "simple" cases
- for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
- BasicBlock *SuccBB = SI.getSuccessor(i);
+ for (size_t i = 0; i < SI.getNumCases(); ++i) {
+ BasicBlock *SuccBB = SI.getCaseSuccessor(i);
MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0;
- Cases.push_back(Case(SI.getSuccessorValue(i),
- SI.getSuccessorValue(i),
+ Cases.push_back(Case(SI.getCaseValue(i),
+ SI.getCaseValue(i),
SMBB, ExtraWeight));
}
std::sort(Cases.begin(), Cases.end(), CaseCmp());
@@ -2461,7 +2485,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// If there is only the default destination, branch to it if it is not the
// next basic block. Otherwise, just fall through.
- if (SI.getNumCases() == 1) {
+ if (!SI.getNumCases()) {
// Update machine-CFG edges.
// If this is not a fall-through branch, emit the branch.
@@ -2691,7 +2715,8 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) {
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
- DestVT, N, DAG.getIntPtrConstant(0)));
+ DestVT, N,
+ DAG.getTargetConstant(0, TLI.getPointerTy())));
}
void SelectionDAGBuilder::visitFPExt(const User &I){
@@ -2778,33 +2803,25 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
TLI.getValueType(I.getType()), InVec, InIdx));
}
-// Utility for visitShuffleVector - Returns true if the mask is mask starting
-// from SIndx and increasing to the element length (undefs are allowed).
-static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
- unsigned MaskNumElts = Mask.size();
- for (unsigned i = 0; i != MaskNumElts; ++i)
- if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
+// Utility for visitShuffleVector - Return true if every element in Mask,
+// beginning from position Pos and ending before Pos+Size, falls within the
+// specified sequential range [Low, Low+Size), or is undef.
+static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
+ int Pos, int Size, int Low) {
+ for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+ if (Mask[i] >= 0 && Mask[i] != Low)
return false;
return true;
}
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
- SmallVector<int, 8> Mask;
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
- // Convert the ConstantVector mask operand into an array of ints, with -1
- // representing undef values.
- SmallVector<Constant*, 8> MaskElts;
- cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
- unsigned MaskNumElts = MaskElts.size();
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- if (isa<UndefValue>(MaskElts[i]))
- Mask.push_back(-1);
- else
- Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
- }
-
+ SmallVector<int, 8> Mask;
+ ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
+ unsigned MaskNumElts = Mask.size();
+
EVT VT = TLI.getValueType(I.getType());
EVT SrcVT = Src1.getValueType();
unsigned SrcNumElts = SrcVT.getVectorNumElements();
@@ -2820,11 +2837,23 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// Mask is longer than the source vectors and is a multiple of the source
// vectors. We can use concatenate vector to make the mask and vectors
// lengths match.
- if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
- // The shuffle is concatenating two vectors together.
- setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
- VT, Src1, Src2));
- return;
+ if (SrcNumElts*2 == MaskNumElts) {
+ // First check for Src1 in low and Src2 in high
+ if (isSequentialInRange(Mask, 0, SrcNumElts, 0) &&
+ isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src1, Src2));
+ return;
+ }
+ // Then check for Src2 in low and Src1 in high
+ if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) &&
+ isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src2, Src1));
+ return;
+ }
}
// Pad both vectors with undefs to make them the same length as the mask.
@@ -3048,7 +3077,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
SDValue N = getValue(I.getOperand(0));
- Type *Ty = I.getOperand(0)->getType();
+ // Note that the pointer operand may be a vector of pointers. Take the scalar
+ // element which holds a pointer.
+ Type *Ty = I.getOperand(0)->getType()->getScalarType();
for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
OI != E; ++OI) {
@@ -3365,7 +3396,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
DebugLoc dl = getCurDebugLoc();
ISD::NodeType NT;
switch (I.getOperation()) {
- default: llvm_unreachable("Unknown atomicrmw operation"); return;
+ default: llvm_unreachable("Unknown atomicrmw operation");
case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
@@ -3503,24 +3534,16 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
Info.opc == ISD::INTRINSIC_W_CHAIN)
- Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
+ Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy()));
// Add all operands of the call to the operand list.
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
SDValue Op = getValue(I.getArgOperand(i));
- assert(TLI.isTypeLegal(Op.getValueType()) &&
- "Intrinsic uses a non-legal type?");
Ops.push_back(Op);
}
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, I.getType(), ValueVTs);
-#ifndef NDEBUG
- for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
- assert(TLI.isTypeLegal(ValueVTs[Val]) &&
- "Intrinsic uses a non-legal type?");
- }
-#endif // NDEBUG
if (HasChain)
ValueVTs.push_back(MVT::Other);
@@ -4480,9 +4503,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::setjmp:
- return "_setjmp"+!TLI.usesUnderscoreSetJmp();
+ return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
case Intrinsic::longjmp:
- return "_longjmp"+!TLI.usesUnderscoreLongJmp();
+ return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
case Intrinsic::memcpy: {
// Assert for address < 256 since we support only user defined address
// spaces.
@@ -4550,7 +4573,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Check if address has undef value.
if (isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
return 0;
}
@@ -4560,11 +4583,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
N = UnusedArgNodeMap[Address];
SDDbgValue *SDV;
if (N.getNode()) {
- // Parameters are handled specially.
- bool isParameter =
- DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
+ // Parameters are handled specially.
+ bool isParameter =
+ (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable ||
+ isa<Argument>(Address));
+
const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
if (isParameter && !AI) {
@@ -4584,7 +4609,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
0, dl, SDNodeOrder);
else {
// Can't do anything with other non-AI cases yet.
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t");
+ DEBUG(Address->dump());
return 0;
}
DAG.AddDbgValue(SDV, N.getNode(), isParameter);
@@ -4606,7 +4633,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
}
}
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
}
return 0;
@@ -4652,7 +4679,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
} else {
// We may expand this to cover more cases. One case where we have no
// data available is an unreferenced parameter.
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
}
@@ -4674,43 +4701,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
return 0;
}
- case Intrinsic::eh_exception: {
- // Insert the EXCEPTIONADDR instruction.
- assert(FuncInfo.MBB->isLandingPad() &&
- "Call to eh.exception not in landing pad!");
- SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
- SDValue Ops[1];
- Ops[0] = DAG.getRoot();
- SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
- setValue(&I, Op);
- DAG.setRoot(Op.getValue(1));
- return 0;
- }
-
- case Intrinsic::eh_selector: {
- MachineBasicBlock *CallMBB = FuncInfo.MBB;
- MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- if (CallMBB->isLandingPad())
- AddCatchInfo(I, &MMI, CallMBB);
- else {
-#ifndef NDEBUG
- FuncInfo.CatchInfoLost.insert(&I);
-#endif
- // FIXME: Mark exception selector register as live in. Hack for PR1508.
- unsigned Reg = TLI.getExceptionSelectorRegister();
- if (Reg) FuncInfo.MBB->addLiveIn(Reg);
- }
-
- // Insert the EHSELECTION instruction.
- SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
- SDValue Ops[2];
- Ops[0] = getValue(I.getArgOperand(0));
- Ops[1] = getRoot();
- SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
- DAG.setRoot(Op.getValue(1));
- setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32));
- return 0;
- }
case Intrinsic::eh_typeid_for: {
// Find the type id for the given typeinfo.
@@ -4843,6 +4833,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, Res);
return 0;
}
+ case Intrinsic::x86_avx_vinsertf128_pd_256:
+ case Intrinsic::x86_avx_vinsertf128_ps_256:
+ case Intrinsic::x86_avx_vinsertf128_si_256: {
+ DebugLoc dl = getCurDebugLoc();
+ EVT DestVT = TLI.getValueType(I.getType());
+ EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
+ uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
+ ElVT.getVectorNumElements();
+ Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT,
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ DAG.getConstant(Idx, MVT::i32));
+ setValue(&I, Res);
+ return 0;
+ }
case Intrinsic::convertff:
case Intrinsic::convertfsi:
case Intrinsic::convertfui:
@@ -5058,7 +5063,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::gcread:
case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
- return 0;
case Intrinsic::flt_rounds:
setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
return 0;
@@ -5079,7 +5083,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
std::pair<SDValue, SDValue> Result =
TLI.LowerCallTo(getRoot(), I.getType(),
false, false, false, false, 0, CallingConv::C,
- /*isTailCall=*/false, /*isReturnValueUsed=*/true,
+ /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
Args, DAG, getCurDebugLoc());
DAG.setRoot(Result.second);
@@ -5242,6 +5247,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
CS.getCallingConv(),
isTailCall,
+ CS.doesNotReturn(),
!CS.getInstruction()->use_empty(),
Callee, Args, DAG, getCurDebugLoc());
assert((isTailCall || Result.second.getNode()) &&
@@ -5477,23 +5483,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
return;
}
- // See if any floating point values are being passed to this function. This is
- // used to emit an undefined reference to fltused on Windows.
- FunctionType *FT =
- cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- if (FT->isVarArg() &&
- !MMI.callsExternalVAFunctionWithFloatingPointArguments()) {
- for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
- Type* T = I.getArgOperand(i)->getType();
- for (po_iterator<Type*> i = po_begin(T), e = po_end(T);
- i != e; ++i) {
- if (!i->isFloatingPointTy()) continue;
- MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true);
- break;
- }
- }
- }
+ ComputeUsesVAFloatArgument(I, &MMI);
const char *RenameFn = 0;
if (Function *F = I.getCalledFunction()) {
@@ -5636,7 +5627,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
(LibInfo->has(LibFunc::log2l) && Name == "log2l")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.onlyReadsMemory()) {
SDValue Tmp = getValue(I.getArgOperand(0));
setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(),
Tmp.getValueType(), Tmp));
@@ -5647,7 +5639,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
(LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.onlyReadsMemory()) {
SDValue Tmp = getValue(I.getArgOperand(0));
setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(),
Tmp.getValueType(), Tmp));
@@ -5690,22 +5683,6 @@ public:
: TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
}
- /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
- /// busy in OutputRegs/InputRegs.
- void MarkAllocatedRegs(bool isOutReg, bool isInReg,
- std::set<unsigned> &OutputRegs,
- std::set<unsigned> &InputRegs,
- const TargetRegisterInfo &TRI) const {
- if (isOutReg) {
- for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
- MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
- }
- if (isInReg) {
- for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
- MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
- }
- }
-
/// getCallOperandValEVT - Return the EVT of the Value* that this operand
/// corresponds to. If there is no Value* for this operand, it returns
/// MVT::Other.
@@ -5753,18 +5730,6 @@ public:
return TLI.getValueType(OpTy, true);
}
-
-private:
- /// MarkRegAndAliases - Mark the specified register and all aliases in the
- /// specified set.
- static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
- const TargetRegisterInfo &TRI) {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
- Regs.insert(Reg);
- if (const unsigned *Aliases = TRI.getAliasSet(Reg))
- for (; *Aliases; ++Aliases)
- Regs.insert(*Aliases);
- }
};
typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
@@ -5778,39 +5743,13 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
/// allocation. This produces generally horrible, but correct, code.
///
/// OpInfo describes the operand.
-/// Input and OutputRegs are the set of already allocated physical registers.
///
static void GetRegistersForValue(SelectionDAG &DAG,
const TargetLowering &TLI,
DebugLoc DL,
- SDISelAsmOperandInfo &OpInfo,
- std::set<unsigned> &OutputRegs,
- std::set<unsigned> &InputRegs) {
+ SDISelAsmOperandInfo &OpInfo) {
LLVMContext &Context = *DAG.getContext();
- // Compute whether this value requires an input register, an output register,
- // or both.
- bool isOutReg = false;
- bool isInReg = false;
- switch (OpInfo.Type) {
- case InlineAsm::isOutput:
- isOutReg = true;
-
- // If there is an input constraint that matches this, we need to reserve
- // the input register so no other inputs allocate to it.
- isInReg = OpInfo.hasMatchingInput();
- break;
- case InlineAsm::isInput:
- isInReg = true;
- isOutReg = false;
- break;
- case InlineAsm::isClobber:
- isOutReg = true;
- isInReg = true;
- break;
- }
-
-
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<unsigned, 4> Regs;
@@ -5884,8 +5823,6 @@ static void GetRegistersForValue(SelectionDAG &DAG,
}
OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
- const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
- OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
return;
}
@@ -5916,8 +5853,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
/// ConstraintOperands - Information about all of the constraints.
SDISelAsmOperandInfoVector ConstraintOperands;
- std::set<unsigned> OutputRegs, InputRegs;
-
TargetLowering::AsmOperandInfoVector
TargetConstraints = TLI.ParseConstraints(CS);
@@ -6050,7 +5985,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// constant pool entry to get its address.
const Value *OpVal = OpInfo.CallOperandVal;
if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
- isa<ConstantVector>(OpVal)) {
+ isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
TLI.getPointerTy());
} else {
@@ -6079,8 +6014,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// If this constraint is for a specific register, allocate it before
// anything else.
if (OpInfo.ConstraintType == TargetLowering::C_Register)
- GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
- InputRegs);
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
}
// Second pass - Loop over all of the operands, assigning virtual or physregs
@@ -6091,8 +6025,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// C_Register operands have already been allocated, Other/Memory don't need
// to be.
if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
- GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
- InputRegs);
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
@@ -6146,9 +6079,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Copy the output from the appropriate register. Find a register that
// we can use.
- if (OpInfo.AssignedRegs.Regs.empty())
- report_fatal_error("Couldn't allocate output reg for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'!");
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "couldn't allocate output register for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
// If this is an indirect operand, store through the pointer after the
// asm.
@@ -6248,9 +6185,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
std::vector<SDValue> Ops;
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
Ops, DAG);
- if (Ops.empty())
- report_fatal_error("Invalid operand for inline asm constraint '" +
- Twine(OpInfo.ConstraintCode) + "'!");
+ if (Ops.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "invalid operand for inline asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType =
@@ -6281,9 +6222,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
"Don't know how to handle indirect register inputs yet!");
// Copy the input into the appropriate registers.
- if (OpInfo.AssignedRegs.Regs.empty())
- report_fatal_error("Couldn't allocate input reg for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'!");
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "couldn't allocate input reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
Chain, &Flag);
@@ -6421,7 +6366,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
bool RetSExt, bool RetZExt, bool isVarArg,
bool isInreg, unsigned NumFixedArgs,
CallingConv::ID CallConv, bool isTailCall,
- bool isReturnValueUsed,
+ bool doesNotRet, bool isReturnValueUsed,
SDValue Callee,
ArgListTy &Args, SelectionDAG &DAG,
DebugLoc dl) const {
@@ -6518,7 +6463,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
}
SmallVector<SDValue, 4> InVals;
- Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Chain = LowerCall(Chain, Callee, CallConv, isVarArg, doesNotRet, isTailCall,
Outs, OutVals, Ins, dl, DAG, InVals);
// Verify that the target's LowerCall behaved as expected.
@@ -6587,7 +6532,6 @@ void TargetLowering::LowerOperationWrapper(SDNode *N,
SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("LowerOperation not implemented for this target!");
- return SDValue();
}
void
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 5147b6c..8cf88e1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -72,7 +72,6 @@ class TargetLowering;
class TruncInst;
class UIToFPInst;
class UnreachableInst;
-class UnwindInst;
class VAArgInst;
class ZExtInst;
@@ -130,13 +129,13 @@ private:
/// Case - A struct to record the Value for a switch case, and the
/// case's target basic block.
struct Case {
- Constant* Low;
- Constant* High;
+ const Constant *Low;
+ const Constant *High;
MachineBasicBlock* BB;
uint32_t ExtraWeight;
Case() : Low(0), High(0), BB(0), ExtraWeight(0) { }
- Case(Constant* low, Constant* high, MachineBasicBlock* bb,
+ Case(const Constant *low, const Constant *high, MachineBasicBlock *bb,
uint32_t extraweight) : Low(low), High(high), BB(bb),
ExtraWeight(extraweight) { }
@@ -454,7 +453,8 @@ private:
MachineBasicBlock* Default,
MachineBasicBlock *SwitchBB);
- uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst);
+ uint32_t getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const;
void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
uint32_t Weight = 0);
public:
@@ -474,7 +474,6 @@ private:
// These all get lowered before this pass.
void visitInvoke(const InvokeInst &I);
void visitResume(const ResumeInst &I);
- void visitUnwind(const UnwindInst &I);
void visitBinary(const User &I, unsigned OpCode);
void visitShift(const User &I, unsigned Opcode);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 3c95059..2173d8d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -74,7 +74,6 @@ STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch");
STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr");
STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke");
STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume");
-STATISTIC(NumFastIselFailUnwind,"Fast isel fails on Unwind");
STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable");
// Standard binary operators...
@@ -218,12 +217,15 @@ namespace llvm {
CodeGenOpt::Level OptLevel) {
const TargetLowering &TLI = IS->getTargetLowering();
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOpt::None ||
+ TLI.getSchedulingPreference() == Sched::Source)
return createSourceListDAGScheduler(IS, OptLevel);
if (TLI.getSchedulingPreference() == Sched::RegPressure)
return createBURRListDAGScheduler(IS, OptLevel);
if (TLI.getSchedulingPreference() == Sched::Hybrid)
return createHybridListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::VLIW)
+ return createVLIWDAGScheduler(IS, OptLevel);
assert(TLI.getSchedulingPreference() == Sched::ILP &&
"Unknown sched type!");
return createILPListDAGScheduler(IS, OptLevel);
@@ -248,7 +250,6 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
"TargetLowering::EmitInstrWithCustomInserter!";
#endif
llvm_unreachable(0);
- return 0;
}
void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
@@ -262,6 +263,8 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
+void SelectionDAGISel::ISelUpdater::anchor() { }
+
SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
CodeGenOpt::Level OL) :
MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
@@ -452,7 +455,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
}
// Determine if there is a call to setjmp in the machine function.
- MF->setCallsSetJmp(Fn.callsFunctionThatReturnsTwice());
+ MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
// Replace forward-declared registers with the registers containing
// the desired value.
@@ -777,37 +780,12 @@ void SelectionDAGISel::PrepareEHLandingPad() {
.addSym(Label);
// Mark exception register as live in.
- unsigned Reg = TLI.getExceptionAddressRegister();
+ unsigned Reg = TLI.getExceptionPointerRegister();
if (Reg) MBB->addLiveIn(Reg);
// Mark exception selector register as live in.
Reg = TLI.getExceptionSelectorRegister();
if (Reg) MBB->addLiveIn(Reg);
-
- // FIXME: Hack around an exception handling flaw (PR1508): the personality
- // function and list of typeids logically belong to the invoke (or, if you
- // like, the basic block containing the invoke), and need to be associated
- // with it in the dwarf exception handling tables. Currently however the
- // information is provided by an intrinsic (eh.selector) that can be moved
- // to unexpected places by the optimizers: if the unwind edge is critical,
- // then breaking it can result in the intrinsics being in the successor of
- // the landing pad, not the landing pad itself. This results
- // in exceptions not being caught because no typeids are associated with
- // the invoke. This may not be the only way things can go wrong, but it
- // is the only way we try to work around for the moment.
- const BasicBlock *LLVMBB = MBB->getBasicBlock();
- const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
-
- if (Br && Br->isUnconditional()) { // Critical edge?
- BasicBlock::const_iterator I, E;
- for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
- if (isa<EHSelectorInst>(I))
- break;
-
- if (I == E)
- // No catch info found - try to extract some from the successor.
- CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo);
- }
}
/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
@@ -901,6 +879,10 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
}
#ifndef NDEBUG
+// Collect per-instruction statistics for fast-isel misses. Only the
+// instruction that causes the bail-out is accounted for; instructions higher
+// in the block are not counted. Thus, summing the per-instruction stats
+// will not add up to what is reported by NumFastIselFailures.
static void collectFailStats(const Instruction *I) {
switch (I->getOpcode()) {
default: assert (0 && "<Invalid operator> ");
@@ -912,7 +894,6 @@ static void collectFailStats(const Instruction *I) {
case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return;
case Instruction::Invoke: NumFastIselFailInvoke++; return;
case Instruction::Resume: NumFastIselFailResume++; return;
- case Instruction::Unwind: NumFastIselFailUnwind++; return;
case Instruction::Unreachable: NumFastIselFailUnreachable++; return;
// Standard binary operators...
@@ -974,7 +955,6 @@ static void collectFailStats(const Instruction *I) {
case Instruction::InsertValue: NumFastIselFailInsertValue++; return;
case Instruction::LandingPad: NumFastIselFailLandingPad++; return;
}
- return;
}
#endif
@@ -2199,6 +2179,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case ISD::EntryToken: // These nodes remain the same.
case ISD::BasicBlock:
case ISD::Register:
+ case ISD::RegisterMask:
//case ISD::VALUETYPE:
//case ISD::CONDCODE:
case ISD::HANDLENODE:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index a7cf089..6cde05a 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -27,7 +27,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Config/config.h"
using namespace llvm;
namespace llvm {
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9ced1ac..792de75 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -572,21 +572,22 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
// ConstantFP nodes default to expand. Targets can either change this to
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
+ setOperationAction(ISD::ConstantFP, MVT::f16, Expand);
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
// These library functions default to expand.
- setOperationAction(ISD::FLOG , MVT::f64, Expand);
- setOperationAction(ISD::FLOG2, MVT::f64, Expand);
- setOperationAction(ISD::FLOG10, MVT::f64, Expand);
- setOperationAction(ISD::FEXP , MVT::f64, Expand);
- setOperationAction(ISD::FEXP2, MVT::f64, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
- setOperationAction(ISD::FCEIL, MVT::f64, Expand);
- setOperationAction(ISD::FRINT, MVT::f64, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG , MVT::f16, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f16, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f16, Expand);
+ setOperationAction(ISD::FEXP , MVT::f16, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f16, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f16, Expand);
+ setOperationAction(ISD::FRINT, MVT::f16, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Expand);
setOperationAction(ISD::FLOG , MVT::f32, Expand);
setOperationAction(ISD::FLOG2, MVT::f32, Expand);
setOperationAction(ISD::FLOG10, MVT::f32, Expand);
@@ -597,6 +598,16 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
setOperationAction(ISD::FCEIL, MVT::f32, Expand);
setOperationAction(ISD::FRINT, MVT::f32, Expand);
setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
@@ -1597,23 +1608,40 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
break;
case ISD::SIGN_EXTEND_INREG: {
- EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
+ // If we only care about the highest bit, don't bother shifting right.
+ if (MsbMask == DemandedMask) {
+ unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
+ SDValue InOp = Op.getOperand(0);
+
+ // Compute the correct shift amount type, which must be getShiftAmountTy
+ // for scalar types after legalization.
+ EVT ShiftAmtTy = Op.getValueType();
+ if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
+ ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
+
+ SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(), InOp, ShiftAmt));
+ }
// Sign extension. Compute the demanded bits in the result that are not
// present in the input.
APInt NewBits =
APInt::getHighBitsSet(BitWidth,
- BitWidth - EVT.getScalarType().getSizeInBits());
+ BitWidth - ExVT.getScalarType().getSizeInBits());
// If none of the extended bits are demanded, eliminate the sextinreg.
if ((NewBits & NewMask) == 0)
return TLO.CombineTo(Op, Op.getOperand(0));
APInt InSignBit =
- APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth);
+ APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth);
APInt InputDemandedBits =
APInt::getLowBitsSet(BitWidth,
- EVT.getScalarType().getSizeInBits()) &
+ ExVT.getScalarType().getSizeInBits()) &
NewMask;
// Since the sign extended bits are demanded, we know that the sign
@@ -1631,7 +1659,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the input sign bit is known zero, convert this into a zero extension.
if (KnownZero.intersects(InSignBit))
return TLO.CombineTo(Op,
- TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));
+ TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT));
if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
KnownOne |= NewBits;
@@ -2995,7 +3023,6 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
/// is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
switch (CT) {
- default: llvm_unreachable("Unknown constraint type!");
case TargetLowering::C_Other:
case TargetLowering::C_Unknown:
return 0;
@@ -3006,6 +3033,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
case TargetLowering::C_Memory:
return 3;
}
+ llvm_unreachable("Invalid constraint type");
}
/// Examine constraint type and operand type and determine a weight value.