Diffstat (limited to 'lib/CodeGen/SelectionDAG')
23 files changed, 1971 insertions, 607 deletions
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 6023326..9a79217 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -10,13 +10,15 @@ add_llvm_library(LLVMSelectionDAG LegalizeTypesGeneric.cpp LegalizeVectorOps.cpp LegalizeVectorTypes.cpp + ResourcePriorityQueue.cpp ScheduleDAGFast.cpp - ScheduleDAGRRList.cpp + ScheduleDAGRRList.cpp ScheduleDAGSDNodes.cpp SelectionDAG.cpp SelectionDAGBuilder.cpp SelectionDAGISel.cpp SelectionDAGPrinter.cpp + ScheduleDAGVLIW.cpp TargetLowering.cpp TargetSelectionDAGInfo.cpp ) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 80cf0a8..1b148ad 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -63,7 +63,24 @@ namespace { bool LegalTypes; // Worklist of all of the nodes that need to be simplified. - std::vector<SDNode*> WorkList; + // + // This has the semantics that when adding to the worklist, + // the item added must be next to be processed. It should + // also only appear once. The naive approach to this takes + // linear time. + // + // To reduce the insert/remove time to logarithmic, we use + // a set and a vector to maintain our worklist. + // + // The set contains the items on the worklist, but does not + // maintain the order they should be visited. + // + // The vector maintains the order nodes should be visited, but may + // contain duplicate or removed nodes. When choosing a node to + // visit, we pop off the order stack until we find an item that is + // also in the contents set. All operations are O(log N). + SmallPtrSet<SDNode*, 64> WorkListContents; + std::vector<SDNode*> WorkListOrder; // AA - Used for DAG load/store alias analysis. AliasAnalysis &AA; @@ -83,18 +100,17 @@ namespace { SDValue visit(SDNode *N); public: - /// AddToWorkList - Add to the work list making sure it's instance is at the - /// the back (next to be processed.) + /// AddToWorkList - Add to the work list making sure its instance is at the + /// back (next to be processed.) void AddToWorkList(SDNode *N) { - removeFromWorkList(N); - WorkList.push_back(N); + WorkListContents.insert(N); + WorkListOrder.push_back(N); } /// removeFromWorkList - remove all instances of N from the worklist. /// void removeFromWorkList(SDNode *N) { - WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N), - WorkList.end()); + WorkListContents.erase(N); } SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, @@ -158,7 +174,9 @@ namespace { SDValue visitADD(SDNode *N); SDValue visitSUB(SDNode *N); SDValue visitADDC(SDNode *N); + SDValue visitSUBC(SDNode *N); SDValue visitADDE(SDNode *N); + SDValue visitSUBE(SDNode *N); SDValue visitMUL(SDNode *N); SDValue visitSDIV(SDNode *N); SDValue visitUDIV(SDNode *N); @@ -957,10 +975,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) { LegalTypes = Level >= AfterLegalizeTypes; // Add all the dag nodes to the worklist. - WorkList.reserve(DAG.allnodes_size()); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) - WorkList.push_back(I); + AddToWorkList(I); // Create a dummy node (which is not added to allnodes), that adds a reference // to the root node, preventing it from being deleted, and tracking any @@ -971,11 +988,18 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // done. Set it to null to avoid confusion. 
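The two-structure worklist introduced above can be sketched in isolation like this (a minimal standalone model with hypothetical names; the actual combiner keeps this state in member fields and uses llvm::SmallPtrSet rather than std::set):

    #include <set>
    #include <vector>

    // Membership lives in a set (O(log N) insert/erase); visit order lives in
    // a vector that may hold stale duplicates, which pop() silently discards.
    template <typename T> class DedupWorkList {
      std::set<T> Contents;   // what should still be visited
      std::vector<T> Order;   // visit order, possibly with stale entries
    public:
      void push(T N) { Contents.insert(N); Order.push_back(N); }
      void remove(T N) { Contents.erase(N); } // Order entries go stale lazily
      bool empty() const { return Contents.empty(); }
      T pop() { // precondition: !empty()
        T N;
        do {
          N = Order.back();
          Order.pop_back();
        } while (!Contents.erase(N)); // skip removed or superseded entries
        return N;
      }
    };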
DAG.setRoot(SDValue()); - // while the worklist isn't empty, inspect the node on the end of it and + // while the worklist isn't empty, find a node and // try and combine it. - while (!WorkList.empty()) { - SDNode *N = WorkList.back(); - WorkList.pop_back(); + while (!WorkListContents.empty()) { + SDNode *N; + // The WorkListOrder holds the SDNodes in order, but it may contain duplicates. + // In order to avoid a linear scan, we use a set (O(log N)) to hold what the + // worklist *should* contain, and check that the node we want to visit should + // actually be visited. + do { + N = WorkListOrder.back(); + WorkListOrder.pop_back(); + } while (!WorkListContents.erase(N)); // If N has no uses, it is dead. Make sure to revisit all N's operands once // N is deleted from the DAG, since they too may now be dead or may have a @@ -1059,7 +1083,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ADD: return visitADD(N); case ISD::SUB: return visitSUB(N); case ISD::ADDC: return visitADDC(N); + case ISD::SUBC: return visitSUBC(N); case ISD::ADDE: return visitADDE(N); + case ISD::SUBE: return visitSUBE(N); case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); @@ -1497,8 +1523,8 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an ADD. - if (N->hasNUsesOfValue(0, 1)) - return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0), + if (!N->hasAnyUseOfValue(1)) + return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), MVT::Glue)); @@ -1546,7 +1572,7 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { // fold (adde x, y, false) -> (addc x, y) if (CarryIn.getOpcode() == ISD::CARRY_FALSE) - return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1); return SDValue(); } @@ -1656,6 +1682,51 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSUBC(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N0.getValueType(); + + // If the flag result is dead, turn this into a SUB. + if (!N->hasAnyUseOfValue(1)) + return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + // fold (subc x, x) -> 0 + no borrow + if (N0 == N1) + return CombineTo(N, DAG.getConstant(0, VT), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + // fold (subc x, 0) -> x + no borrow + if (N1C && N1C->isNullValue()) + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + // Canonicalize (sub -1, x) -> ~x, i.e.
(xor x, -1) + no borrow + if (N0C && N0C->isAllOnesValue()) + return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + return SDValue(); +} + +SDValue DAGCombiner::visitSUBE(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + + // fold (sube x, y, false) -> (subc x, y) + if (CarryIn.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1); + + return SDValue(); +} + SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2320,6 +2391,88 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } } + // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> + // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must + // already be zero by virtue of the width of the base type of the load. + // + // the 'X' node here can either be nothing or an extract_vector_elt to catch + // more cases. + if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + N0.getOperand(0).getOpcode() == ISD::LOAD) || + N0.getOpcode() == ISD::LOAD) { + LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? + N0 : N0.getOperand(0) ); + + // Get the constant (if applicable) the zero'th operand is being ANDed with. + // This can be a pure constant or a vector splat, in which case we treat the + // vector as a scalar and use the splat value. + APInt Constant = APInt::getNullValue(1); + if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + Constant = C->getAPIntValue(); + } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, + SplatBitSize, HasAnyUndefs); + if (IsSplat) { + // Undef bits can contribute to a possible optimisation if set, so + // set them. + SplatValue |= SplatUndef; + + // The splat value may be something like "0x00FFFFFF", which means 0 for + // the first vector value and FF for the rest, repeating. We need a mask + // that will apply equally to all members of the vector, so AND all the + // lanes of the constant together. + EVT VT = Vector->getValueType(0); + unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); + Constant = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i) + Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); + } + } + + // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is + // actually legal and isn't going to get expanded, else this is a false + // optimisation. + bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, + Load->getMemoryVT()); + + // Resize the constant to the same size as the original memory access before + // extension. If it is still the AllOnesValue then this AND is completely + // unneeded. + Constant = + Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); + + bool B; + switch (Load->getExtensionType()) { + default: B = false; break; + case ISD::EXTLOAD: B = CanZextLoadProfitably; break; + case ISD::ZEXTLOAD: + case ISD::NON_EXTLOAD: B = true; break; + } + + if (B && Constant.isAllOnesValue()) { + // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to + // preserve semantics once we get rid of the AND. 
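The mask-narrowing test above can be illustrated with a small standalone check (illustrative only, using LLVM's APInt; the combiner additionally handles vector splats by ANDing all the lanes of the splat constant together first):

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    // An AND mask is redundant for a load zero-extended from MemBits bits
    // iff, truncated to those MemBits, it is all ones: the extended bits are
    // already known to be zero.
    bool andIsRedundant(const APInt &Mask, unsigned MemBits) {
      return Mask.zextOrTrunc(MemBits).isAllOnesValue();
    }
    // e.g. (and (zextload i8 -> i32), 0x000000FF): andIsRedundant(Mask, 8)
    // is true, so the AND folds away; with mask 0x0000007F it must stay,
    // since a bit of the loaded byte is actually cleared.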
+ SDValue NewLoad(Load, 0); + if (Load->getExtensionType() == ISD::EXTLOAD) { + NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, + Load->getValueType(0), Load->getDebugLoc(), + Load->getChain(), Load->getBasePtr(), + Load->getOffset(), Load->getMemoryVT(), + Load->getMemOperand()); + // Replace uses of the EXTLOAD with the new ZEXTLOAD. + CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); + } + + // Fold the AND away, taking care not to fold to the old load node if we + // replaced it. + CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); @@ -3331,7 +3484,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or // (and (srl x, (sub c1, c2), MASK) - if (N1C && N0.getOpcode() == ISD::SRL && + // Only fold this if the inner shift has no other uses -- if it does, folding + // this will increase the total number of instructions. + if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() && N0.getOperand(1).getOpcode() == ISD::Constant) { uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); if (c1 < VT.getSizeInBits()) { @@ -4203,6 +4358,29 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0.getOperand(0)); + // fold (zext (truncate x)) -> (zext x) or + // (zext (truncate x)) -> (truncate x) + // This is valid when the truncated bits of x are already zero. + // FIXME: We should extend this to work for vectors too. + if (N0.getOpcode() == ISD::TRUNCATE && !VT.isVector()) { + SDValue Op = N0.getOperand(0); + APInt TruncatedBits + = APInt::getBitsSet(Op.getValueSizeInBits(), + N0.getValueSizeInBits(), + std::min(Op.getValueSizeInBits(), + VT.getSizeInBits())); + APInt KnownZero, KnownOne; + DAG.ComputeMaskedBits(Op, TruncatedBits, KnownZero, KnownOne); + if (TruncatedBits == KnownZero) { + if (VT.bitsGT(Op.getValueType())) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op); + if (VT.bitsLT(Op.getValueType())) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + + return Op; + } + } + // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { @@ -4883,6 +5061,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + bool isLE = TLI.isLittleEndian(); // noop truncate if (N0.getValueType() == N->getValueType(0)) @@ -4910,6 +5089,44 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return N0.getOperand(0); } + // Fold extract-and-trunc into a narrow extract. For example: + // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) + // i32 y = TRUNCATE(i64 x) + // -- becomes -- + // v16i8 b = BITCAST (v2i64 val) + // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8) + // + // Note: We only run this optimization after type legalization (which often + // creates this pattern) and before operation legalization after which + // we need to be more careful about the vector instructions that we generate. 
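The little-endian/big-endian index arithmetic used by this extract-and-trunc fold is easy to check in isolation (a sketch; the function name is hypothetical):

    // Sub-element index of the low (kept-by-truncate) part of wide lane Elt
    // once the vector is bitcast to SizeRatio-times-narrower elements.
    unsigned narrowExtractIndex(unsigned Elt, unsigned SizeRatio, bool IsLE) {
      return IsLE ? Elt * SizeRatio : Elt * SizeRatio + (SizeRatio - 1);
    }
    // For the comment's example (v2i64 viewed as v16i8, SizeRatio = 8,
    // Elt = 1): little-endian gives lane 8, big-endian would give lane 15.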
+ if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + LegalTypes && !LegalOperations && N0->hasOneUse()) { + + EVT VecTy = N0.getOperand(0).getValueType(); + EVT ExTy = N0.getValueType(); + EVT TrTy = N->getValueType(0); + + unsigned NumElem = VecTy.getVectorNumElements(); + unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); + assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); + + SDValue EltNo = N0->getOperand(1); + if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + + int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); + + SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + NVT, N0.getOperand(0)); + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + N->getDebugLoc(), TrTy, V, + DAG.getConstant(Index, MVT::i32)); + } + } + // See if we can simplify the input to this truncate through knowledge that // only the low bits are being used. // For example "trunc (or (shl x, 8), y)" // -> trunc y @@ -5910,6 +6127,44 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { return SDValue(); } +/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that +/// uses N as its base pointer and that N may be folded in the load / store +/// addressing mode. FIXME: This currently only looks for folding of +/// [reg +/- imm] addressing modes. +static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, + SelectionDAG &DAG, + const TargetLowering &TLI) { + EVT VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { + if (LD->isIndexed() || LD->getBasePtr().getNode() != N) + return false; + VT = Use->getValueType(0); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { + if (ST->isIndexed() || ST->getBasePtr().getNode() != N) + return false; + VT = ST->getValue().getValueType(); + } else + return false; + + TargetLowering::AddrMode AM; + if (N->getOpcode() == ISD::ADD) { + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (Offset) + AM.BaseOffs = Offset->getSExtValue(); + else + return false; + } else if (N->getOpcode() == ISD::SUB) { + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (Offset) + AM.BaseOffs = -Offset->getSExtValue(); + else + return false; + } else + return false; + + return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); +} + /// CombineToPreIndexedLoadStore - Try turning a load / store into a /// pre-indexed load / store when the base pointer is an add or subtract /// and it has other uses besides the load / store. After the @@ -5996,10 +6251,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (N->hasPredecessorHelper(Use, Visited, Worklist)) return false; - if (!((Use->getOpcode() == ISD::LOAD && - cast<LoadSDNode>(Use)->getBasePtr() == Ptr) || - (Use->getOpcode() == ISD::STORE && - cast<StoreSDNode>(Use)->getBasePtr() == Ptr))) + // If Ptr may be folded in addressing mode of other use, then it's + // not profitable to do this transformation. + if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI)) RealUse = true; } @@ -6096,7 +6350,8 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { continue; // Try turning it into a post-indexed load / store except when - // 1) All uses are load / store ops that use it as base ptr (and + // it may be folded as addressing mode). + // 2) Op must be independent of N, i.e.
Op is neither a predecessor // nor a successor of N. Otherwise, if Op is folded that would // create a cycle. @@ -6119,10 +6374,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { for (SDNode::use_iterator III = Use->use_begin(), EEE = Use->use_end(); III != EEE; ++III) { SDNode *UseUse = *III; - if (!((UseUse->getOpcode() == ISD::LOAD && - cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) || - (UseUse->getOpcode() == ISD::STORE && - cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use))) + if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) RealUse = true; } @@ -6189,7 +6441,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (!LD->isVolatile()) { if (N->getValueType(1) == MVT::Other) { // Unindexed loads. - if (N->hasNUsesOfValue(0, 0)) { + if (!N->hasAnyUseOfValue(0)) { // It's not safe to use the two value CombineTo variant here. e.g. // v1, chain2 = load chain1, loc // v2, chain3 = load chain2, loc @@ -6214,7 +6466,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { } else { // Indexed loads. assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); - if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { + if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); @@ -6873,13 +7125,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val SDValue InVec = N->getOperand(0); + EVT VT = InVec.getValueType(); + EVT NVT = N->getValueType(0); if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { // Check if the result type doesn't match the inserted element type. A // SCALAR_TO_VECTOR may truncate the inserted element and the // EXTRACT_VECTOR_ELT may widen the extracted vector. SDValue InOp = InVec.getOperand(0); - EVT NVT = N->getValueType(0); if (InOp.getValueType() != NVT) { assert(InOp.getValueType().isInteger() && NVT.isInteger()); return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); @@ -6887,6 +7140,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return InOp; } + SDValue EltNo = N->getOperand(1); + bool ConstEltNo = isa<ConstantSDNode>(EltNo); + + // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. + // We only perform this optimization before the op legalization phase because + // we may introduce new vector instructions which are not backed by TD patterns. + // For example on AVX, extracting elements from a wide vector without using + // extract_subvector. + if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE + && ConstEltNo && !LegalOperations) { + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + int NumElem = VT.getVectorNumElements(); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); + // Find the new index to extract from. + int OrigElt = SVOp->getMaskElt(Elt); + + // Extracting an undef index is undef. + if (OrigElt == -1) + return DAG.getUNDEF(NVT); + + // Select the right vector half to extract from. + if (OrigElt < NumElem) { + InVec = InVec->getOperand(0); + } else { + InVec = InVec->getOperand(1); + OrigElt -= NumElem; + } + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT, + InVec, DAG.getConstant(OrigElt, MVT::i32)); + } + // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. 
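The index remapping performed by the extract-of-shuffle transform above can be expressed as a small pure function (a sketch with hypothetical names):

    #include <utility>

    // Given the shuffle mask entry for the extracted lane, pick the source
    // operand (0 or 1) and the lane inside it; -1 means the lane is undef.
    std::pair<int, int> remapShuffleExtract(int MaskElt, int NumElem) {
      if (MaskElt < 0) return std::make_pair(-1, -1); // extract of undef lane
      if (MaskElt < NumElem) return std::make_pair(0, MaskElt);
      return std::make_pair(1, MaskElt - NumElem);
    }
    // e.g. for a v4i32 shuffle with mask <4,1,6,3>, extracting element 2
    // (mask entry 6) becomes an extract of lane 2 from the second operand.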
if (!LegalOperations) return SDValue(); @@ -6894,17 +7179,19 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) - SDValue EltNo = N->getOperand(1); - if (isa<ConstantSDNode>(EltNo)) { + if (ConstEltNo) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); bool NewLoad = false; bool BCNumEltsChanged = false; - EVT VT = InVec.getValueType(); EVT ExtVT = VT.getVectorElementType(); EVT LVT = ExtVT; if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + EVT BCVT = InVec.getOperand(0).getValueType(); if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) return SDValue(); @@ -6922,12 +7209,20 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.getOperand(0).getValueType() == ExtVT && ISD::isNormalLoad(InVec.getOperand(0).getNode())) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + LN0 = cast<LoadSDNode>(InVec.getOperand(0)); } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) // => // (load $addr+1*size) + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + // If the bit convert changed the number of elements, it is unsafe // to examine the mask. if (BCNumEltsChanged) @@ -6938,14 +7233,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); - if (InVec.getOpcode() == ISD::BITCAST) + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + InVec = InVec.getOperand(0); + } if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; } } + // Make sure we found a non-volatile load and the extractelement is + // the only use. if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) return SDValue(); @@ -6982,6 +7284,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // The replacement we need to do here is a little tricky: we need to // replace an extractelement of a load with a load. // Use ReplaceAllUsesOfValuesWith to do the replacement. + // Note that this replacement assumes that the extractvalue is the only + // use of the load; that's okay because we don't want to perform this + // transformation in other cases anyway. SDValue Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), LN0->isVolatile(), LN0->isNonTemporal(), @@ -7011,11 +7316,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // optimizations. We do not handle sign-extend because we can't fill the sign // using shuffles. EVT SourceType = MVT::Other; - bool allAnyExt = true; - for (unsigned i = 0; i < NumInScalars; ++i) { + bool AllAnyExt = true; + bool AllUndef = true; + for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); // Ignore undef inputs. 
if (In.getOpcode() == ISD::UNDEF) continue; + AllUndef = false; bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; @@ -7040,15 +7347,17 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } // Check if all of the extends are ANY_EXTENDs. - allAnyExt &= AnyExt; + AllAnyExt &= AnyExt; } + if (AllUndef) + return DAG.getUNDEF(VT); // In order to have valid types, all of the inputs must be extended from the // same source type and all of the inputs must be any or zero extend. // Scalar sizes must be a power of two. EVT OutScalarTy = N->getValueType(0).getScalarType(); - bool validTypes = SourceType != MVT::Other && + bool ValidTypes = SourceType != MVT::Other && isPowerOf2_32(OutScalarTy.getSizeInBits()) && isPowerOf2_32(SourceType.getSizeInBits()); @@ -7058,11 +7367,12 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // will be type-legalized to complex code sequences. // We perform this optimization only before the operation legalizer because we // may introduce illegal operations. - if (LegalTypes && !LegalOperations && validTypes) { + if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) && + ValidTypes) { bool isLE = TLI.isLittleEndian(); unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); - SDValue Filler = allAnyExt ? DAG.getUNDEF(SourceType): + SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): DAG.getConstant(0, SourceType); unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements(); @@ -7117,15 +7427,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { break; } - // If the input vector type disagrees with the result of the build_vector, - // we can't make a shuffle. + // We allow up to two distinct input vectors. SDValue ExtractedFromVec = N->getOperand(i).getOperand(0); - if (ExtractedFromVec.getValueType() != VT) { - VecIn1 = VecIn2 = SDValue(0, 0); - break; - } - - // Otherwise, remember this. We allow up to two distinct input vectors. if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) continue; @@ -7140,7 +7443,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } } - // If everything is good, we can make a shuffle operation. + // If everything is good, we can make a shuffle operation. if (VecIn1.getNode()) { SmallVector<int, 8> Mask; for (unsigned i = 0; i != NumInScalars; ++i) { @@ -7166,14 +7469,39 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { Mask.push_back(Idx+NumInScalars); } - // Add count and size info. + // We can't generate a shuffle node with mismatched input and output types. + // Attempt to transform a single input vector to the correct type. + if ((VT != VecIn1.getValueType())) { + // We don't support shuffling between TWO values of different types. + if (VecIn2.getNode() != 0) + return SDValue(); + + // We only support widening of vectors which are half the size of the + // output registers. For example XMM->YMM widening on X86 with AVX. + if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) + return SDValue(); + + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + } + + // If VecIn2 is unused then change it to undef. + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + + // Check that we were able to transform all incoming values to the same type.
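The lane layout produced by this extend-elimination is worth spelling out (a sketch; the real code builds a narrow BUILD_VECTOR of sources and Filler values and bitcasts it back):

    // In the rebuilt narrow vector, source scalar i occupies one narrow lane
    // per wide element; the remaining ElemRatio-1 lanes take the Filler
    // (zero for zero_extend, undef when every input was any_extend).
    bool isSourceLane(unsigned Lane, unsigned ElemRatio, bool IsLE,
                      unsigned &SrcIdx) {
      unsigned Offset = IsLE ? 0 : ElemRatio - 1; // low-part position per lane
      if (Lane % ElemRatio != Offset)
        return false;                             // this lane holds the filler
      SrcIdx = Lane / ElemRatio;
      return true;
    }
    // e.g. v4i32 built from (zero_extend i8 x0..x3), ElemRatio = 4, little-
    // endian: the v16i8 lanes are <x0,0,0,0, x1,0,0,0, x2,0,0,0, x3,0,0,0>.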
+ if (VecIn2.getValueType() != VecIn1.getValueType() || + VecIn1.getValueType() != VT) + return SDValue(); + + // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. if (!isTypeLegal(VT)) return SDValue(); // Return the new VECTOR_SHUFFLE node. SDValue Ops[2]; Ops[0] = VecIn1; - Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + Ops[1] = VecIn2; return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]); } @@ -7232,15 +7560,63 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { unsigned NumElts = VT.getVectorNumElements(); SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); assert(N0.getValueType().getVectorNumElements() == NumElts && "Vector shuffle must be normalized in DAG"); - // FIXME: implement canonicalizations from DAG.getVectorShuffle() + // Canonicalize shuffle undef, undef -> undef + if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + + // Canonicalize shuffle v, v -> v, undef + if (N0 == N1) { + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= (int)NumElts) Idx -= NumElts; + NewMask.push_back(Idx); + } + return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT), + &NewMask[0]); + } + + // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. + if (N0.getOpcode() == ISD::UNDEF) { + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx < 0) + NewMask.push_back(Idx); + else if (Idx < (int)NumElts) + NewMask.push_back(Idx + NumElts); + else + NewMask.push_back(Idx - NumElts); + } + return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT), + &NewMask[0]); + } + + // Remove references to rhs if it is undef + if (N1.getOpcode() == ISD::UNDEF) { + bool Changed = false; + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= (int)NumElts) { + Idx = -1; + Changed = true; + } + NewMask.push_back(Idx); + } + if (Changed) + return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]); + } // If it is a splat, check if the argument vector is another splat or a // build_vector with all scalar elements the same. - ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { SDNode *V = N0.getNode(); @@ -8029,30 +8405,20 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. 
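The mask rewrite behind the "shuffle undef, v -> shuffle v, undef" canonicalization above is a simple reflection across the operand boundary (standalone sketch):

    #include <vector>

    // Commute a shuffle mask: entries referring to operand 0 now refer to
    // operand 1 and vice versa; undef (-1) entries are preserved.
    std::vector<int> commuteShuffleMask(const std::vector<int> &Mask,
                                        int NumElts) {
      std::vector<int> NewMask;
      for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
        int Idx = Mask[i];
        if (Idx < 0)            NewMask.push_back(Idx);
        else if (Idx < NumElts) NewMask.push_back(Idx + NumElts);
        else                    NewMask.push_back(Idx - NumElts);
      }
      return NewMask;
    }
    // e.g. NumElts = 4: shuffle undef, v, <4,5,-1,6> becomes
    //      shuffle v, undef, <0,1,-1,2>.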
bool DAGCombiner::FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, - const Value *&SrcValue, - int &SrcValueOffset, - unsigned &SrcValueAlign, - const MDNode *&TBAAInfo) const { - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { - Ptr = LD->getBasePtr(); - Size = LD->getMemoryVT().getSizeInBits() >> 3; - SrcValue = LD->getSrcValue(); - SrcValueOffset = LD->getSrcValueOffset(); - SrcValueAlign = LD->getOriginalAlignment(); - TBAAInfo = LD->getTBAAInfo(); - return true; - } - if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { - Ptr = ST->getBasePtr(); - Size = ST->getMemoryVT().getSizeInBits() >> 3; - SrcValue = ST->getSrcValue(); - SrcValueOffset = ST->getSrcValueOffset(); - SrcValueAlign = ST->getOriginalAlignment(); - TBAAInfo = ST->getTBAAInfo(); - return false; - } - llvm_unreachable("FindAliasInfo expected a memory operand"); + SDValue &Ptr, int64_t &Size, + const Value *&SrcValue, + int &SrcValueOffset, + unsigned &SrcValueAlign, + const MDNode *&TBAAInfo) const { + LSBaseSDNode *LS = cast<LSBaseSDNode>(N); + + Ptr = LS->getBasePtr(); + Size = LS->getMemoryVT().getSizeInBits() >> 3; + SrcValue = LS->getSrcValue(); + SrcValueOffset = LS->getSrcValueOffset(); + SrcValueAlign = LS->getOriginalAlignment(); + TBAAInfo = LS->getTBAAInfo(); + return isa<LoadSDNode>(LS); } /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index b4946ec..fd8ce78 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -561,12 +561,19 @@ bool FastISel::SelectCall(const User *I) { return true; } + MachineModuleInfo &MMI = FuncInfo.MF->getMMI(); + ComputeUsesVAFloatArgument(*Call, &MMI); + const Function *F = Call->getCalledFunction(); if (!F) return false; // Handle selected intrinsic function calls. switch (F->getIntrinsicID()) { default: break; + // At -O0 we don't care about the lifetime intrinsics. + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return true; case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); if (!DIVariable(DI->getVariable()).Verify() || @@ -630,60 +637,6 @@ bool FastISel::SelectCall(const User *I) { } return true; } - case Intrinsic::eh_exception: { - EVT VT = TLI.getValueType(Call->getType()); - if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand) - break; - - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - unsigned Reg = TLI.getExceptionAddressRegister(); - const TargetRegisterClass *RC = TLI.getRegClassFor(VT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - UpdateValueMap(Call, ResultReg); - return true; - } - case Intrinsic::eh_selector: { - EVT VT = TLI.getValueType(Call->getType()); - if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand) - break; - if (FuncInfo.MBB->isLandingPad()) - AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB); - else { -#ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(Call); -#endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. 
- unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBB->addLiveIn(Reg); - } - - unsigned Reg = TLI.getExceptionSelectorRegister(); - EVT SrcVT = TLI.getPointerTy(); - const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - - bool ResultRegIsKill = hasTrivialKill(Call); - - // Cast the register to the type of the selector. - if (SrcVT.bitsGT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, - ResultReg, ResultRegIsKill); - else if (SrcVT.bitsLT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, - ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); - if (ResultReg == 0) - // Unhandled operand. Halt "fast" selection and bail. - return false; - - UpdateValueMap(Call, ResultReg); - - return true; - } case Intrinsic::objectsize: { ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1)); unsigned long long Res = CI->isZero() ? -1ULL : 0; @@ -775,8 +728,8 @@ bool FastISel::SelectBitCast(const User *I) { // First, try to perform the bitcast by inserting a reg-reg copy. unsigned ResultReg = 0; if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { - TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); - TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); + const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); + const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); // Don't attempt a cross-class copy. It will likely fail. if (SrcClass == DstClass) { ResultReg = createResultReg(DstClass); @@ -1419,8 +1372,8 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // exactly one register for each non-void instruction. EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { - // Promote MVT::i1. - if (VT == MVT::i1) + // Handle integer promotions, though, because they're common and easy. + if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); else { FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 90d35cc..8dde919 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "function-lowering-info" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -68,7 +69,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { GetReturnInfo(Fn->getReturnType(), Fn->getAttributes().getRetAttributes(), Outs, TLI); CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF, - Fn->isVarArg(), + Fn->isVarArg(), Outs, Fn->getContext()); // Initialize the mapping of values to registers. 
This is only set up for @@ -95,11 +96,13 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { (TySize >= 8 && isa<ArrayType>(Ty) && cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP); + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, + MayNeedSP); } for (; BB != EB; ++BB) - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { // Mark values used outside their block as exported, by allocating // a virtual register for them. if (isUsedOutsideOfDefiningBlock(I)) @@ -355,7 +358,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { /// argument. This overrides previous frame index entry for this argument, /// if any. void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A, - int FI) { + int FI) { ByValArgFrameIndexMap[A] = FI; } @@ -367,10 +370,34 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { ByValArgFrameIndexMap.find(A); if (I != ByValArgFrameIndexMap.end()) return I->second; - DEBUG(dbgs() << "Argument does not have assigned frame index!"); + DEBUG(dbgs() << "Argument does not have assigned frame index!\n"); return 0; } +/// ComputeUsesVAFloatArgument - Determine if any floating-point values are +/// being passed to this variadic function, and set the MachineModuleInfo's +/// usesVAFloatArgument flag if so. This flag is used to emit an undefined +/// reference to _fltused on Windows, which will link in MSVCRT's +/// floating-point support. +void llvm::ComputeUsesVAFloatArgument(const CallInst &I, + MachineModuleInfo *MMI) +{ + FunctionType *FT = cast<FunctionType>( + I.getCalledValue()->getType()->getContainedType(0)); + if (FT->isVarArg() && !MMI->usesVAFloatArgument()) { + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + Type* T = I.getArgOperand(i)->getType(); + for (po_iterator<Type*> i = po_begin(T), e = po_end(T); + i != e; ++i) { + if (i->isFloatingPointTy()) { + MMI->setUsesVAFloatArgument(true); + return; + } + } + } + } +} + /// AddCatchInfo - Extract the personality and type infos from an eh.selector /// call, and add them to the specified machine basic block. void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, @@ -425,34 +452,6 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, } } -void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad, - MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { - SmallPtrSet<const BasicBlock*, 4> Visited; - - // The 'eh.selector' call may not be in the direct successor of a basic block, - // but could be several successors deeper. If we don't find it, try going one - // level further. <rdar://problem/8824861> - while (Visited.insert(SuccBB)) { - for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end(); - I != E; ++I) - if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { - // Apply the catch info to LPad. 
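To make the ComputeUsesVAFloatArgument change above concrete: the post-order walk over each argument's type is what catches floats nested inside aggregates, not just direct float arguments. An illustration (hypothetical user code, not part of the patch):

    struct S { int i; float f; };   // float hidden inside an aggregate
    void callee(int n, ...);        // variadic callee

    void caller(S s, float g) {
      callee(1, g);  // direct float argument: the flag is set
      callee(1, s);  // no float at the top level, but the type walk finds
    }                // S::f, so usesVAFloatArgument is still set and the
                     // object ends up referencing _fltused under MSVCRT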
- AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]); -#ifndef NDEBUG - if (!FLI.MBBMap[SuccBB]->isLandingPad()) - FLI.CatchInfoFound.insert(EHSel); -#endif - return; - } - - const BranchInst *Br = dyn_cast<BranchInst>(SuccBB->getTerminator()); - if (Br && Br->isUnconditional()) - SuccBB = Br->getSuccessor(0); - else - break; - } -} - /// AddLandingPadInfo - Extract the exception handling information from the /// landingpad instruction and add them to the specified machine module info. void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index cb6fd53..1467d88 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -351,6 +351,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, MI->addOperand(MachineOperand::CreateFPImm(CFP)); } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { MI->addOperand(MachineOperand::CreateReg(R->getReg(), false)); + } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask())); } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(), TGA->getTargetFlags())); @@ -574,14 +576,19 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, for (unsigned i = 1; i != NumOps; ++i) { SDValue Op = Node->getOperand(i); if ((i & 1) == 0) { - unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); - unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); - const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); - const TargetRegisterClass *SRC = + RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1)); + // Skip physical registers as they don't have a vreg to get and we'll + // insert copies for them in TwoAddressInstructionPass anyway. + if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); + unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); + const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); + const TargetRegisterClass *SRC = TRI->getMatchingSuperRegClass(RC, TRC, SubIdx); - if (SRC && SRC != RC) { - MRI->setRegClass(NewVReg, SRC); - RC = SRC; + if (SRC && SRC != RC) { + MRI->setRegClass(NewVReg, SRC); + RC = SRC; + } } } AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false, @@ -700,33 +707,6 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Create the new machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II); - // The MachineInstr constructor adds implicit-def operands. Scan through - // these to determine which are dead. - if (MI->getNumOperands() != 0 && - Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { - // First, collect all used registers. - SmallVector<unsigned, 8> UsedRegs; - for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) - if (F->getOpcode() == ISD::CopyFromReg) - UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); - else { - // Collect declared implicit uses. - const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); - UsedRegs.append(MCID.getImplicitUses(), - MCID.getImplicitUses() + MCID.getNumImplicitUses()); - // In addition to declared implicit uses, we must also check for - // direct RegisterSDNode operands. 
- for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) - if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { - unsigned Reg = R->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - UsedRegs.push_back(Reg); - } - } - // Then mark unused registers as dead. - MI->setPhysRegsDeadExcept(UsedRegs, *TRI); - } - // Add result register values for things that are defined by this // instruction. if (NumResults) @@ -751,30 +731,63 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // hook knows where in the block to insert the replacement code. MBB->insert(InsertPos, MI); + // The MachineInstr may also define physregs instead of virtregs. These + // physreg values can reach other instructions in different ways: + // + // 1. When there is a use of a Node value beyond the explicitly defined + // virtual registers, we emit a CopyFromReg for one of the implicitly + // defined physregs. This only happens when HasPhysRegOuts is true. + // + // 2. A CopyFromReg reading a physreg may be glued to this instruction. + // + // 3. A glued instruction may implicitly use a physreg. + // + // 4. A glued instruction may use a RegisterSDNode operand. + // + // Collect all the used physreg defs, and make sure that any unused physreg + // defs are marked as dead. + SmallVector<unsigned, 8> UsedRegs; + // Additional results must be physical register defs. if (HasPhysRegOuts) { for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; - if (Node->hasAnyUseOfValue(i)) - EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); - // If there are no uses, mark the register as dead now, so that - // MachineLICM/Sink can see that it's dead. Don't do this if the - // node has a Glue value, for the benefit of targets still using - // Glue for values in physregs. - else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue) - MI->addRegisterDead(Reg, TRI); + if (!Node->hasAnyUseOfValue(i)) + continue; + // This implicitly defined physreg has a use. + UsedRegs.push_back(Reg); + EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); } } - // If the instruction has implicit defs and the node doesn't, mark the - // implicit def as dead. If the node has any glue outputs, we don't do this - // because we don't know what implicit defs are being used by glued nodes. - if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue) - if (const unsigned *IDList = II.getImplicitDefs()) { - for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs(); - i != e; ++i) - MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI); + // Scan the glue chain for any used physregs. + if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { + for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) { + if (F->getOpcode() == ISD::CopyFromReg) { + UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); + continue; + } else if (F->getOpcode() == ISD::CopyToReg) { + // Skip CopyToReg nodes that are internal to the glue chain. + continue; + } + // Collect declared implicit uses. + const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); + UsedRegs.append(MCID.getImplicitUses(), + MCID.getImplicitUses() + MCID.getNumImplicitUses()); + // In addition to declared implicit uses, we must also check for + // direct RegisterSDNode operands. 
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) + if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { + unsigned Reg = R->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + UsedRegs.push_back(Reg); + } } + } + + // Finally mark unused registers as dead. + if (!UsedRegs.empty() || II.getImplicitDefs()) + MI->setPhysRegsDeadExcept(UsedRegs, *TRI); // Run post-isel target hook to adjust this instruction if needed. #ifdef NDEBUG @@ -794,10 +807,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, Node->dump(); #endif llvm_unreachable("This target-independent node should have been selected!"); - break; case ISD::EntryToken: llvm_unreachable("EntryToken should have been excluded from the schedule!"); - break; case ISD::MERGE_VALUES: case ISD::TokenFactor: // fall thru break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 75f5761..31df458 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -85,7 +85,7 @@ private: /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, - SmallVectorImpl<int> &Mask) const; + ArrayRef<int> Mask) const; void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, DebugLoc dl); @@ -177,7 +177,7 @@ public: SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, - SmallVectorImpl<int> &Mask) const { + ArrayRef<int> Mask) const { unsigned NumMaskElts = VT.getVectorNumElements(); unsigned NumDestElts = NVT.getVectorNumElements(); unsigned NumEltsGrowth = NumDestElts / NumMaskElts; @@ -893,7 +893,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Node->dump( &DAG); dbgs() << "\n"; #endif - assert(0 && "Do not know how to legalize this operator!"); + llvm_unreachable("Do not know how to legalize this operator!"); case ISD::CALLSEQ_START: case ISD::CALLSEQ_END: @@ -910,7 +910,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Tmp4 = SDValue(Node, 1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned load and the target doesn't support it, // expand it. @@ -1079,7 +1079,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Tmp2 = Ch; } else { switch (TLI.getLoadExtAction(ExtType, SrcVT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; // FALLTHROUGH @@ -1185,7 +1185,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Tmp3 = ST->getValue(); EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. 
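The ShuffleWithNarrowerEltType helper above (now taking ArrayRef<int>) grows each mask entry by the element-count ratio; as a standalone sketch:

    #include <vector>

    // Each original mask entry expands to NumEltsGrowth consecutive entries
    // in the narrower-element mask; undef entries stay undef.
    std::vector<int> growShuffleMask(const std::vector<int> &Mask,
                                     unsigned NumEltsGrowth) {
      std::vector<int> NewMask;
      for (unsigned i = 0, e = Mask.size(); i != e; ++i)
        for (unsigned g = 0; g != NumEltsGrowth; ++g)
          NewMask.push_back(Mask[i] < 0
                                ? -1
                                : Mask[i] * (int)NumEltsGrowth + (int)g);
      return NewMask;
    }
    // Matches the comment's example: the v4i32 mask <0,1,0,1> with growth 2
    // becomes the v8i16 mask <0,1,2,3,0,1,2,3>.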
@@ -1290,7 +1290,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { ReplaceNode(SDValue(Node, 0), Result); } else { switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. @@ -1556,7 +1556,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, EVT OpVT = LHS.getValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); switch (TLI.getCondCodeAction(CCCode, OpVT)) { - default: assert(0 && "Unknown condition code action!"); + default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: // Nothing to do. break; @@ -1564,7 +1564,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { - default: assert(0 && "Don't know how to expand this condition!"); + default: llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break; @@ -1699,7 +1699,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // If all elements are constants, create a load from the constant pool. if (isConstant) { - std::vector<Constant*> CV; + SmallVector<Constant*, 16> CV; for (unsigned i = 0, e = NumElems; i != e; ++i) { if (ConstantFPSDNode *V = dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) { @@ -1788,7 +1788,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), isTailCall, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); if (!CallInfo.second.getNode()) @@ -1821,7 +1821,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); return CallInfo.first; @@ -1853,7 +1853,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); return CallInfo; @@ -1866,7 +1866,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; case MVT::f80: LC = Call_F80; break; @@ -1883,7 +1883,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: 
assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; case MVT::i32: LC = Call_I32; break; @@ -1898,7 +1898,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; @@ -1943,7 +1943,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; @@ -1985,7 +1985,8 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); // Remainder is loaded back from the stack frame. SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, @@ -2160,7 +2161,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // offset depending on the data type. 
uint64_t FF; switch (Op0.getValueType().getSimpleVT().SimpleTy) { - default: assert(0 && "Unsupported integer type!"); + default: llvm_unreachable("Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) @@ -2282,7 +2283,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { EVT SHVT = TLI.getShiftAmountTy(VT); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; switch (VT.getSimpleVT().SimpleTy) { - default: assert(0 && "Unhandled Expand type in BSWAP!"); + default: llvm_unreachable("Unhandled Expand type in BSWAP!"); case MVT::i16: Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); @@ -2339,7 +2340,7 @@ static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl) { switch (Opc) { - default: assert(0 && "Cannot expand this yet!"); + default: llvm_unreachable("Cannot expand this yet!"); case ISD::CTPOP: { EVT VT = Op.getValueType(); EVT ShVT = TLI.getShiftAmountTy(VT); @@ -2438,7 +2439,6 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { switch (Opc) { default: llvm_unreachable("Unhandled atomic intrinsic Expand!"); - break; case ISD::ATOMIC_SWAP: switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type for atomic!"); @@ -2564,7 +2564,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy()), Args, DAG, dl); @@ -2641,7 +2641,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("abort", TLI.getPointerTy()), Args, DAG, dl); Results.push_back(CallResult.second); @@ -2795,15 +2795,57 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(2), dl)); break; case ISD::VECTOR_SHUFFLE: { - SmallVector<int, 8> Mask; - cast<ShuffleVectorSDNode>(Node)->getMask(Mask); + SmallVector<int, 32> NewMask; + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask(); EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); - if (!TLI.isTypeLegal(EltVT)) - EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + if (!TLI.isTypeLegal(EltVT)) { + + EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); + + // BUILD_VECTOR operands are allowed to be wider than the element type. + // But if NewEltVT is smaller than EltVT the BUILD_VECTOR does not accept it. + if (NewEltVT.bitsLT(EltVT)) { + + // Convert shuffle node. + // If original node was v4i64 and the new EltVT is i32, + // cast operands to v8i32 and re-build the mask. + + // Calculate new VT, the size of the new VT should be equal to original.
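The FF constants in the ExpandLegalINT_TO_FP table above are IEEE-754 single-precision encodings of powers of two, which is quick to verify (hypothetical helper):

    #include <cstdint>

    // 2^N as a float has a zero mantissa and a biased exponent of 127 + N.
    uint32_t powerOfTwoFloatBits(unsigned N) { return (127u + N) << 23; }
    // powerOfTwoFloatBits(8)  == 0x43800000   (2^8,  the MVT::i8  entry)
    // powerOfTwoFloatBits(16) == 0x47800000   (2^16, the MVT::i16 entry)
    // powerOfTwoFloatBits(32) == 0x4F800000   (2^32, the MVT::i32 entry)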
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, + VT.getSizeInBits()/NewEltVT.getSizeInBits()); + assert(NewVT.bitsEq(VT)); + + // cast operands to new VT + Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0); + Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1); + + // Convert the shuffle mask + unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements(); + + // EltVT gets smaller + assert(factor > 0); + + for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { + if (Mask[i] < 0) { + for (unsigned fi = 0; fi < factor; ++fi) + NewMask.push_back(Mask[i]); + } + else { + for (unsigned fi = 0; fi < factor; ++fi) + NewMask.push_back(Mask[i]*factor+fi); + } + } + Mask = NewMask; + VT = NewVT; + } + EltVT = NewEltVT; + } unsigned NumElems = VT.getVectorNumElements(); - SmallVector<SDValue, 8> Ops; + SmallVector<SDValue, 16> Ops; for (unsigned i = 0; i != NumElems; ++i) { if (Mask[i] < 0) { Ops.push_back(DAG.getUNDEF(EltVT)); @@ -2812,14 +2854,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned Idx = Mask[i]; if (Idx < NumElems) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Node->getOperand(0), + Op0, DAG.getIntPtrConstant(Idx))); else Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Node->getOperand(1), + Op1, DAG.getIntPtrConstant(Idx - NumElems))); } + Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); + // We may have changed the BUILD_VECTOR type. Cast it back to the Node type. + Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1); Results.push_back(Tmp1); break; } @@ -2980,7 +3025,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::EXCEPTIONADDR: { - unsigned Reg = TLI.getExceptionAddressRegister(); + unsigned Reg = TLI.getExceptionPointerRegister(); assert(Reg && "Can't expand to unknown register!"); Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg, Node->getValueType(0))); @@ -3520,8 +3565,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { break; } case ISD::VECTOR_SHUFFLE: { - SmallVector<int, 8> Mask; - cast<ShuffleVectorSDNode>(Node)->getMask(Mask); + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask(); // Cast the two input vectors. Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0)); @@ -3546,6 +3590,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1, Tmp2, Node->getOperand(2))); break; } + case ISD::FPOW: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(ISD::FPOW, dl, NVT, Tmp1, Tmp2); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp3, DAG.getIntPtrConstant(0))); + break; + } + case ISD::FLOG2: + case ISD::FEXP2: + case ISD::FLOG: + case ISD::FEXP: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp2, DAG.getIntPtrConstant(0))); + break; + } } // Replace the original node with the legalized result. diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 6732d37..e393896 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -672,7 +672,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, case ISD::SETUEQ: LC2 = (VT == MVT::f32) ? 
RTLIB::OEQ_F32 : RTLIB::OEQ_F64; break; - default: assert(false && "Do not know how to soften this setcc!"); + default: llvm_unreachable("Do not know how to soften this setcc!"); } } @@ -1212,7 +1212,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, switch (SrcVT.getSimpleVT().SimpleTy) { default: - assert(false && "Unsupported UINT_TO_FP!"); + llvm_unreachable("Unsupported UINT_TO_FP!"); case MVT::i32: Parts = TwoE32; break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 1c02c4f..41506d1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -212,9 +212,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { DebugLoc dl = N->getDebugLoc(); switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); - break; case TargetLowering::TypeLegal: break; case TargetLowering::TypePromoteInteger: @@ -252,9 +249,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp); } case TargetLowering::TypeWidenVector: - if (OutVT.bitsEq(NInVT)) - // The input is widened to the same size. Convert to the widened value. - return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp)); + // The input is widened to the same size. Convert to the widened value. + // Make sure that the outgoing value is not a vector, because this would + // make us bitcast between two vectors which are legalized in different ways. + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) + return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp)); } return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, @@ -489,7 +488,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { - SDValue Mask = GetPromotedInteger(N->getOperand(0)); + SDValue Mask = N->getOperand(0); + EVT OpTy = N->getOperand(1).getValueType(); + + // Promote all the way up to the canonical SetCC type. + Mask = PromoteTargetBoolean(Mask, TLI.getSetCCResultType(OpTy)); SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); return DAG.getNode(ISD::VSELECT, N->getDebugLoc(), @@ -1176,7 +1179,6 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { switch (Opc) { default: llvm_unreachable("Unhandled atomic intrinsic Expand!"); - break; case ISD::ATOMIC_SWAP: switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type for atomic!"); @@ -1395,15 +1397,15 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { } } -#if 0 - // FIXME: This code is broken for shifts with a zero amount! // If we know that all of the high bits of the shift amount are zero, then we // can do this as a couple of simple shifts. if ((KnownZero & HighBitMask) == HighBitMask) { - // Compute 32-amt. - SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy, - DAG.getConstant(NVTBits, ShTy), - Amt); + // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined + // shift if x is zero. We can use XOR here because x is known to be smaller + // than 32. 
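A standalone check of the identity this comment relies on (plain C++, not the committed DAG code): for 0 <= Amt < 32, Amt ^ 31 equals 31 - Amt, so shifting by 1 and then by Amt ^ 31 moves bits by 32 - Amt in total, and the result degenerates to zero when Amt is 0 instead of invoking an undefined shift by 32:

#include <cassert>
#include <cstdint>

// Expand a 64-bit SHL into 32-bit ops when Amt is known to be < 32.
static void shl64(uint32_t InL, uint32_t InH, unsigned Amt,
                  uint32_t &Lo, uint32_t &Hi) {
  unsigned Amt2 = Amt ^ 31;          // == 31 - Amt, since Amt < 32
  uint32_t Sh1 = InL >> 1;           // move one bit first...
  uint32_t Sh2 = Sh1 >> Amt2;        // ...then the remaining 31 - Amt
  Lo = InL << Amt;
  Hi = (InH << Amt) | Sh2;           // Sh2 is 0 when Amt == 0, as wanted
}

int main() {
  for (unsigned Amt = 0; Amt < 32; ++Amt) {
    uint64_t In = 0x0123456789ABCDEFull;
    uint32_t Lo, Hi;
    shl64((uint32_t)In, (uint32_t)(In >> 32), Amt, Lo, Hi);
    assert(((uint64_t)Hi << 32 | Lo) == (In << Amt));
    assert((Amt ^ 31) == 31 - Amt);  // the XOR identity itself
  }
  return 0;
}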
+ SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt, + DAG.getConstant(NVTBits-1, ShTy)); + unsigned Op1, Op2; switch (N->getOpcode()) { default: llvm_unreachable("Unknown shift"); @@ -1412,13 +1414,23 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break; } - Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt); - Hi = DAG.getNode(ISD::OR, NVT, - DAG.getNode(Op1, NVT, InH, Amt), - DAG.getNode(Op2, NVT, InL, Amt2)); + // When shifting right the arithmetic for Lo and Hi is swapped. + if (N->getOpcode() != ISD::SHL) + std::swap(InL, InH); + + // Use a little trick to get the bits that move from Lo to Hi. First + // shift by one bit. + SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, ShTy)); + // Then compute the remaining shift with amount-1. + SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2); + + Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt); + Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2); + + if (N->getOpcode() != ISD::SHL) + std::swap(Hi, Lo); return true; } -#endif return false; } @@ -1498,8 +1510,6 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); return true; } - - return false; } void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, @@ -2311,8 +2321,10 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(Chain, RetTy, true, false, false, false, - 0, TLI.getLibcallCallingConv(LC), false, - true, Func, Args, DAG, dl); + 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Func, Args, DAG, dl); SplitInteger(CallInfo.first, Lo, Hi); SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, @@ -2787,7 +2799,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { else if (SrcVT == MVT::i128) FF = APInt(32, F32TwoE128); else - assert(false && "Unsupported UINT_TO_FP!"); + llvm_unreachable("Unsupported UINT_TO_FP!"); // Check whether the sign bit is set. SDValue Lo, Hi; diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 04a6a4a..439aa4d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -222,8 +222,6 @@ bool DAGTypeLegalizer::run() { for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { EVT ResultVT = N->getValueType(i); switch (getTypeAction(ResultVT)) { - default: - assert(false && "Unknown action!"); case TargetLowering::TypeLegal: break; // The following calls must take care of *all* of the node's results, @@ -275,8 +273,6 @@ ScanOperands: EVT OpVT = N->getOperand(i).getValueType(); switch (getTypeAction(OpVT)) { - default: - assert(false && "Unknown action!"); case TargetLowering::TypeLegal: continue; // The following calls must either replace all of the node's results @@ -752,7 +748,11 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { - assert(Result.getValueType() == Op.getValueType().getVectorElementType() && + // Note that in some cases vector operation operands may be greater than + // the vector element type. For example BUILD_VECTOR of type <1 x i1> with + // a constant i8 operand. 
+ assert(Result.getValueType().getSizeInBits() >= + Op.getValueType().getVectorElementType().getSizeInBits() && "Invalid type for scalarized vector"); AnalyzeNewValue(Result); @@ -1056,8 +1056,9 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), false, - /*isReturnValueUsed=*/true, + false, 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); return CallInfo.first; } @@ -1088,7 +1089,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); return CallInfo; diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 7c5472b..a8ff7c6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -45,8 +45,6 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Handle some special cases efficiently. switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 4696c0d..3ae8345 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -141,7 +141,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { EVT ValVT = ST->getValue().getValueType(); if (StVT.isVector() && ST->isTruncatingStore()) switch (TLI.getTruncStoreAction(ValVT, StVT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: return TranslateLegalizeResults(Op, Result); case TargetLowering::Custom: diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7ca0d1e..a8aee12 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -404,6 +404,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { N->dump(&DAG); dbgs() << "\n"); SDValue Lo, Hi; + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; switch (N->getOpcode()) { default: @@ -1565,12 +1569,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { DebugLoc dl = N->getDebugLoc(); switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); - break; case TargetLowering::TypeLegal: break; case TargetLowering::TypePromoteInteger: + // If the incoming type is a vector that is being promoted, then + // we know that the elements are arranged differently and that we + // must perform the conversion using a stack slot. + if (InVT.isVector()) + break; + // If the InOp is promoted to the same size, convert it. Otherwise, // fall out of the switch and widen the promoted input. 
InOp = GetPromotedInteger(InOp); @@ -2326,19 +2333,37 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Increment)); + SDValue L; if (LdWidth < NewVTWidth) { // Our current type we are using is too large, find a better size NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); NewVTWidth = NewVT.getSizeInBits(); - } - - SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, isInvariant, MinAlign(Align, Increment)); - LdChain.push_back(LdOp.getValue(1)); - LdOps.push_back(LdOp); + LdChain.push_back(L.getValue(1)); + if (L->getValueType(0).isVector()) { + SmallVector<SDValue, 16> Loads; + Loads.push_back(L); + unsigned size = L->getValueSizeInBits(0); + while (size < LdOp->getValueSizeInBits(0)) { + Loads.push_back(DAG.getUNDEF(L->getValueType(0))); + size += L->getValueSizeInBits(0); + } + L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), + &Loads[0], Loads.size()); + } + } else { + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, + LD->getPointerInfo().getWithOffset(Offset), isVolatile, + isNonTemporal, isInvariant, MinAlign(Align, Increment)); + LdChain.push_back(L.getValue(1)); + } + + LdOps.push_back(L); + LdWidth -= NewVTWidth; } diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp new file mode 100644 index 0000000..1a27f3f --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -0,0 +1,657 @@ +//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ResourcePriorityQueue class, which is a +// SchedulingPriorityQueue that prioritizes instructions using DFA state to +// reduce the length of the critical path through the basic block +// on VLIW platforms. +// The scheduler is basically a top-down adaptable list scheduler with DFA +// resource tracking added to the cost function. +// DFA is queried as a state machine to model "packets/bundles" during +// schedule. Currently packets/bundles are discarded at the end of +// scheduling, affecting only order of instructions. 
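The packet gating this header comment describes - query the automaton, issue into the current bundle, start a new bundle when the automaton rejects - can be sketched in isolation with a toy stand-in for the target DFA (assumed two-slot issue width; this is not the DFAPacketizer API):

#include <cassert>
#include <vector>

struct ToyDFA {                       // stand-in for a generated automaton
  unsigned Used = 0, Width = 2;
  bool canReserveResources() const { return Used < Width; }
  void reserveResources() { ++Used; }
  void clearResources() { Used = 0; }
};

int main() {
  ToyDFA DFA;
  std::vector<int> Packet;
  int Instrs[] = {1, 2, 3, 4, 5};
  for (int I : Instrs) {
    if (!DFA.canReserveResources()) { // packet full: start a new bundle
      DFA.clearResources();
      Packet.clear();
    }
    DFA.reserveResources();           // issue into the current packet
    Packet.push_back(I);
  }
  assert(Packet.size() <= 2);         // bundles are bounded by the DFA
  return 0;
}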
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "scheduler" +#include "llvm/CodeGen/ResourcePriorityQueue.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLowering.h" + +using namespace llvm; + +static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden, + cl::ZeroOrMore, cl::init(false), + cl::desc("Disable use of DFA during scheduling")); + +static cl::opt<signed> RegPressureThreshold( + "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5), + cl::desc("Track reg pressure and switch priority to in-depth")); + + +ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) : + Picker(this), + InstrItins(IS->getTargetLowering().getTargetMachine().getInstrItineraryData()) +{ + TII = IS->getTargetLowering().getTargetMachine().getInstrInfo(); + TRI = IS->getTargetLowering().getTargetMachine().getRegisterInfo(); + TLI = &IS->getTargetLowering(); + + const TargetMachine &tm = (*IS->MF).getTarget(); + ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL); + // This hard requirement could be relaxed, but for now + // do not let it proceed. + assert (ResourcesModel && "Unimplemented CreateTargetScheduleState."); + + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF); + + ParallelLiveRanges = 0; + HorizontalVerticalBalance = 0; +} + +unsigned +ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { + unsigned NumberDeps = 0; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + + SUnit *PredSU = I->getSUnit(); + const SDNode *ScegN = PredSU->getNode(); + + if (!ScegN) + continue; + + // If the value is passed to CopyToReg, it is probably + // live outside the BB. + switch (ScegN->getOpcode()) { + default: break; + case ISD::TokenFactor: break; + case ISD::CopyFromReg: NumberDeps++; break; + case ISD::CopyToReg: break; + case ISD::INLINEASM: break; + } + if (!ScegN->isMachineOpcode()) + continue; + + for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { + EVT VT = ScegN->getValueType(i); + if (TLI->isTypeLegal(VT) + && (TLI->getRegClassFor(VT)->getID() == RCId)) { + NumberDeps++; + break; + } + } + } + return NumberDeps; +} + +unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, + unsigned RCId) { + unsigned NumberDeps = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + + SUnit *SuccSU = I->getSUnit(); + const SDNode *ScegN = SuccSU->getNode(); + if (!ScegN) + continue; + + // If the value is passed to CopyToReg, it is probably + // live outside the BB.
+ switch (ScegN->getOpcode()) { + default: break; + case ISD::TokenFactor: break; + case ISD::CopyFromReg: break; + case ISD::CopyToReg: NumberDeps++; break; + case ISD::INLINEASM: break; + } + if (!ScegN->isMachineOpcode()) + continue; + + for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { + const SDValue &Op = ScegN->getOperand(i); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (TLI->isTypeLegal(VT) + && (TLI->getRegClassFor(VT)->getID() == RCId)) { + NumberDeps++; + break; + } + } + } + return NumberDeps; +} + +static unsigned numberCtrlDepsInSU(SUnit *SU) { + unsigned NumberDeps = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (I->isCtrl()) + NumberDeps++; + + return NumberDeps; +} + +static unsigned numberCtrlPredInSU(SUnit *SU) { + unsigned NumberDeps = 0; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (I->isCtrl()) + NumberDeps++; + + return NumberDeps; +} + +/// +/// Initialize nodes. +/// +void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + NumNodesSolelyBlocking.resize(SUnits->size(), 0); + + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { + SUnit *SU = &(*SUnits)[i]; + initNumRegDefsLeft(SU); + SU->NodeQueueId = 0; + } +} + +/// This heuristic is used if DFA scheduling is not desired +/// for some VLIW platform. +bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { + // The isScheduleHigh flag allows nodes with wraparound dependencies that + // cannot easily be modeled as edges with latencies to be scheduled as + // soon as possible in a top-down schedule. + if (LHS->isScheduleHigh && !RHS->isScheduleHigh) + return false; + + if (!LHS->isScheduleHigh && RHS->isScheduleHigh) + return true; + + unsigned LHSNum = LHS->NodeNum; + unsigned RHSNum = RHS->NodeNum; + + // The most important heuristic is scheduling the critical path. + unsigned LHSLatency = PQ->getLatency(LHSNum); + unsigned RHSLatency = PQ->getLatency(RHSNum); + if (LHSLatency < RHSLatency) return true; + if (LHSLatency > RHSLatency) return false; + + // After that, if two nodes have identical latencies, look to see if one will + // unblock more other nodes than the other. + unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); + unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); + if (LHSBlocked < RHSBlocked) return true; + if (LHSBlocked > RHSBlocked) return false; + + // Finally, just to provide a stable ordering, use the node number as a + // deciding factor. + return LHSNum < RHSNum; +} + + +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->getSUnit(); + if (!Pred.isScheduled) { + // We found an available, but not scheduled, predecessor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + return 0; + OnlyAvailablePred = &Pred; + } + } + return OnlyAvailablePred; +} + +void ResourcePriorityQueue::push(SUnit *SU) { + // Look at all of the successors of this node. Count the number of nodes that + // this node is the sole unscheduled node for. 
+ unsigned NumNodesBlocking = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (getSingleUnscheduledPred(I->getSUnit()) == SU) + ++NumNodesBlocking; + + NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; + Queue.push_back(SU); +} + +/// Check if scheduling of this SU is possible +/// in the current packet. +bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { + if (!SU || !SU->getNode()) + return false; + + // If this is a compound instruction, + // it is likely to be a call. Do not delay it. + if (SU->getNode()->getGluedNode()) + return true; + + // First see if the pipeline could receive this instruction + // in the current cycle. + if (SU->getNode()->isMachineOpcode()) + switch (SU->getNode()->getMachineOpcode()) { + default: + if (!ResourcesModel->canReserveResources(&TII->get( + SU->getNode()->getMachineOpcode()))) + return false; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + break; + } + + // Now make sure there are no other dependencies + // on instructions already in the packet. + for (unsigned i = 0, e = Packet.size(); i != e; ++i) + for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), + E = Packet[i]->Succs.end(); I != E; ++I) { + // Since we do not add pseudos to packets, might as well + // ignore order deps. + if (I->isCtrl()) + continue; + + if (I->getSUnit() == SU) + return false; + } + + return true; +} + +/// Keep track of available resources. +void ResourcePriorityQueue::reserveResources(SUnit *SU) { + // If this SU does not fit in the packet, + // start a new one. + if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) { + ResourcesModel->clearResources(); + Packet.clear(); + } + + if (SU->getNode() && SU->getNode()->isMachineOpcode()) { + switch (SU->getNode()->getMachineOpcode()) { + default: + ResourcesModel->reserveResources(&TII->get( + SU->getNode()->getMachineOpcode())); + break; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + break; + } + Packet.push_back(SU); + } + // Forcefully end the packet for PseudoOps. + else { + ResourcesModel->clearResources(); + Packet.clear(); + } + + // If the packet is now full, reset the state so in the next cycle + // we start fresh. + if (Packet.size() >= InstrItins->IssueWidth) { + ResourcesModel->clearResources(); + Packet.clear(); + } +} + +signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { + signed RegBalance = 0; + + if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) + return RegBalance; + + // Gen estimate. + for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) { + EVT VT = SU->getNode()->getValueType(i); + if (TLI->isTypeLegal(VT) + && TLI->getRegClassFor(VT) + && TLI->getRegClassFor(VT)->getID() == RCId) + RegBalance += numberRCValSuccInSU(SU, RCId); + } + // Kill estimate. + for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) { + const SDValue &Op = SU->getNode()->getOperand(i); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (isa<ConstantSDNode>(Op.getNode())) + continue; + + if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT) + && TLI->getRegClassFor(VT)->getID() == RCId) + RegBalance -= numberRCValPredInSU(SU, RCId); + } + return RegBalance; +} + +/// Estimates the change in reg pressure from this SU.
+/// This is achieved by trivial tracking of defined +/// and used vregs in dependent instructions. +/// The RawPressure flag makes this function ignore +/// existing reg file sizes and report the raw def/use +/// balance. +signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) { + signed RegBalance = 0; + + if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) + return RegBalance; + + if (RawPressure) { + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) { + const TargetRegisterClass *RC = *I; + RegBalance += rawRegPressureDelta(SU, RC->getID()); + } + } + else { + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) { + const TargetRegisterClass *RC = *I; + if ((RegPressure[RC->getID()] + + rawRegPressureDelta(SU, RC->getID()) > 0) && + (RegPressure[RC->getID()] + + rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()])) + RegBalance += rawRegPressureDelta(SU, RC->getID()); + } + } + + return RegBalance; +} + +// Constants used to denote the relative importance of +// heuristic components for cost computation. +static const unsigned PriorityOne = 200; +static const unsigned PriorityTwo = 100; +static const unsigned PriorityThree = 50; +static const unsigned PriorityFour = 15; +static const unsigned PriorityFive = 5; +static const unsigned ScaleOne = 20; +static const unsigned ScaleTwo = 10; +static const unsigned ScaleThree = 5; +static const unsigned FactorOne = 2; + +/// Returns a single number reflecting the benefit of scheduling SU +/// in the current cycle. +signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { + // Initial trivial priority. + signed ResCount = 1; + + // Do not waste time on a node that is already scheduled. + if (SU->isScheduled) + return ResCount; + + // Forced priority is high. + if (SU->isScheduleHigh) + ResCount += PriorityOne; + + // Adaptable scheduling: + // a small, but very parallel + // region, where reg pressure is an issue. + if (HorizontalVerticalBalance > RegPressureThreshold) { + // Critical path first. + ResCount += (SU->getHeight() * ScaleTwo); + // If resources are available for it, multiply the + // chance of scheduling. + if (isResourceAvailable(SU)) + ResCount <<= FactorOne; + + // Consider the change to reg pressure from scheduling + // this SU. + ResCount -= (regPressureDelta(SU,true) * ScaleOne); + } + // Default heuristic: greedy and + // critical path driven. + else { + // Critical path first. + ResCount += (SU->getHeight() * ScaleTwo); + // Now see how many instructions are blocked by this SU. + ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo); + // If resources are available for it, multiply the + // chance of scheduling. + if (isResourceAvailable(SU)) + ResCount <<= FactorOne; + + ResCount -= (regPressureDelta(SU) * ScaleTwo); + } + + // These are platform-specific things. + // They will need to go into the back end + // and be accessed from here via a hook. + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { + if (N->isMachineOpcode()) { + const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); + if (TID.isCall()) + ResCount += (PriorityThree + (ScaleThree*N->getNumValues())); + } + else + switch (N->getOpcode()) { + default: break; + case ISD::TokenFactor: + case ISD::CopyFromReg: + case ISD::CopyToReg: + ResCount += PriorityFive; + break; + + case ISD::INLINEASM: + ResCount += PriorityFour; + break; + } + } + return ResCount; +} + + +/// Main resource tracking point.
+void ResourcePriorityQueue::ScheduledNode(SUnit *SU) { + // Use NULL entry as an event marker to reset + // the DFA state. + if (!SU) { + ResourcesModel->clearResources(); + Packet.clear(); + return; + } + + const SDNode *ScegN = SU->getNode(); + // Update reg pressure tracking. + // First update current node. + if (ScegN->isMachineOpcode()) { + // Estimate generated regs. + for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { + EVT VT = ScegN->getValueType(i); + + if (TLI->isTypeLegal(VT)) { + const TargetRegisterClass *RC = TLI->getRegClassFor(VT); + if (RC) + RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID()); + } + } + // Estimate killed regs. + for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { + const SDValue &Op = ScegN->getOperand(i); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); + + if (TLI->isTypeLegal(VT)) { + const TargetRegisterClass *RC = TLI->getRegClassFor(VT); + if (RC) { + if (RegPressure[RC->getID()] > + (numberRCValPredInSU(SU, RC->getID()))) + RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID()); + else RegPressure[RC->getID()] = 0; + } + } + } + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0)) + continue; + --I->getSUnit()->NumRegDefsLeft; + } + } + + // Reserve resources for this SU. + reserveResources(SU); + + // Adjust number of parallel live ranges. + // Heuristic is simple - node with no data successors reduces + // number of live ranges. All others, increase it. + unsigned NumberNonControlDeps = 0; + + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + adjustPriorityOfUnscheduledPreds(I->getSUnit()); + if (!I->isCtrl()) + NumberNonControlDeps++; + } + + if (!NumberNonControlDeps) { + if (ParallelLiveRanges >= SU->NumPreds) + ParallelLiveRanges -= SU->NumPreds; + else + ParallelLiveRanges = 0; + + } + else + ParallelLiveRanges += SU->NumRegDefsLeft; + + // Track parallel live chains. + HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU)); + HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU)); +} + +void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) { + unsigned NodeNumDefs = 0; + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + if (N->isMachineOpcode()) { + const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); + // No register need be allocated for this. + if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { + NodeNumDefs = 0; + break; + } + NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs()); + } + else + switch(N->getOpcode()) { + default: break; + case ISD::CopyFromReg: + NodeNumDefs++; + break; + case ISD::INLINEASM: + NodeNumDefs++; + break; + } + + SU->NumRegDefsLeft = NodeNumDefs; +} + +/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just +/// scheduled. If SU is not itself available, then there is at least one +/// predecessor node that has not been scheduled yet. If SU has exactly ONE +/// unscheduled predecessor, we want to increase its priority: it getting +/// scheduled will make this node available, so it is better than some other +/// node of the same priority that will not make a node available. +void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) { + if (SU->isAvailable) return; // All preds scheduled. 
+ + SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); + if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) + return; + + // Okay, we found a single predecessor that is available, but not scheduled. + // Since it is available, it must be in the priority queue. First remove it. + remove(OnlyAvailablePred); + + // Reinsert the node into the priority queue, which recomputes its + // NumNodesSolelyBlocking value. + push(OnlyAvailablePred); +} + + +/// Main access point - returns the next instruction +/// to be placed in the scheduling sequence. +SUnit *ResourcePriorityQueue::pop() { + if (empty()) + return 0; + + std::vector<SUnit *>::iterator Best = Queue.begin(); + if (!DisableDFASched) { + signed BestCost = SUSchedulingCost(*Best); + for (std::vector<SUnit *>::iterator I = Queue.begin(), + E = Queue.end(); I != E; ++I) { + if (*I == *Best) + continue; + + if (SUSchedulingCost(*I) > BestCost) { + BestCost = SUSchedulingCost(*I); + Best = I; + } + } + } + // Use the default TD scheduling mechanism. + else { + for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), + E = Queue.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + } + + SUnit *V = *Best; + if (Best != prior(Queue.end())) + std::swap(*Best, Queue.back()); + + Queue.pop_back(); + + return V; +} + + +void ResourcePriorityQueue::remove(SUnit *SU) { + assert(!Queue.empty() && "Queue is empty!"); + std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); + if (I != prior(Queue.end())) + std::swap(*I, Queue.back()); + + Queue.pop_back(); +} + + +#ifdef NDEBUG +void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {} +#else +void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const { + ResourcePriorityQueue q = *this; + while (!q.empty()) { + SUnit *su = q.pop(); + dbgs() << "Height " << su->getHeight() << ": "; + su->dump(DAG); + } +} +#endif diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index b275c63..34ee1f3 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -447,7 +447,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, Added = true; } } - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { if (RegAdded.insert(*Alias)) { LRegs.push_back(*Alias); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 80162d7..1017d36 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1175,7 +1175,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SmallSet<unsigned, 4> &RegAdded, SmallVector<unsigned, 4> &LRegs, const TargetRegisterInfo *TRI) { - for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { + for (const uint16_t *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { // Check if Ref is live. if (!LiveRegDefs[*AliasI]) continue; @@ -1190,6 +1190,31 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, } } +/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered +/// by RegMask, and add them to LRegs. +static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, + std::vector<SUnit*> &LiveRegDefs, + SmallSet<unsigned, 4> &RegAdded, + SmallVector<unsigned, 4> &LRegs) { + // Look at all live registers. Skip Reg0 and the special CallResource.
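The loop that follows tests clobbering through MachineOperand::clobbersPhysReg. A standalone sketch of the mask encoding it assumes (a set bit marks a register as preserved across the call, so a clear bit means clobbered; toy register numbers, not in-tree code):

#include <cassert>
#include <cstdint>

static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg) {
  // Preserved-bit clear => the register is clobbered.
  return !(RegMask[PhysReg / 32] & (1u << PhysReg % 32));
}

int main() {
  uint32_t Mask[2] = {0, 0};
  Mask[0] |= 1u << 5;                 // mark register 5 as preserved
  assert(!clobbersPhysReg(Mask, 5));  // preserved, not clobbered
  assert(clobbersPhysReg(Mask, 6));   // everything else is clobbered
  assert(clobbersPhysReg(Mask, 40));  // the second mask word works too
  return 0;
}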
+ for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) { + if (!LiveRegDefs[i]) continue; + if (LiveRegDefs[i] == SU) continue; + if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue; + if (RegAdded.insert(i)) + LRegs.push_back(i); + } +} + +/// getNodeRegMask - Returns the register mask attached to an SDNode, if any. +static const uint32_t *getNodeRegMask(const SDNode *N) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (const RegisterMaskSDNode *Op = + dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode())) + return Op->getRegMask(); + return NULL; +} + /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay /// scheduling of the given node to satisfy live physical register dependencies. /// If the specific node is the last one that's available to schedule, do @@ -1255,6 +1280,9 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { LRegs.push_back(CallResource); } } + if (const uint32_t *RegMask = getNodeRegMask(Node)) + CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs); + const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; @@ -2635,7 +2663,8 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, const TargetRegisterInfo *TRI) { const unsigned *ImpDefs = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); - if(!ImpDefs) + const uint32_t *RegMask = getNodeRegMask(SU->getNode()); + if(!ImpDefs && !RegMask) return false; for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end(); @@ -2646,14 +2675,18 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, if (!PI->isAssignedRegDep()) continue; - for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) { - // Return true if SU clobbers this physical register use and the - // definition of the register reaches from DepSU. IsReachable queries a - // topological forward sort of the DAG (following the successors). - if (TRI->regsOverlap(*ImpDef, PI->getReg()) && - scheduleDAG->IsReachable(DepSU, PI->getSUnit())) - return true; - } + if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) && + scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + return true; + + if (ImpDefs) + for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) + // Return true if SU clobbers this physical register use and the + // definition of the register reaches from DepSU. IsReachable queries + // a topological forward sort of the DAG (following the successors). 
+ if (TRI->regsOverlap(*ImpDef, PI->getReg()) && + scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + return true; } } return false; @@ -2674,8 +2707,9 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, continue; const unsigned *SUImpDefs = TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); - if (!SUImpDefs) - return false; + const uint32_t *SURegMask = getNodeRegMask(SUNode); + if (!SUImpDefs && !SURegMask) + continue; for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); if (VT == MVT::Glue || VT == MVT::Other) @@ -2683,6 +2717,10 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, if (!N->hasAnyUseOfValue(i)) continue; unsigned Reg = ImpDefs[i - NumDefs]; + if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg)) + return true; + if (!SUImpDefs) + continue; for (;*SUImpDefs; ++SUImpDefs) { unsigned SUReg = *SUImpDefs; if (TRI->regsOverlap(Reg, SUReg)) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 9c27b2e..17b4901 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -53,6 +53,7 @@ namespace llvm { if (isa<ConstantSDNode>(Node)) return true; if (isa<ConstantFPSDNode>(Node)) return true; if (isa<RegisterSDNode>(Node)) return true; + if (isa<RegisterMaskSDNode>(Node)) return true; if (isa<GlobalAddressSDNode>(Node)) return true; if (isa<BasicBlockSDNode>(Node)) return true; if (isa<FrameIndexSDNode>(Node)) return true; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp new file mode 100644 index 0000000..7d12509 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -0,0 +1,276 @@ +//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a top-down list scheduler, using standard algorithms. +// The basic approach uses a priority queue of available nodes to schedule. +// One at a time, nodes are taken from the priority queue (thus in priority +// order), checked for legality to schedule, and emitted if legal. +// +// Nodes may not be legal to schedule either due to structural hazards (e.g. +// pipeline or resource constraints) or because an input to the instruction has +// not completed execution. 
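Stripped of hazard checks and latency, the algorithm this header comment describes reduces to a priority queue plus predecessor counting. A minimal standalone sketch with a made-up three-node graph (not the in-tree scheduler):

#include <cassert>
#include <queue>
#include <vector>

struct Node { int Prio; int PredsLeft; std::vector<int> Succs; };

int main() {
  // Nodes 0 and 1 both feed node 2; node 1 has the higher priority.
  std::vector<Node> G = {{5, 0, {2}}, {9, 0, {2}}, {1, 2, {}}};
  auto Cmp = [&](int A, int B) { return G[A].Prio < G[B].Prio; };
  std::priority_queue<int, std::vector<int>, decltype(Cmp)> Avail(Cmp);
  for (int i = 0; i != (int)G.size(); ++i)
    if (G[i].PredsLeft == 0)
      Avail.push(i);                    // leaves start out available

  std::vector<int> Sequence;
  while (!Avail.empty()) {
    int N = Avail.top();                // highest priority first
    Avail.pop();
    Sequence.push_back(N);              // (hazard checks elided here)
    for (int S : G[N].Succs)
      if (--G[S].PredsLeft == 0)        // last predecessor scheduled:
        Avail.push(S);                  // the successor becomes available
  }
  assert((Sequence == std::vector<int>{1, 0, 2}));
  return 0;
}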
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/ResourcePriorityQueue.h" +#include <climits> +using namespace llvm; + +STATISTIC(NumNoops , "Number of noops inserted"); +STATISTIC(NumStalls, "Number of pipeline stalls"); + +static RegisterScheduler + VLIWScheduler("vliw-td", "VLIW scheduler", + createVLIWDAGScheduler); + +namespace { +//===----------------------------------------------------------------------===// +/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This +/// supports top-down scheduling. +/// +class ScheduleDAGVLIW : public ScheduleDAGSDNodes { +private: + /// AvailableQueue - The priority queue to use for the available SUnits. + /// + SchedulingPriorityQueue *AvailableQueue; + + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). Once the operands become available, the instruction is + /// added to the AvailableQueue. + std::vector<SUnit*> PendingQueue; + + /// HazardRec - The hazard recognizer to use. + ScheduleHazardRecognizer *HazardRec; + + /// AA - AliasAnalysis for making memory reference queries. + AliasAnalysis *AA; + +public: + ScheduleDAGVLIW(MachineFunction &mf, + AliasAnalysis *aa, + SchedulingPriorityQueue *availqueue) + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { + + const TargetMachine &tm = mf.getTarget(); + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + } + + ~ScheduleDAGVLIW() { + delete HazardRec; + delete AvailableQueue; + } + + void Schedule(); + +private: + void releaseSucc(SUnit *SU, const SDep &D); + void releaseSuccessors(SUnit *SU); + void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void listScheduleTopDown(); +}; +} // end anonymous namespace + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGVLIW::Schedule() { + DEBUG(dbgs() + << "********** List Scheduling BB#" << BB->getNumber() + << " '" << BB->getName() << "' **********\n"); + + // Build the scheduling graph. + BuildSchedGraph(AA); + + AvailableQueue->initNodes(SUnits); + + listScheduleTopDown(); + + AvailableQueue->releaseState(); +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the PendingQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) { + SUnit *SuccSU = D.getSUnit(); + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft == 0) { + dbgs() << "*** Scheduling failed!
***\n"; + SuccSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --SuccSU->NumPredsLeft; + + SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { + PendingQueue.push_back(SuccSU); + } +} + +void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) { + // Top down: release successors. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + assert(!I->isAssignedRegDep() && + "The list-td scheduler doesn't yet support physreg dependencies!"); + + releaseSucc(SU, *I); + } +} + +/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(SU->dump(this)); + + Sequence.push_back(SU); + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + + releaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue->ScheduledNode(SU); +} + +/// listScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void ScheduleDAGVLIW::listScheduleTopDown() { + unsigned CurCycle = 0; + + // Release any successors of the special Entry node. + releaseSuccessors(&EntrySU); + + // All leaves to AvailableQueue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. + if (SUnits[i].Preds.empty()) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // While AvailableQueue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + std::vector<SUnit*> NotReady; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty() || !PendingQueue.empty()) { + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + if (PendingQueue[i]->getDepth() == CurCycle) { + AvailableQueue->push(PendingQueue[i]); + PendingQueue[i]->isAvailable = true; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } + else { + assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); + } + } + + // If there are no instructions available, don't try to issue anything, and + // don't advance the hazard recognizer. + if (AvailableQueue->empty()) { + // Reset DFA state. + AvailableQueue->ScheduledNode(0); + ++CurCycle; + continue; + } + + SUnit *FoundSUnit = 0; + + bool HasNoopHazards = false; + while (!AvailableQueue->empty()) { + SUnit *CurSUnit = AvailableQueue->pop(); + + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); + if (HT == ScheduleHazardRecognizer::NoHazard) { + FoundSUnit = CurSUnit; + break; + } + + // Remember if this is a noop hazard. + HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; + + NotReady.push_back(CurSUnit); + } + + // Add the nodes that aren't ready back onto the available list. + if (!NotReady.empty()) { + AvailableQueue->push_all(NotReady); + NotReady.clear(); + } + + // If we found a node to schedule, do it now. 
+ if (FoundSUnit) { + scheduleNodeTopDown(FoundSUnit, CurCycle); + HazardRec->EmitInstruction(FoundSUnit); + + // If this is a pseudo-op node, we don't want to increment the current + // cycle. + if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! + ++CurCycle; + } else if (!HasNoopHazards) { + // Otherwise, we have a pipeline stall, but no other problem, just advance + // the current cycle and try again. + DEBUG(dbgs() << "*** Advancing cycle, no work to do\n"); + HazardRec->AdvanceCycle(); + ++NumStalls; + ++CurCycle; + } else { + // Otherwise, we have no instructions to issue and we have instructions + // that will fault if we don't do this right. This is the case for + // processors without pipeline interlocks and other cases. + DEBUG(dbgs() << "*** Emitting noop\n"); + HazardRec->EmitNoop(); + Sequence.push_back(0); // NULL here means noop + ++NumNoops; + ++CurCycle; + } + } + +#ifndef NDEBUG + VerifySchedule(/*isBottomUp=*/false); +#endif +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +/// createVLIWDAGScheduler - This creates a top-down list scheduler. +ScheduleDAGSDNodes * +llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS)); +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index dd626e2..796abf4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -383,7 +383,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::Register: ID.AddInteger(cast<RegisterSDNode>(N)->getReg()); break; - + case ISD::RegisterMask: + ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask()); + break; case ISD::SRCVALUE: ID.AddPointer(cast<SrcValueSDNode>(N)->getValue()); break; @@ -1037,10 +1039,8 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(apf, VT, isTarget); - } else { - assert(0 && "Unsupported type in getConstantFP"); - return SDValue(); - } + } else + llvm_unreachable("Unsupported type in getConstantFP"); } SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, @@ -1375,6 +1375,20 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { return SDValue(N, 0); } +SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), 0, 0); + ID.AddPointer(RegMask); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { FoldingSetNodeID ID; SDValue Ops[] = { Root }; @@ -2229,8 +2243,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); if (Tmp2 == 1) return 1; - return std::min(Tmp, Tmp2)-1; - break; + return std::min(Tmp, Tmp2)-1; case ISD::SUB: Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); @@ -2259,8 +2272,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // is, at worst, one more bit 
than the inputs. Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); if (Tmp == 1) return 1; // Early out. - return std::min(Tmp, Tmp2)-1; - break; + return std::min(Tmp, Tmp2)-1; case ISD::TRUNCATE: // FIXME: it's tricky to do anything useful for this, but it is an important // case for targets like X86. @@ -2571,17 +2583,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, "Vector element count mismatch!"); if (OpOpcode == ISD::TRUNCATE) return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); - else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || - OpOpcode == ISD::ANY_EXTEND) { + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || + OpOpcode == ISD::ANY_EXTEND) { // If the source is smaller than the dest, we still need an extend. if (Operand.getNode()->getOperand(0).getValueType().getScalarType() .bitsLT(VT.getScalarType())) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); - else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) + if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); - else - return Operand.getNode()->getOperand(0); + return Operand.getNode()->getOperand(0); } + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); break; case ISD::BITCAST: // Basic sanity checking. @@ -3143,16 +3156,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::SELECT: if (N1C) { if (N1C->getZExtValue()) - return N2; // select true, X, Y -> X - else - return N3; // select false, X, Y -> Y + return N2; // select true, X, Y -> X + return N3; // select false, X, Y -> Y } if (N2 == N3) return N2; // select C, X, X -> X break; case ISD::VECTOR_SHUFFLE: llvm_unreachable("should use getVectorShuffle constructor!"); - break; case ISD::INSERT_SUBVECTOR: { SDValue Index = N3; if (VT.isSimple() && N1.getValueType().isSimple() @@ -3285,8 +3296,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, /// used when a memcpy is turned into a memset when the source is a constant /// string ptr. static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, - const TargetLowering &TLI, - std::string &Str, unsigned Offset) { + const TargetLowering &TLI, StringRef Str) { // Handle vector with all elements zero. if (Str.empty()) { if (VT.isInteger()) @@ -3304,15 +3314,18 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, } assert(!VT.isVector() && "Can't handle vector type here!"); - unsigned NumBits = VT.getSizeInBits(); - unsigned MSB = NumBits / 8; + unsigned NumVTBytes = VT.getSizeInBits() / 8; + unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size())); + uint64_t Val = 0; - if (TLI.isLittleEndian()) - Offset = Offset + MSB - 1; - for (unsigned i = 0; i != MSB; ++i) { - Val = (Val << 8) | (unsigned char)Str[Offset]; - Offset += TLI.isLittleEndian() ? -1 : 1; + if (TLI.isLittleEndian()) { + for (unsigned i = 0; i != NumBytes; ++i) + Val |= (uint64_t)(unsigned char)Str[i] << i*8; + } else { + for (unsigned i = 0; i != NumBytes; ++i) + Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8; } + return DAG.getConstant(Val, VT); } @@ -3327,7 +3340,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, /// isMemSrcFromString - Returns true if memcpy source is a string constant. 
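The byte-packing loops added above can be checked in isolation: the constant's in-memory bytes must match the source string for either endianness. A standalone sketch (plain C++, an assumed 4-byte value type, not the committed code):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <string>

static uint64_t packBytes(const std::string &Str, unsigned NumVTBytes,
                          bool LittleEndian) {
  unsigned NumBytes = (unsigned)std::min<size_t>(NumVTBytes, Str.size());
  uint64_t Val = 0;
  if (LittleEndian) {
    for (unsigned i = 0; i != NumBytes; ++i)
      Val |= (uint64_t)(unsigned char)Str[i] << i * 8;           // byte i low
  } else {
    for (unsigned i = 0; i != NumBytes; ++i)
      Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes - i - 1) * 8;
  }
  return Val;
}

int main() {
  // "abcd" as an i32 constant: the first byte lands lowest on little-endian
  // and highest on big-endian, matching a byte-wise store of the string.
  assert(packBytes("abcd", 4, true) == 0x64636261u);   // LE: 'd''c''b''a'
  assert(packBytes("abcd", 4, false) == 0x61626364u);  // BE: 'a''b''c''d'
  return 0;
}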
/// -static bool isMemSrcFromString(SDValue Src, std::string &Str) { +static bool isMemSrcFromString(SDValue Src, StringRef &Str) { unsigned SrcDelta = 0; GlobalAddressSDNode *G = NULL; if (Src.getOpcode() == ISD::GlobalAddress) @@ -3341,11 +3354,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) { if (!G) return false; - const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal()); - if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false)) - return true; - - return false; + return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false); } /// FindOptimalMemOpLowering - Determines the optimial series memory ops @@ -3448,7 +3457,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) SrcAlign = Align; - std::string Str; + StringRef Str; bool CopyFromStr = isMemSrcFromString(Src, Str); bool isZeroStr = CopyFromStr && Str.empty(); unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); @@ -3485,7 +3494,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // We only handle zero vectors here. // FIXME: Handle other cases where store of vector immediate is done in // a single instruction. - Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); + Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff)); Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), DstPtrInfo.getWithOffset(DstOff), isVol, @@ -3727,8 +3736,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMCPY), false, - /*isReturnValueUsed=*/false, + TLI.getLibcallCallingConv(RTLIB::MEMCPY), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), TLI.getPointerTy()), Args, *this, dl); @@ -3779,8 +3789,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false, - /*isReturnValueUsed=*/false, + TLI.getLibcallCallingConv(RTLIB::MEMMOVE), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), TLI.getPointerTy()), Args, *this, dl); @@ -3839,8 +3850,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMSET), false, - /*isReturnValueUsed=*/false, + TLI.getLibcallCallingConv(RTLIB::MEMSET), + /*isTailCall=*/false, + /*doesNotReturn*/false, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), TLI.getPointerTy()), Args, *this, dl); @@ -5901,7 +5913,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { if (G) if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) if (getMachineOpcode() < TII->getNumOpcodes()) - return TII->get(getMachineOpcode()).getName(); + return TII->getName(getMachineOpcode()); return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; } if (G) { @@ -5945,7 +5957,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case 
ISD::BasicBlock: return "BasicBlock"; case ISD::VALUETYPE: return "ValueType"; case ISD::Register: return "Register"; - + case ISD::RegisterMask: return "RegisterMask"; case ISD::Constant: return "Constant"; case ISD::ConstantFP: return "ConstantFP"; case ISD::GlobalAddress: return "GlobalAddress"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 68c9514..4e4aa11 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -197,7 +197,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, // FP_ROUND's are always exact here. if (ValueVT.bitsLT(Val.getValueType())) return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, - DAG.getIntPtrConstant(1)); + DAG.getTargetConstant(1, TLI.getPointerTy())); return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } @@ -206,7 +206,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); llvm_unreachable("Unknown mismatch!"); - return SDValue(); } /// getCopyFromParts - Create a value that contains the specified legal parts @@ -353,10 +352,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else { - assert(PartVT.isInteger() && ValueVT.isInteger() && + assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && + ValueVT.isInteger() && "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); + if (PartVT == MVT::x86mmx) + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } } else if (PartBits == ValueVT.getSizeInBits()) { // Different types of the same size. @@ -364,10 +366,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { // If the parts cover less bits than value has, truncate the value. - assert(PartVT.isInteger() && ValueVT.isInteger() && + assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && + ValueVT.isInteger() && "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + if (PartVT == MVT::x86mmx) + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } // The value may have changed - recompute ValueVT. @@ -966,7 +971,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DAG.AddDbgValue(SDV, Val.getNode(), false); } } else - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); DanglingDebugInfoMap[V] = DanglingDebugInfo(); } } @@ -1056,6 +1061,23 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { return DAG.getMergeValues(&Constants[0], Constants.size(), getCurDebugLoc()); } + + if (const ConstantDataSequential *CDS = + dyn_cast<ConstantDataSequential>(C)) { + SmallVector<SDValue, 4> Ops; + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode(); + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. 
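// (Illustration of the flattening loop below, assuming a hypothetical
//  <4 x i32> ConstantDataVector: each getElementAsConstant(i) produces a
//  single-result constant node, so Ops ends up as { C0:0, C1:0, C2:0, C3:0 }
//  and feeds one BUILD_VECTOR of type v4i32; for an array-typed constant the
//  same Ops feed getMergeValues instead, as the isa<ArrayType> check shows.)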
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Ops.push_back(SDValue(Val, i)); + } + + if (isa<ArrayType>(CDS->getType())) + return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc()); + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size()); + } if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && @@ -1090,9 +1112,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { // Now that we know the number and type of the elements, get that number of // elements into the Ops array based on what kind of constant it is. SmallVector<SDValue, 16> Ops; - if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) { + if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) { for (unsigned i = 0; i != NumElements; ++i) - Ops.push_back(getValue(CP->getOperand(i))); + Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); EVT EltVT = TLI.getValueType(VecTy->getElementType()); @@ -1128,7 +1150,6 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } llvm_unreachable("Can't get register for value!"); - return SDValue(); } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { @@ -1287,8 +1308,8 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, } /// Return branch probability calculated by BranchProbabilityInfo for IR blocks. -uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src, - MachineBasicBlock *Dst) { +uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { BranchProbabilityInfo *BPI = FuncInfo.BPI; if (!BPI) return 0; @@ -1824,9 +1845,6 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { DAG.getBasicBlock(Return))); } -void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) { -} - void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!"); } @@ -1839,6 +1857,12 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); AddLandingPadInfo(LP, MMI, MBB); + // If there aren't registers to copy the values into (e.g., during SjLj + // exceptions), then don't bother to create these DAG nodes. + if (TLI.getExceptionPointerRegister() == 0 && + TLI.getExceptionSelectorRegister() == 0) + return; + SmallVector<EVT, 2> ValueVTs; ComputeValueVTs(TLI, LP.getType(), ValueVTs); @@ -2194,7 +2218,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRange LHSR(CR.Range.first, Pivot); CaseRange RHSR(Pivot, CR.Range.second); - Constant *C = Pivot->Low; + const Constant *C = Pivot->Low; MachineBasicBlock *FalseBB = 0, *TrueBB = 0; // We know that we branch to the LHS if the Value being switched on is @@ -2387,14 +2411,14 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases - for (size_t i = 1; i < SI.getNumSuccessors(); ++i) { - BasicBlock *SuccBB = SI.getSuccessor(i); + for (size_t i = 0; i < SI.getNumCases(); ++i) { + BasicBlock *SuccBB = SI.getCaseSuccessor(i); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; uint32_t ExtraWeight = BPI ? 
BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0; - Cases.push_back(Case(SI.getSuccessorValue(i), - SI.getSuccessorValue(i), + Cases.push_back(Case(SI.getCaseValue(i), + SI.getCaseValue(i), SMBB, ExtraWeight)); } std::sort(Cases.begin(), Cases.end(), CaseCmp()); @@ -2461,7 +2485,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // If there is only the default destination, branch to it if it is not the // next basic block. Otherwise, just fall through. - if (SI.getNumCases() == 1) { + if (!SI.getNumCases()) { // Update machine-CFG edges. // If this is not a fall-through branch, emit the branch. @@ -2691,7 +2715,8 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) { SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), - DestVT, N, DAG.getIntPtrConstant(0))); + DestVT, N, + DAG.getTargetConstant(0, TLI.getPointerTy()))); } void SelectionDAGBuilder::visitFPExt(const User &I){ @@ -2778,33 +2803,25 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { TLI.getValueType(I.getType()), InVec, InIdx)); } -// Utility for visitShuffleVector - Returns true if the mask is mask starting -// from SIndx and increasing to the element length (undefs are allowed). -static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) { - unsigned MaskNumElts = Mask.size(); - for (unsigned i = 0; i != MaskNumElts; ++i) - if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx))) +// Utility for visitShuffleVector - Return true if every element in Mask, +// beginning from position Pos and ending in Pos+Size, falls within the +// specified sequential range [L, L+Size) or is undef. +static bool isSequentialInRange(const SmallVectorImpl<int> &Mask, + int Pos, int Size, int Low) { + for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low) + if (Mask[i] >= 0 && Mask[i] != Low) return false; return true; } void SelectionDAGBuilder::visitShuffleVector(const User &I) { - SmallVector<int, 8> Mask; SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); - // Convert the ConstantVector mask operand into an array of ints, with -1 - // representing undef values. - SmallVector<Constant*, 8> MaskElts; - cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts); - unsigned MaskNumElts = MaskElts.size(); - for (unsigned i = 0; i != MaskNumElts; ++i) { - if (isa<UndefValue>(MaskElts[i])) - Mask.push_back(-1); - else - Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue()); - } - + SmallVector<int, 8> Mask; + ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); + unsigned MaskNumElts = Mask.size(); + EVT VT = TLI.getValueType(I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); @@ -2820,11 +2837,23 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // Mask is longer than the source vectors and is a multiple of the source // vectors. We can use concatenate vector to make the mask and vectors // lengths match. - if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) { - // The shuffle is concatenating two vectors together. - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), - VT, Src1, Src2)); - return; + if (SrcNumElts*2 == MaskNumElts) { + // First check for Src1 in low and Src2 in high + if (isSequentialInRange(Mask, 0, SrcNumElts, 0) && + isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) { + // The shuffle is concatenating two vectors together.
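// (Worked example, assuming SrcNumElts == 4: the mask <0,1,2,3,4,5,6,7>
//  passes this pair of checks and becomes CONCAT_VECTORS(Src1, Src2),
//  while <4,5,6,7,-1,1,2,3> fails here but passes the swapped checks
//  below and becomes CONCAT_VECTORS(Src2, Src1); the -1 entry is undef
//  and matches any expected value.)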
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + VT, Src1, Src2)); + return; + } + // Then check for Src2 in low and Src1 in high + if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) && + isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) { + // The shuffle is concatenating two vectors together. + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + VT, Src2, Src1)); + return; + } } // Pad both vectors with undefs to make them the same length as the mask. @@ -3048,7 +3077,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue N = getValue(I.getOperand(0)); - Type *Ty = I.getOperand(0)->getType(); + // Note that the pointer operand may be a vector of pointers. Take the scalar + // element which holds a pointer. + Type *Ty = I.getOperand(0)->getType()->getScalarType(); for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { @@ -3365,7 +3396,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { DebugLoc dl = getCurDebugLoc(); ISD::NodeType NT; switch (I.getOperation()) { - default: llvm_unreachable("Unknown atomicrmw operation"); return; + default: llvm_unreachable("Unknown atomicrmw operation"); case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break; case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break; case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break; @@ -3503,24 +3534,16 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); + Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { SDValue Op = getValue(I.getArgOperand(i)); - assert(TLI.isTypeLegal(Op.getValueType()) && - "Intrinsic uses a non-legal type?"); Ops.push_back(Op); } SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, I.getType(), ValueVTs); -#ifndef NDEBUG - for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) { - assert(TLI.isTypeLegal(ValueVTs[Val]) && - "Intrinsic uses a non-legal type?"); - } -#endif // NDEBUG if (HasChain) ValueVTs.push_back(MVT::Other); @@ -4480,9 +4503,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)))); return 0; case Intrinsic::setjmp: - return "_setjmp"+!TLI.usesUnderscoreSetJmp(); + return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; case Intrinsic::longjmp: - return "_longjmp"+!TLI.usesUnderscoreLongJmp(); + return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { // Assert for address < 256 since we support only user defined address // spaces. @@ -4550,7 +4573,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Check if address has undef value. if (isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return 0; } @@ -4560,11 +4583,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { N = UnusedArgNodeMap[Address]; SDDbgValue *SDV; if (N.getNode()) { - // Parameters are handled specially. 
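// On the &"_setjmp"[!TLI.usesUnderscoreSetJmp()] form above: indexing into
// the string literal keeps the result a const char*. Index 0 yields
// "_setjmp"; index 1 skips the leading underscore and yields "setjmp". The
// old "_setjmp"+N spelling computed the same pointer but is easy to misread
// as integer addition. A standalone sketch (jmpName is a hypothetical name):
static const char *jmpName(bool UsesUnderscore) {
  return &"_setjmp"[!UsesUnderscore]; // skip '_' when the target doesn't use it
}
// jmpName(true) points at "_setjmp"; jmpName(false) points at "setjmp".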
- bool isParameter = - DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable; if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); + // Parameters are handled specially. + bool isParameter = + (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable || + isa<Argument>(Address)); + const AllocaInst *AI = dyn_cast<AllocaInst>(Address); if (isParameter && !AI) { @@ -4584,7 +4609,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { 0, dl, SDNodeOrder); else { // Can't do anything with other non-AI cases yet. - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); + DEBUG(Address->dump()); return 0; } DAG.AddDbgValue(SDV, N.getNode(), isParameter); @@ -4606,7 +4633,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } } } - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } return 0; @@ -4652,7 +4679,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { // We may expand this to cover more cases. One case where we have no // data available is an unreferenced parameter. - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } @@ -4674,43 +4701,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); return 0; } - case Intrinsic::eh_exception: { - // Insert the EXCEPTIONADDR instruction. - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); - SDValue Ops[1]; - Ops[0] = DAG.getRoot(); - SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1); - setValue(&I, Op); - DAG.setRoot(Op.getValue(1)); - return 0; - } - - case Intrinsic::eh_selector: { - MachineBasicBlock *CallMBB = FuncInfo.MBB; - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (CallMBB->isLandingPad()) - AddCatchInfo(I, &MMI, CallMBB); - else { -#ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(&I); -#endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBB->addLiveIn(Reg); - } - - // Insert the EHSELECTION instruction. - SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); - SDValue Ops[2]; - Ops[0] = getValue(I.getArgOperand(0)); - Ops[1] = getRoot(); - SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); - DAG.setRoot(Op.getValue(1)); - setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32)); - return 0; - } case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. 
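// (With the eh.exception / eh.selector intrinsic cases deleted above, the
//  landing-pad values are produced when the landingpad instruction itself is
//  visited. A sketch of the usual CopyFromReg pattern, stated as an
//  assumption since the full visitLandingPad body is not part of this hunk:
//    unsigned PtrReg = TLI.getExceptionPointerRegister();
//    unsigned SelReg = TLI.getExceptionSelectorRegister();
//    SDValue Ex  = DAG.getCopyFromReg(DAG.getEntryNode(), dl, PtrReg,
//                                     TLI.getPointerTy());
//    SDValue Sel = DAG.getCopyFromReg(Ex.getValue(1), dl, SelReg,
//                                     TLI.getPointerTy()); )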
@@ -4843,6 +4833,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return 0; } + case Intrinsic::x86_avx_vinsertf128_pd_256: + case Intrinsic::x86_avx_vinsertf128_ps_256: + case Intrinsic::x86_avx_vinsertf128_si_256: { + DebugLoc dl = getCurDebugLoc(); + EVT DestVT = TLI.getValueType(I.getType()); + EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); + uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * + ElVT.getVectorNumElements(); + Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT, + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + DAG.getConstant(Idx, MVT::i32)); + setValue(&I, Res); + return 0; + } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -5058,7 +5063,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); - return 0; case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; @@ -5079,7 +5083,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(getRoot(), I.getType(), false, false, false, false, 0, CallingConv::C, - /*isTailCall=*/false, /*isReturnValueUsed=*/true, + /*isTailCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), Args, DAG, getCurDebugLoc()); DAG.setRoot(Result.second); @@ -5242,6 +5247,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(), CS.getCallingConv(), isTailCall, + CS.doesNotReturn(), !CS.getInstruction()->use_empty(), Callee, Args, DAG, getCurDebugLoc()); assert((isTailCall || Result.second.getNode()) && @@ -5477,23 +5483,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } - // See if any floating point values are being passed to this function. This is - // used to emit an undefined reference to fltused on Windows. - FunctionType *FT = - cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0)); MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (FT->isVarArg() && - !MMI.callsExternalVAFunctionWithFloatingPointArguments()) { - for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { - Type* T = I.getArgOperand(i)->getType(); - for (po_iterator<Type*> i = po_begin(T), e = po_end(T); - i != e; ++i) { - if (!i->isFloatingPointTy()) continue; - MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true); - break; - } - } - } + ComputeUsesVAFloatArgument(I, &MMI); const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { @@ -5636,7 +5627,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { (LibInfo->has(LibFunc::log2l) && Name == "log2l")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { + I.getType() == I.getArgOperand(0)->getType() && + I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(), Tmp.getValueType(), Tmp)); @@ -5647,7 +5639,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { + I.getType() == I.getArgOperand(0)->getType() && + I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(), Tmp.getValueType(), Tmp)); @@ -5690,22 +5683,6 @@ public: : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) { } - /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers - /// busy in OutputRegs/InputRegs. - void MarkAllocatedRegs(bool isOutReg, bool isInReg, - std::set<unsigned> &OutputRegs, - std::set<unsigned> &InputRegs, - const TargetRegisterInfo &TRI) const { - if (isOutReg) { - for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) - MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI); - } - if (isInReg) { - for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) - MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI); - } - } - /// getCallOperandValEVT - Return the EVT of the Value* that this operand /// corresponds to. If there is no Value* for this operand, it returns /// MVT::Other. @@ -5753,18 +5730,6 @@ public: return TLI.getValueType(OpTy, true); } - -private: - /// MarkRegAndAliases - Mark the specified register and all aliases in the - /// specified set. - static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs, - const TargetRegisterInfo &TRI) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg"); - Regs.insert(Reg); - if (const unsigned *Aliases = TRI.getAliasSet(Reg)) - for (; *Aliases; ++Aliases) - Regs.insert(*Aliases); - } }; typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; @@ -5778,39 +5743,13 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; /// allocation. This produces generally horrible, but correct, code. /// /// OpInfo describes the operand. -/// Input and OutputRegs are the set of already allocated physical registers. /// static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, DebugLoc DL, - SDISelAsmOperandInfo &OpInfo, - std::set<unsigned> &OutputRegs, - std::set<unsigned> &InputRegs) { + SDISelAsmOperandInfo &OpInfo) { LLVMContext &Context = *DAG.getContext(); - // Compute whether this value requires an input register, an output register, - // or both. - bool isOutReg = false; - bool isInReg = false; - switch (OpInfo.Type) { - case InlineAsm::isOutput: - isOutReg = true; - - // If there is an input constraint that matches this, we need to reserve - // the input register so no other inputs allocate to it. - isInReg = OpInfo.hasMatchingInput(); - break; - case InlineAsm::isInput: - isInReg = true; - isOutReg = false; - break; - case InlineAsm::isClobber: - isOutReg = true; - isInReg = true; - break; - } - - MachineFunction &MF = DAG.getMachineFunction(); SmallVector<unsigned, 4> Regs; @@ -5884,8 +5823,6 @@ static void GetRegistersForValue(SelectionDAG &DAG, } OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); - const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); - OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); return; } @@ -5916,8 +5853,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { /// ConstraintOperands - Information about all of the constraints. 
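// (On the I.onlyReadsMemory() guards added to the log2*/exp2* cases above:
//  a call such as exp2f is folded to ISD::FEXP2 only when the call site is
//  readonly/readnone, so a build that interposes a side-effecting exp2f
//  keeps the real libcall instead of the DAG node.)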
SDISelAsmOperandInfoVector ConstraintOperands; - std::set<unsigned> OutputRegs, InputRegs; - TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS); @@ -6050,7 +5985,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // constant pool entry to get its address. const Value *OpVal = OpInfo.CallOperandVal; if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || - isa<ConstantVector>(OpVal)) { + isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), TLI.getPointerTy()); } else { @@ -6079,8 +6014,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, - InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); } // Second pass - Loop over all of the operands, assigning virtual or physregs @@ -6091,8 +6025,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, - InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -6146,9 +6079,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Copy the output from the appropriate register. Find a register that // we can use. - if (OpInfo.AssignedRegs.Regs.empty()) - report_fatal_error("Couldn't allocate output reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'!"); + if (OpInfo.AssignedRegs.Regs.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "couldn't allocate output register for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } // If this is an indirect operand, store through the pointer after the // asm. @@ -6248,9 +6185,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { std::vector<SDValue> Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); - if (Ops.empty()) - report_fatal_error("Invalid operand for inline asm constraint '" + - Twine(OpInfo.ConstraintCode) + "'!"); + if (Ops.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "invalid operand for inline asm constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } // Add information to the INLINEASM node to know about this input. unsigned ResOpType = @@ -6281,9 +6222,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { "Don't know how to handle indirect register inputs yet!"); // Copy the input into the appropriate registers. 
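// (The allocation-failure path below follows the same recoverable-diagnostic
//  pattern as the output and constraint cases earlier: LLVMContext::emitError
//  attaches the message to the offending call instruction and lets codegen of
//  the rest of the function continue, where the old report_fatal_error
//  aborted the whole process. Minimal shape, same assumptions as the
//  surrounding code:
//    LLVMContext &Ctx = *DAG.getContext();
//    Ctx.emitError(CS.getInstruction(), "couldn't allocate input reg");
//    break; )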
- if (OpInfo.AssignedRegs.Regs.empty()) - report_fatal_error("Couldn't allocate input reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'!"); + if (OpInfo.AssignedRegs.Regs.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "couldn't allocate input reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag); @@ -6421,7 +6366,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg, unsigned NumFixedArgs, CallingConv::ID CallConv, bool isTailCall, - bool isReturnValueUsed, + bool doesNotRet, bool isReturnValueUsed, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) const { @@ -6518,7 +6463,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, } SmallVector<SDValue, 4> InVals; - Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall, + Chain = LowerCall(Chain, Callee, CallConv, isVarArg, doesNotRet, isTailCall, Outs, OutVals, Ins, dl, DAG, InVals); // Verify that the target's LowerCall behaved as expected. @@ -6587,7 +6532,6 @@ void TargetLowering::LowerOperationWrapper(SDNode *N, SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("LowerOperation not implemented for this target!"); - return SDValue(); } void diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 5147b6c..8cf88e1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -72,7 +72,6 @@ class TargetLowering; class TruncInst; class UIToFPInst; class UnreachableInst; -class UnwindInst; class VAArgInst; class ZExtInst; @@ -130,13 +129,13 @@ private: /// Case - A struct to record the Value for a switch case, and the /// case's target basic block. struct Case { - Constant* Low; - Constant* High; + const Constant *Low; + const Constant *High; MachineBasicBlock* BB; uint32_t ExtraWeight; Case() : Low(0), High(0), BB(0), ExtraWeight(0) { } - Case(Constant* low, Constant* high, MachineBasicBlock* bb, + Case(const Constant *low, const Constant *high, MachineBasicBlock *bb, uint32_t extraweight) : Low(low), High(high), BB(bb), ExtraWeight(extraweight) { } @@ -454,7 +453,8 @@ private: MachineBasicBlock* Default, MachineBasicBlock *SwitchBB); - uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); + uint32_t getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const; void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, uint32_t Weight = 0); public: @@ -474,7 +474,6 @@ private: // These all get lowered before this pass. 
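// (Plumbing summary for the doesNotRet flag added above: callers pass
//  CS.doesNotReturn() into TargetLowering::LowerCallTo, which forwards it to
//  the target's LowerCall hook, so a backend can treat noreturn calls
//  specially. Sketch of a call site, with unrelated arguments abbreviated:
//    TLI.LowerCallTo(Chain, RetTy, ..., /*isTailCall=*/false,
//                    /*doesNotRet=*/CS.doesNotReturn(),
//                    /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); )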
void visitInvoke(const InvokeInst &I); void visitResume(const ResumeInst &I); - void visitUnwind(const UnwindInst &I); void visitBinary(const User &I, unsigned OpCode); void visitShift(const User &I, unsigned Opcode); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 3c95059..2173d8d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -74,7 +74,6 @@ STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch"); STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr"); STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke"); STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume"); -STATISTIC(NumFastIselFailUnwind,"Fast isel fails on Unwind"); STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable"); // Standard binary operators... @@ -218,12 +217,15 @@ namespace llvm { CodeGenOpt::Level OptLevel) { const TargetLowering &TLI = IS->getTargetLowering(); - if (OptLevel == CodeGenOpt::None) + if (OptLevel == CodeGenOpt::None || + TLI.getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::RegPressure) return createBURRListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::Hybrid) return createHybridListDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == Sched::VLIW) + return createVLIWDAGScheduler(IS, OptLevel); assert(TLI.getSchedulingPreference() == Sched::ILP && "Unknown sched type!"); return createILPListDAGScheduler(IS, OptLevel); @@ -248,7 +250,6 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, "TargetLowering::EmitInstrWithCustomInserter!"; #endif llvm_unreachable(0); - return 0; } void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, @@ -262,6 +263,8 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// +void SelectionDAGISel::ISelUpdater::anchor() { } + SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()), @@ -452,7 +455,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } // Determine if there is a call to setjmp in the machine function. - MF->setCallsSetJmp(Fn.callsFunctionThatReturnsTwice()); + MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice()); // Replace forward-declared registers with the registers containing // the desired value. @@ -777,37 +780,12 @@ void SelectionDAGISel::PrepareEHLandingPad() { .addSym(Label); // Mark exception register as live in. - unsigned Reg = TLI.getExceptionAddressRegister(); + unsigned Reg = TLI.getExceptionPointerRegister(); if (Reg) MBB->addLiveIn(Reg); // Mark exception selector register as live in. Reg = TLI.getExceptionSelectorRegister(); if (Reg) MBB->addLiveIn(Reg); - - // FIXME: Hack around an exception handling flaw (PR1508): the personality - // function and list of typeids logically belong to the invoke (or, if you - // like, the basic block containing the invoke), and need to be associated - // with it in the dwarf exception handling tables. 
Currently however the - // information is provided by an intrinsic (eh.selector) that can be moved - // to unexpected places by the optimizers: if the unwind edge is critical, - // then breaking it can result in the intrinsics being in the successor of - // the landing pad, not the landing pad itself. This results - // in exceptions not being caught because no typeids are associated with - // the invoke. This may not be the only way things can go wrong, but it - // is the only way we try to work around for the moment. - const BasicBlock *LLVMBB = MBB->getBasicBlock(); - const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); - - if (Br && Br->isUnconditional()) { // Critical edge? - BasicBlock::const_iterator I, E; - for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I) - if (isa<EHSelectorInst>(I)) - break; - - if (I == E) - // No catch info found - try to extract some from the successor. - CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo); - } } /// TryToFoldFastISelLoad - We're checking to see if we can fold the specified @@ -901,6 +879,10 @@ static bool isFoldedOrDeadInstruction(const Instruction *I, } #ifndef NDEBUG +// Collect per Instruction statistics for fast-isel misses. Only those +// instructions that cause the bail are accounted for. It does not account for +// instructions higher in the block. Thus, summing the per instructions stats +// will not add up to what is reported by NumFastIselFailures. static void collectFailStats(const Instruction *I) { switch (I->getOpcode()) { default: assert (0 && "<Invalid operator> "); @@ -912,7 +894,6 @@ static void collectFailStats(const Instruction *I) { case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return; case Instruction::Invoke: NumFastIselFailInvoke++; return; case Instruction::Resume: NumFastIselFailResume++; return; - case Instruction::Unwind: NumFastIselFailUnwind++; return; case Instruction::Unreachable: NumFastIselFailUnreachable++; return; // Standard binary operators... @@ -974,7 +955,6 @@ static void collectFailStats(const Instruction *I) { case Instruction::InsertValue: NumFastIselFailInsertValue++; return; case Instruction::LandingPad: NumFastIselFailLandingPad++; return; } - return; } #endif @@ -2199,6 +2179,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::EntryToken: // These nodes remain the same. case ISD::BasicBlock: case ISD::Register: + case ISD::RegisterMask: //case ISD::VALUETYPE: //case ISD::CONDCODE: case ISD::HANDLENODE: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index a7cf089..6cde05a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -27,7 +27,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Config/config.h" using namespace llvm; namespace llvm { diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 9ced1ac..792de75 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -572,21 +572,22 @@ TargetLowering::TargetLowering(const TargetMachine &tm, // ConstantFP nodes default to expand. Targets can either change this to // Legal, in which case all fp constants are legal, or use isFPImmLegal() // to optimize expansions for certain constants. 
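// (The f16 entries added below extend the same default-Expand policy to half
//  precision, alongside the existing f32/f64/f80 rows; a target with native
//  half-precision constants would override selectively in its own
//  TargetLowering constructor, e.g.:
//    setOperationAction(ISD::ConstantFP, MVT::f16, Legal); )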
+ setOperationAction(ISD::ConstantFP, MVT::f16, Expand); setOperationAction(ISD::ConstantFP, MVT::f32, Expand); setOperationAction(ISD::ConstantFP, MVT::f64, Expand); setOperationAction(ISD::ConstantFP, MVT::f80, Expand); // These library functions default to expand. - setOperationAction(ISD::FLOG , MVT::f64, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - setOperationAction(ISD::FLOG10, MVT::f64, Expand); - setOperationAction(ISD::FEXP , MVT::f64, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - setOperationAction(ISD::FFLOOR, MVT::f64, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); - setOperationAction(ISD::FCEIL, MVT::f64, Expand); - setOperationAction(ISD::FRINT, MVT::f64, Expand); - setOperationAction(ISD::FTRUNC, MVT::f64, Expand); + setOperationAction(ISD::FLOG , MVT::f16, Expand); + setOperationAction(ISD::FLOG2, MVT::f16, Expand); + setOperationAction(ISD::FLOG10, MVT::f16, Expand); + setOperationAction(ISD::FEXP , MVT::f16, Expand); + setOperationAction(ISD::FEXP2, MVT::f16, Expand); + setOperationAction(ISD::FFLOOR, MVT::f16, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); + setOperationAction(ISD::FCEIL, MVT::f16, Expand); + setOperationAction(ISD::FRINT, MVT::f16, Expand); + setOperationAction(ISD::FTRUNC, MVT::f16, Expand); setOperationAction(ISD::FLOG , MVT::f32, Expand); setOperationAction(ISD::FLOG2, MVT::f32, Expand); setOperationAction(ISD::FLOG10, MVT::f32, Expand); @@ -597,6 +598,16 @@ TargetLowering::TargetLowering(const TargetMachine &tm, setOperationAction(ISD::FCEIL, MVT::f32, Expand); setOperationAction(ISD::FRINT, MVT::f32, Expand); setOperationAction(ISD::FTRUNC, MVT::f32, Expand); + setOperationAction(ISD::FLOG , MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + setOperationAction(ISD::FEXP , MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Expand); + setOperationAction(ISD::FRINT, MVT::f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::f64, Expand); // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); @@ -1597,23 +1608,40 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } break; case ISD::SIGN_EXTEND_INREG: { - EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + + APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); + // If we only care about the highest bit, don't bother shifting right. + if (MsbMask == DemandedMask) { + unsigned ShAmt = ExVT.getScalarType().getSizeInBits(); + SDValue InOp = Op.getOperand(0); + + // Compute the correct shift amount type, which must be getShiftAmountTy + // for scalar types after legalization. + EVT ShiftAmtTy = Op.getValueType(); + if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) + ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); + + SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), InOp, ShiftAmt)); + } // Sign extension. Compute the demanded bits in the result that are not // present in the input. 
APInt NewBits = APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getScalarType().getSizeInBits()); + BitWidth - ExVT.getScalarType().getSizeInBits()); // If none of the extended bits are demanded, eliminate the sextinreg. if ((NewBits & NewMask) == 0) return TLO.CombineTo(Op, Op.getOperand(0)); APInt InSignBit = - APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth); + APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, - EVT.getScalarType().getSizeInBits()) & + ExVT.getScalarType().getSizeInBits()) & NewMask; // Since the sign extended bits are demanded, we know that the sign @@ -1631,7 +1659,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the input sign bit is known zero, convert this into a zero extension. if (KnownZero.intersects(InSignBit)) return TLO.CombineTo(Op, - TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT)); + TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT)); if (KnownOne.intersects(InSignBit)) { // Input sign bit known set KnownOne |= NewBits; @@ -2995,7 +3023,6 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( /// is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { - default: llvm_unreachable("Unknown constraint type!"); case TargetLowering::C_Other: case TargetLowering::C_Unknown: return 0; @@ -3006,6 +3033,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { case TargetLowering::C_Memory: return 3; } + llvm_unreachable("Invalid constraint type"); } /// Examine constraint type and operand type and determine a weight value.
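// A worked instance of the SIGN_EXTEND_INREG change above, as a sketch: with
// BitWidth == 32, ExVT == i8, and DemandedMask demanding only bit 31, the
// result's sign bit is just bit 7 of the input, so the node is rewritten as
// a single shift and the usual shl+sra pair of a full sign extension is
// avoided:
//   // sign_ext_inreg X, i8  ->  shl X, 24   (only bit 31 demanded)
//   SDValue ShiftAmt = TLO.DAG.getConstant(32 - 8, ShiftAmtTy);
//   return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, MVT::i32,
//                                            InOp, ShiftAmt));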