| author | Nowar Gu <nowar100@gmail.com> | 2011-06-17 14:29:24 +0800 |
|---|---|---|
| committer | Nowar Gu <nowar100@gmail.com> | 2011-06-20 15:49:07 +0800 |
| commit | 907af0f20f58f2ea26da7ea64e1f094cd6880db7 (patch) | |
| tree | 02007757de416c561df174d582205cebfa582801 /lib/CodeGen/SelectionDAG | |
| parent | 1d4f9a57447faa0142a1d0301e5ce550cfe60c4f (diff) | |
| parent | ec324e5ae44025c6bdb930b78198f30f807e355b (diff) | |
Merge upstream to r133240 at Fri. 17th Jun 2011.
Conflicts:
lib/CodeGen/AsmPrinter/AsmPrinter.cpp
lib/Target/ARM/ARMCodeEmitter.cpp
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
18 files changed, 2137 insertions(+), 1079 deletions(-)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a9afec8..4ac590a 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -138,6 +138,10 @@ namespace {
     SDValue PromoteExtend(SDValue Op);
     bool PromoteLoad(SDValue Op);
 
+    void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+                         SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+                         ISD::NodeType ExtType);
+
     /// combine - call the node-specific routine that knows how to fold each
     /// particular type of node. If that doesn't do anything, try the
     /// target-specific DAG combines.
@@ -165,6 +169,8 @@ namespace {
     SDValue visitMULHS(SDNode *N);
     SDValue visitSMUL_LOHI(SDNode *N);
     SDValue visitUMUL_LOHI(SDNode *N);
+    SDValue visitSMULO(SDNode *N);
+    SDValue visitUMULO(SDNode *N);
     SDValue visitSDIVREM(SDNode *N);
     SDValue visitUDIVREM(SDNode *N);
     SDValue visitAND(SDNode *N);
@@ -529,7 +535,8 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
                                     cast<ConstantSDNode>(N0.getOperand(1)),
                                     cast<ConstantSDNode>(N1));
       return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
-    } else if (N0.hasOneUse()) {
+    }
+    if (N0.hasOneUse()) {
       // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
       SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
                                    N0.getOperand(0), N1);
@@ -546,7 +553,8 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
                                     cast<ConstantSDNode>(N1.getOperand(1)),
                                     cast<ConstantSDNode>(N0));
       return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
-    } else if (N1.hasOneUse()) {
+    }
+    if (N1.hasOneUse()) {
       // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
       SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
                                    N1.getOperand(0), N0);
@@ -990,6 +998,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
                  dbgs() << "\nWith: ";
                  RV.getNode()->dump(&DAG);
                  dbgs() << '\n');
+
+      // Transfer debug value.
+      DAG.TransferDbgValues(SDValue(N, 0), RV);
       WorkListRemover DeadNodes(*this);
       if (N->getNumValues() == RV.getNode()->getNumValues())
         DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
@@ -1045,6 +1056,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::MULHS:              return visitMULHS(N);
   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
+  case ISD::SMULO:              return visitSMULO(N);
+  case ISD::UMULO:              return visitUMULO(N);
   case ISD::SDIVREM:            return visitSDIVREM(N);
   case ISD::UDIVREM:            return visitUDIVREM(N);
   case ISD::AND:                return visitAND(N);
@@ -1566,7 +1579,8 @@ static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT,
                              SelectionDAG &DAG, bool LegalOperations) {
   if (!VT.isVector()) {
     return DAG.getConstant(0, VT);
-  } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
+  }
+  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
     // Produce a vector of zeros.
    SDValue El = DAG.getConstant(0, VT.getVectorElementType());
    std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
@@ -2174,6 +2188,26 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
   return SDValue();
 }
 
+SDValue DAGCombiner::visitSMULO(SDNode *N) {
+  // (smulo x, 2) -> (saddo x, x)
+  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+    if (C2->getAPIntValue() == 2)
+      return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(),
+                         N->getOperand(0), N->getOperand(0));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitUMULO(SDNode *N) {
+  // (umulo x, 2) -> (uaddo x, x)
+  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+    if (C2->getAPIntValue() == 2)
+      return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(),
+                         N->getOperand(0), N->getOperand(0));
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
   SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
   if (Res.getNode()) return Res;
@@ -3000,6 +3034,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   // fold (shl x, 0) -> x
   if (N1C && N1C->isNullValue())
     return N0;
+  // fold (shl undef, x) -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
   // if (shl x, c) is known to be zero, return 0
   if (DAG.MaskedValueIsZero(SDValue(N, 0),
                             APInt::getAllOnesValue(OpSizeInBits)))
@@ -3062,26 +3099,27 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
     }
   }
 
-  // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or
-  //                               (srl (and x, (shl -1, c1)), (sub c1, c2))
+  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
+  //                               (and (srl x, (sub c1, c2), MASK)
   if (N1C && N0.getOpcode() == ISD::SRL &&
       N0.getOperand(1).getOpcode() == ISD::Constant) {
     uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
     if (c1 < VT.getSizeInBits()) {
       uint64_t c2 = N1C->getZExtValue();
-      SDValue HiBitsMask =
-        DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
-                                              VT.getSizeInBits() - c1),
-                        VT);
-      SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT,
-                                 N0.getOperand(0),
-                                 HiBitsMask);
-      if (c2 > c1)
-        return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask,
-                           DAG.getConstant(c2-c1, N1.getValueType()));
-      else
-        return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask,
-                           DAG.getConstant(c1-c2, N1.getValueType()));
+      APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+                                         VT.getSizeInBits() - c1);
+      SDValue Shift;
+      if (c2 > c1) {
+        Mask = Mask.shl(c2-c1);
+        Shift = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+                            DAG.getConstant(c2-c1, N1.getValueType()));
+      } else {
+        Mask = Mask.lshr(c1-c2);
+        Shift = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+                            DAG.getConstant(c1-c2, N1.getValueType()));
+      }
+      return DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, Shift,
+                         DAG.getConstant(Mask, VT));
     }
   }
   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
@@ -3323,8 +3361,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
         return DAG.getUNDEF(VT);
 
       if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
+        uint64_t ShiftAmt = N1C->getZExtValue();
         SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
-                                         N0.getOperand(0), N1);
+                                         N0.getOperand(0),
+                                         DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
        AddToWorkList(SmallShift.getNode());
        return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
      }
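A quick standalone check of the shift-pair rewrite the visitSHL hunk above installs — `(shl (srl x, c1), c2)` becomes a single shift plus an AND with a pre-shifted high-bits mask. The concrete values here are hypothetical, picked only to exercise the `c2 > c1` branch:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t x = 0xDEADBEEF;
  const unsigned c1 = 4, c2 = 7;                     // c2 > c1, both < 32
  uint32_t lhs = (x >> c1) << c2;                    // the original shift pair
  // High-bits-set mask, then shifted left by c2-c1, mirroring Mask.shl(c2-c1).
  uint32_t mask = (0xFFFFFFFFu << c1) << (c2 - c1);
  uint32_t rhs = (x << (c2 - c1)) & mask;            // single shift + AND
  assert(lhs == rhs);
  return 0;
}
```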
@@ -3663,6 +3703,28 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
   return true;
 }
 
+void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+                                  SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+                                  ISD::NodeType ExtType) {
+  // Extend SetCC uses if necessary.
+  for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+    SDNode *SetCC = SetCCs[i];
+    SmallVector<SDValue, 4> Ops;
+
+    for (unsigned j = 0; j != 2; ++j) {
+      SDValue SOp = SetCC->getOperand(j);
+      if (SOp == Trunc)
+        Ops.push_back(ExtLoad);
+      else
+        Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
+    }
+
+    Ops.push_back(SetCC->getOperand(2));
+    CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0),
+                                 &Ops[0], Ops.size()));
+  }
+}
+
 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
@@ -3751,27 +3813,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                   N0.getValueType(), ExtLoad);
       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
-
-      // Extend SetCC uses if necessary.
-      for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
-        SDNode *SetCC = SetCCs[i];
-        SmallVector<SDValue, 4> Ops;
-
-        for (unsigned j = 0; j != 2; ++j) {
-          SDValue SOp = SetCC->getOperand(j);
-          if (SOp == Trunc)
-            Ops.push_back(ExtLoad);
-          else
-            Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND,
-                                      N->getDebugLoc(), VT, SOp));
-        }
-
-        Ops.push_back(SetCC->getOperand(2));
-        CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
-                                     SetCC->getValueType(0),
-                                     &Ops[0], Ops.size()));
-      }
-
+      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+                      ISD::SIGN_EXTEND);
       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
     }
   }
@@ -3799,6 +3842,45 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
     }
   }
 
+  // fold (sext (and/or/xor (load x), cst)) ->
+  //      (and/or/xor (sextload x), (sext cst))
+  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+       N0.getOpcode() == ISD::XOR) &&
+      isa<LoadSDNode>(N0.getOperand(0)) &&
+      N0.getOperand(1).getOpcode() == ISD::Constant &&
+      TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
+      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+    if (LN0->getExtensionType() != ISD::ZEXTLOAD) {
+      bool DoXform = true;
+      SmallVector<SDNode*, 4> SetCCs;
+      if (!N0.hasOneUse())
+        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
+                                          SetCCs, TLI);
+      if (DoXform) {
+        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT,
+                                         LN0->getChain(), LN0->getBasePtr(),
+                                         LN0->getPointerInfo(),
+                                         LN0->getMemoryVT(),
+                                         LN0->isVolatile(),
+                                         LN0->isNonTemporal(),
+                                         LN0->getAlignment());
+        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+        Mask = Mask.sext(VT.getSizeInBits());
+        SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+                                  ExtLoad, DAG.getConstant(Mask, VT));
+        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+                                    N0.getOperand(0).getDebugLoc(),
+                                    N0.getOperand(0).getValueType(), ExtLoad);
+        CombineTo(N, And);
+        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+                        ISD::SIGN_EXTEND);
+        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+      }
+    }
+  }
+
   if (N0.getOpcode() == ISD::SETCC) {
     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
     // Only do this before legalize for now.
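The `Mask.sext(...)` step in the new sext-of-logic-of-load fold is what keeps the fold correct: the constant has to be widened the same way the load is. A minimal standalone check with hypothetical i8-to-i32 values:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  int8_t x = -100;                              // the narrow value in memory
  int8_t cst = (int8_t)0xF0;                    // the i8 constant operand
  int32_t folded = (int32_t)(int8_t)(x & cst);  // sext (and (load x), cst)
  int32_t rewritten = (int32_t)x & (int32_t)cst;// and (sextload x), (sext cst)
  assert(folded == rewritten);                  // both are 0xFFFFFF90
  return 0;
}
```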
@@ -3882,7 +3964,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
         // CombineTo deleted the truncate, if needed, but not what's under it.
         AddToWorkList(oye);
       }
-      return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
     }
   }
 
@@ -3957,27 +4039,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
                                   N0.getValueType(), ExtLoad);
       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
 
-      // Extend SetCC uses if necessary.
-      for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
-        SDNode *SetCC = SetCCs[i];
-        SmallVector<SDValue, 4> Ops;
-
-        for (unsigned j = 0; j != 2; ++j) {
-          SDValue SOp = SetCC->getOperand(j);
-          if (SOp == Trunc)
-            Ops.push_back(ExtLoad);
-          else
-            Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND,
-                                      N->getDebugLoc(), VT, SOp));
-        }
+      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+                      ISD::ZERO_EXTEND);
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
 
-        Ops.push_back(SetCC->getOperand(2));
-        CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
-                                     SetCC->getValueType(0),
-                                     &Ops[0], Ops.size()));
+  // fold (zext (and/or/xor (load x), cst)) ->
+  //      (and/or/xor (zextload x), (zext cst))
+  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+       N0.getOpcode() == ISD::XOR) &&
+      isa<LoadSDNode>(N0.getOperand(0)) &&
+      N0.getOperand(1).getOpcode() == ISD::Constant &&
+      TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
+      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+    if (LN0->getExtensionType() != ISD::SEXTLOAD) {
+      bool DoXform = true;
+      SmallVector<SDNode*, 4> SetCCs;
+      if (!N0.hasOneUse())
+        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
+                                          SetCCs, TLI);
+      if (DoXform) {
+        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT,
+                                         LN0->getChain(), LN0->getBasePtr(),
+                                         LN0->getPointerInfo(),
+                                         LN0->getMemoryVT(),
+                                         LN0->isVolatile(),
+                                         LN0->isNonTemporal(),
+                                         LN0->getAlignment());
+        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+        Mask = Mask.zext(VT.getSizeInBits());
+        SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+                                  ExtLoad, DAG.getConstant(Mask, VT));
+        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+                                    N0.getOperand(0).getDebugLoc(),
+                                    N0.getOperand(0).getValueType(), ExtLoad);
+        CombineTo(N, And);
+        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+                        ISD::ZERO_EXTEND);
+        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
       }
-
-      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
     }
   }
 
@@ -4012,7 +4115,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
       EVT EltVT = VT.getVectorElementType();
       SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
                                     DAG.getConstant(1, EltVT));
-      if (VT.getSizeInBits() == N0VT.getSizeInBits()) {
+      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter). Check to see that they are the same size. If so,
@@ -4024,25 +4127,24 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
                              cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                  DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
                              &OneOps[0], OneOps.size()));
-      } else {
-        // If the desired elements are smaller or larger than the source
-        // elements we can use a matching integer vector type and then
-        // truncate/sign extend
-        EVT MatchingElementType =
-          EVT::getIntegerVT(*DAG.getContext(),
-                            N0VT.getScalarType().getSizeInBits());
-        EVT MatchingVectorType =
-          EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
-                           N0VT.getVectorNumElements());
-        SDValue VsetCC =
-          DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
-                        N0.getOperand(1),
-                        cast<CondCodeSDNode>(N0.getOperand(2))->get());
-        return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
-                           DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT),
-                           DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
-                                       &OneOps[0], OneOps.size()));
-      }
+
+      // If the desired elements are smaller or larger than the source
+      // elements we can use a matching integer vector type and then
+      // truncate/sign extend
+      EVT MatchingElementType =
+        EVT::getIntegerVT(*DAG.getContext(),
+                          N0VT.getScalarType().getSizeInBits());
+      EVT MatchingVectorType =
+        EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+                         N0VT.getVectorNumElements());
+      SDValue VsetCC =
+        DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+                      N0.getOperand(1),
+                      cast<CondCodeSDNode>(N0.getOperand(2))->get());
+      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                         DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT),
+                         DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+                                     &OneOps[0], OneOps.size()));
     }
 
     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
@@ -4110,7 +4212,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
         // CombineTo deleted the truncate, if needed, but not what's under it.
         AddToWorkList(oye);
       }
-      return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
     }
   }
 
@@ -4166,27 +4268,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                   N0.getValueType(), ExtLoad);
       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
-
-      // Extend SetCC uses if necessary.
-      for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
-        SDNode *SetCC = SetCCs[i];
-        SmallVector<SDValue, 4> Ops;
-
-        for (unsigned j = 0; j != 2; ++j) {
-          SDValue SOp = SetCC->getOperand(j);
-          if (SOp == Trunc)
-            Ops.push_back(ExtLoad);
-          else
-            Ops.push_back(DAG.getNode(ISD::ANY_EXTEND,
-                                      N->getDebugLoc(), VT, SOp));
-        }
-
-        Ops.push_back(SetCC->getOperand(2));
-        CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
-                                     SetCC->getValueType(0),
-                                     &Ops[0], Ops.size()));
-      }
-
+      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+                      ISD::ANY_EXTEND);
       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
     }
   }
@@ -6265,6 +6348,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
                           ST->isNonTemporal(), OrigAlign);
   }
 
+  // Turn 'store undef, Ptr' -> nothing.
+  if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
+    return Chain;
+
   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
     // NOTE: If the original store is volatile, this transform must not increase
@@ -6298,8 +6385,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
         return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
                             Ptr, ST->getPointerInfo(), ST->isVolatile(),
                             ST->isNonTemporal(), ST->getAlignment());
-      } else if (!ST->isVolatile() &&
-                 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+      }
+
+      if (!ST->isVolatile() &&
+          TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
        // Many FP stores are not made apparent until after legalize, e.g. for
        // argument passing. Since this is so common, custom legalize the
        // 64-bit integer store into two 32-bit stores.
@@ -6393,8 +6482,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
     SDValue Shorter =
       GetDemandedBits(Value,
-                      APInt::getLowBitsSet(Value.getValueSizeInBits(),
-                                           ST->getMemoryVT().getSizeInBits()));
+                      APInt::getLowBitsSet(
+                        Value.getValueType().getScalarType().getSizeInBits(),
+                        ST->getMemoryVT().getScalarType().getSizeInBits()));
     AddToWorkList(Value.getNode());
     if (Shorter.getNode())
       return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
@@ -6486,18 +6576,18 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
   // (vextract (scalar_to_vector val, 0) -> val
   SDValue InVec = N->getOperand(0);
-  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
-    // Check if the result type doesn't match the inserted element type. A
-    // SCALAR_TO_VECTOR may truncate the inserted element and the
-    // EXTRACT_VECTOR_ELT may widen the extracted vector.
-    SDValue InOp = InVec.getOperand(0);
-    EVT NVT = N->getValueType(0);
-    if (InOp.getValueType() != NVT) {
-      assert(InOp.getValueType().isInteger() && NVT.isInteger());
-      return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
-    }
-    return InOp;
-  }
+  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+    // Check if the result type doesn't match the inserted element type. A
+    // SCALAR_TO_VECTOR may truncate the inserted element and the
+    // EXTRACT_VECTOR_ELT may widen the extracted vector.
+    SDValue InOp = InVec.getOperand(0);
+    EVT NVT = N->getValueType(0);
+    if (InOp.getValueType() != NVT) {
+      assert(InOp.getValueType().isInteger() && NVT.isInteger());
+      return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
+    }
+    return InOp;
+  }
 
   // Perform only after legalization to ensure build_vector / vector_shuffle
   // optimizations have already been done.
@@ -6558,7 +6648,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
       }
     }
 
-    if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())
+    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
       return SDValue();
 
     // If Idx was -1 above, Elt is going to be -1, so just return undef.
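The `store float 1.0 -> store int` rewrite in the hunk above works because the store only cares about the bit pattern (the `0x12345678` in its comment is just a placeholder). A standalone sketch of the bitcast it relies on:

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  float f = 1.0f;
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));  // the C++ equivalent of the DAG-level bitcast
  assert(bits == 0x3F800000u);           // 1.0f in IEEE-754 single precision
  return 0;
}
```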
@@ -7497,18 +7587,17 @@ bool DAGCombiner::FindAliasInfo(SDNode *N,
     SrcValueAlign = LD->getOriginalAlignment();
     TBAAInfo = LD->getTBAAInfo();
     return true;
-  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+  }
+  if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
     Ptr = ST->getBasePtr();
     Size = ST->getMemoryVT().getSizeInBits() >> 3;
     SrcValue = ST->getSrcValue();
     SrcValueOffset = ST->getSrcValueOffset();
     SrcValueAlign = ST->getOriginalAlignment();
     TBAAInfo = ST->getTBAAInfo();
-  } else {
-    llvm_unreachable("FindAliasInfo expected a memory operand");
+    return false;
   }
-
-  return false;
+  llvm_unreachable("FindAliasInfo expected a memory operand");
 }
 
 /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
@@ -7621,13 +7710,13 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
   // Accumulate all the aliases to this node.
   GatherAllAliases(N, OldChain, Aliases);
 
-  if (Aliases.size() == 0) {
-    // If no operands then chain to entry token.
+  // If no operands then chain to entry token.
+  if (Aliases.size() == 0)
     return DAG.getEntryNode();
-  } else if (Aliases.size() == 1) {
-    // If a single operand then chain to it.  We don't need to revisit it.
+
+  // If a single operand then chain to it.  We don't need to revisit it.
+  if (Aliases.size() == 1)
     return Aliases[0];
-  }
 
   // Construct a custom tailored token factor.
   return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index ea8ace3..797f174 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -43,6 +43,8 @@
 #include "llvm/GlobalVariable.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -109,8 +111,8 @@ unsigned FastISel::getRegForValue(const Value *V) {
   // of whether FastISel can handle them.
   MVT VT = RealVT.getSimpleVT();
   if (!TLI.isTypeLegal(VT)) {
-    // Promote MVT::i1 to a legal type though, because it's common and easy.
-    if (VT == MVT::i1)
+    // Handle integer promotions, though, because they're common and easy.
+    if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
       VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
     else
       return 0;
@@ -121,10 +123,9 @@ unsigned FastISel::getRegForValue(const Value *V) {
   // only locally. This is because Instructions already have the SSA
   // def-dominates-use requirement enforced.
   DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
-  if (I != FuncInfo.ValueMap.end()) {
-    unsigned Reg = I->second;
-    return Reg;
-  }
+  if (I != FuncInfo.ValueMap.end())
+    return I->second;
+
   unsigned Reg = LocalValueMap[V];
   if (Reg != 0)
     return Reg;
@@ -164,8 +165,12 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
     Reg =
      getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
   } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
-    // Try to emit the constant directly.
-    Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+    if (CF->isNullValue()) {
+      Reg = TargetMaterializeFloatZero(CF);
+    } else {
+      // Try to emit the constant directly.
+      Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+    }
 
     if (!Reg) {
       // Try to emit the constant by using an integer constant with a cast.
@@ -230,10 +235,10 @@ unsigned FastISel::lookUpRegForValue(const Value *V) {
 /// NOTE: This is only necessary because we might select a block that uses
 /// a value before we select the block that defines the value.  It might be
 /// possible to fix this by selecting blocks in reverse postorder.
-unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
+void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
   if (!isa<Instruction>(I)) {
     LocalValueMap[I] = Reg;
-    return Reg;
+    return;
   }
 
   unsigned &AssignedReg = FuncInfo.ValueMap[I];
@@ -242,12 +247,11 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
     AssignedReg = Reg;
   else if (Reg != AssignedReg) {
     // Arrange for uses of AssignedReg to be replaced by uses of Reg.
-    FuncInfo.RegFixups[AssignedReg] = Reg;
+    for (unsigned i = 0; i < NumRegs; i++)
+      FuncInfo.RegFixups[AssignedReg+i] = Reg+i;
 
     AssignedReg = Reg;
   }
-
-  return AssignedReg;
 }
 
 std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
@@ -330,23 +334,51 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
       return false;
   }
 
+  // Check if the first operand is a constant, and handle it as "ri".  At -O0,
+  // we don't have anything that canonicalizes operand order.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
+    if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) {
+      unsigned Op1 = getRegForValue(I->getOperand(1));
+      if (Op1 == 0) return false;
+
+      bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+      unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1,
+                                        Op1IsKill, CI->getZExtValue(),
+                                        VT.getSimpleVT());
+      if (ResultReg == 0) return false;
+
+      // We successfully emitted code for the given LLVM Instruction.
+      UpdateValueMap(I, ResultReg);
+      return true;
+    }
+
   unsigned Op0 = getRegForValue(I->getOperand(0));
-  if (Op0 == 0)
-    // Unhandled operand. Halt "fast" selection and bail.
+  if (Op0 == 0)   // Unhandled operand. Halt "fast" selection and bail.
     return false;
 
   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
 
   // Check if the second operand is a constant and handle it appropriately.
   if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
-    unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(),
-                                     ISDOpcode, Op0, Op0IsKill,
-                                     CI->getZExtValue());
-    if (ResultReg != 0) {
-      // We successfully emitted code for the given LLVM Instruction.
-      UpdateValueMap(I, ResultReg);
-      return true;
+    uint64_t Imm = CI->getZExtValue();
+
+    // Transform "sdiv exact X, 8" -> "sra X, 3".
+    if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) &&
+        cast<BinaryOperator>(I)->isExact() &&
+        isPowerOf2_64(Imm)) {
+      Imm = Log2_64(Imm);
+      ISDOpcode = ISD::SRA;
    }
+
+    unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
+                                      Op0IsKill, Imm, VT.getSimpleVT());
+    if (ResultReg == 0) return false;
+
+    // We successfully emitted code for the given LLVM Instruction.
+    UpdateValueMap(I, ResultReg);
+    return true;
   }
 
   // Check if the second operand is a constant float.
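The `sdiv exact X, 8 -> sra X, 3` transform above is only safe because the `exact` flag promises the division has no remainder; for exact signed division by a power of two, an arithmetic right shift gives the same result even for negative inputs. A standalone check (assuming the usual arithmetic-shift behavior for signed `>>`):

```cpp
#include <cassert>

int main() {
  int x = -64;               // divisible by 8, so 'exact' would hold in IR
  assert(x / 8 == x >> 3);   // both yield -8
  // Without exactness the fold is wrong: -3 / 8 == 0, but -3 >> 3 == -1.
  return 0;
}
```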
@@ -454,15 +486,35 @@
 }
 
 bool FastISel::SelectCall(const User *I) {
-  const Function *F = cast<CallInst>(I)->getCalledFunction();
+  const CallInst *Call = cast<CallInst>(I);
+
+  // Handle simple inline asms.
+  if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getArgOperand(0))) {
+    // Don't attempt to handle constraints.
+    if (!IA->getConstraintString().empty())
+      return false;
+
+    unsigned ExtraInfo = 0;
+    if (IA->hasSideEffects())
+      ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+    if (IA->isAlignStack())
+      ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(TargetOpcode::INLINEASM))
+      .addExternalSymbol(IA->getAsmString().c_str())
+      .addImm(ExtraInfo);
+    return true;
+  }
+
+  const Function *F = Call->getCalledFunction();
   if (!F) return false;
 
   // Handle selected intrinsic function calls.
-  unsigned IID = F->getIntrinsicID();
-  switch (IID) {
+  switch (F->getIntrinsicID()) {
   default: break;
   case Intrinsic::dbg_declare: {
-    const DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
+    const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
     if (!DIVariable(DI->getVariable()).Verify() ||
         !FuncInfo.MF->getMMI().hasDebugInfo())
       return true;
@@ -494,7 +546,7 @@ bool FastISel::SelectCall(const User *I) {
   }
   case Intrinsic::dbg_value: {
     // This form of DBG_VALUE is target-independent.
-    const DbgValueInst *DI = cast<DbgValueInst>(I);
+    const DbgValueInst *DI = cast<DbgValueInst>(Call);
     const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
     const Value *V = DI->getValue();
     if (!V) {
@@ -523,65 +575,68 @@ bool FastISel::SelectCall(const User *I) {
     return true;
   }
   case Intrinsic::eh_exception: {
-    EVT VT = TLI.getValueType(I->getType());
-    switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
-    default: break;
-    case TargetLowering::Expand: {
-      assert(FuncInfo.MBB->isLandingPad() &&
-             "Call to eh.exception not in landing pad!");
-      unsigned Reg = TLI.getExceptionAddressRegister();
-      const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
-      unsigned ResultReg = createResultReg(RC);
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-              ResultReg).addReg(Reg);
-      UpdateValueMap(I, ResultReg);
-      return true;
-    }
-    }
-    break;
+    EVT VT = TLI.getValueType(Call->getType());
+    if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand)
+      break;
+
+    assert(FuncInfo.MBB->isLandingPad() &&
+           "Call to eh.exception not in landing pad!");
+    unsigned Reg = TLI.getExceptionAddressRegister();
+    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+    unsigned ResultReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+            ResultReg).addReg(Reg);
+    UpdateValueMap(Call, ResultReg);
+    return true;
   }
   case Intrinsic::eh_selector: {
-    EVT VT = TLI.getValueType(I->getType());
-    switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) {
-    default: break;
-    case TargetLowering::Expand: {
-      if (FuncInfo.MBB->isLandingPad())
-        AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB);
-      else {
+    EVT VT = TLI.getValueType(Call->getType());
+    if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand)
+      break;
+
+    if (FuncInfo.MBB->isLandingPad())
+      AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB);
+    else {
 #ifndef NDEBUG
-        FuncInfo.CatchInfoLost.insert(cast<CallInst>(I));
+      FuncInfo.CatchInfoLost.insert(Call);
 #endif
-        // FIXME: Mark exception selector register as live in.  Hack for PR1508.
-        unsigned Reg = TLI.getExceptionSelectorRegister();
-        if (Reg) FuncInfo.MBB->addLiveIn(Reg);
-      }
-
+      // FIXME: Mark exception selector register as live in.
+      unsigned Reg = TLI.getExceptionSelectorRegister();
-      EVT SrcVT = TLI.getPointerTy();
-      const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
-      unsigned ResultReg = createResultReg(RC);
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-              ResultReg).addReg(Reg);
-
-      bool ResultRegIsKill = hasTrivialKill(I);
-
-      // Cast the register to the type of the selector.
-      if (SrcVT.bitsGT(MVT::i32))
-        ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
-                               ResultReg, ResultRegIsKill);
-      else if (SrcVT.bitsLT(MVT::i32))
-        ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
-                               ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
-      if (ResultReg == 0)
-        // Unhandled operand. Halt "fast" selection and bail.
-        return false;
+      if (Reg) FuncInfo.MBB->addLiveIn(Reg);
+    }
 
-      UpdateValueMap(I, ResultReg);
+    unsigned Reg = TLI.getExceptionSelectorRegister();
+    EVT SrcVT = TLI.getPointerTy();
+    const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
+    unsigned ResultReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+            ResultReg).addReg(Reg);
+
+    bool ResultRegIsKill = hasTrivialKill(Call);
+
+    // Cast the register to the type of the selector.
+    if (SrcVT.bitsGT(MVT::i32))
+      ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
+                             ResultReg, ResultRegIsKill);
+    else if (SrcVT.bitsLT(MVT::i32))
+      ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
+                             ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
+    if (ResultReg == 0)
+      // Unhandled operand. Halt "fast" selection and bail.
+      return false;
-    return true;
-  }
-  }
-  break;
+    UpdateValueMap(Call, ResultReg);
+
+    return true;
+  }
+  case Intrinsic::objectsize: {
+    ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1));
+    unsigned long long Res = CI->isZero() ? -1ULL : 0;
+    Constant *ResCI = ConstantInt::get(Call->getType(), Res);
+    unsigned ResultReg = getRegForValue(ResCI);
+    if (ResultReg == 0)
+      return false;
+    UpdateValueMap(Call, ResultReg);
+    return true;
   }
   }
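At -O0 the new `Intrinsic::objectsize` case simply folds the call to its conservative answer: `-1` when the maximum size is requested and unknown, `0` when the minimum is. The C-level analogue is the GCC/Clang builtin `__builtin_object_size`, whose documented fallback matches `CI->isZero() ? -1ULL : 0`:

```cpp
#include <cstddef>

size_t probe(void *p) {
  // Type 0 asks for the maximum object size; when the compiler cannot tell,
  // the builtin evaluates to (size_t)-1, mirroring the fold in the hunk above.
  return __builtin_object_size(p, 0);
}
```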
@@ -598,21 +653,13 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
     // Unhandled type. Halt "fast" selection and bail.
     return false;
 
-  // Check if the destination type is legal. Or as a special case,
-  // it may be i1 if we're doing a truncate because that's
-  // easy and somewhat common.
+  // Check if the destination type is legal.
   if (!TLI.isTypeLegal(DstVT))
-    if (DstVT != MVT::i1 || Opcode != ISD::TRUNCATE)
-      // Unhandled type. Halt "fast" selection and bail.
-      return false;
+    return false;
 
-  // Check if the source operand is legal. Or as a special case,
-  // it may be i1 if we're doing zero-extension because that's
-  // easy and somewhat common.
+  // Check if the source operand is legal.
   if (!TLI.isTypeLegal(SrcVT))
-    if (SrcVT != MVT::i1 || Opcode != ISD::ZERO_EXTEND)
-      // Unhandled type. Halt "fast" selection and bail.
-      return false;
+    return false;
 
   unsigned InputReg = getRegForValue(I->getOperand(0));
   if (!InputReg)
@@ -621,18 +668,6 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
 
   bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
 
-  // If the operand is i1, arrange for the high bits in the register to be zero.
-  if (SrcVT == MVT::i1) {
-    SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT);
-    InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg, InputRegIsKill);
-    if (!InputReg)
-      return false;
-    InputRegIsKill = true;
-  }
-
-  // If the result is i1, truncate to the target's type for i1 first.
-  if (DstVT == MVT::i1)
-    DstVT = TLI.getTypeToTransformTo(I->getContext(), DstVT);
-
   unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
                                   DstVT.getSimpleVT(),
                                   Opcode,
@@ -784,6 +819,47 @@ FastISel::SelectFNeg(const User *I) {
 }
 
 bool
+FastISel::SelectExtractValue(const User *U) {
+  const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U);
+  if (!EVI)
+    return false;
+
+  // Make sure we only try to handle extracts with a legal result.  But also
+  // allow i1 because it's easy.
+  EVT RealVT = TLI.getValueType(EVI->getType(), /*AllowUnknown=*/true);
+  if (!RealVT.isSimple())
+    return false;
+  MVT VT = RealVT.getSimpleVT();
+  if (!TLI.isTypeLegal(VT) && VT != MVT::i1)
+    return false;
+
+  const Value *Op0 = EVI->getOperand(0);
+  const Type *AggTy = Op0->getType();
+
+  // Get the base result register.
+  unsigned ResultReg;
+  DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0);
+  if (I != FuncInfo.ValueMap.end())
+    ResultReg = I->second;
+  else if (isa<Instruction>(Op0))
+    ResultReg = FuncInfo.InitializeRegForValue(Op0);
+  else
+    return false; // fast-isel can't handle aggregate constants at the moment
+
+  // Get the actual result register, which is an offset from the base register.
+  unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->idx_begin(), EVI->idx_end());
+
+  SmallVector<EVT, 4> AggValueVTs;
+  ComputeValueVTs(TLI, AggTy, AggValueVTs);
+
+  for (unsigned i = 0; i < VTIndex; i++)
+    ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]);
+
+  UpdateValueMap(EVI, ResultReg);
+  return true;
+}
+
+bool
 FastISel::SelectOperator(const User *I, unsigned Opcode) {
   switch (Opcode) {
   case Instruction::Add:
@@ -887,6 +963,9 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
     return true;
   }
 
+  case Instruction::ExtractValue:
+    return SelectExtractValue(I);
+
   case Instruction::PHI:
     llvm_unreachable("FastISel shouldn't visit PHI nodes!");
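The new `SelectExtractValue` finds a field's register by summing the register counts of all earlier leaf fields, since an aggregate occupies one run of consecutive virtual registers per leaf. A plain-C++ model of that walk, with hypothetical per-field register counts:

```cpp
#include <cassert>
#include <vector>

int main() {
  // Say %agg = { i32, i64, i32 } and the target needs 1, 2 and 1 registers
  // for those leaves. Then extractvalue %agg, 2 starts at base + 1 + 2.
  std::vector<unsigned> regsPerField = {1, 2, 1};
  unsigned base = 100, index = 2, reg = base;
  for (unsigned i = 0; i < index; ++i)
    reg += regsPerField[i];          // mirrors the TLI.getNumRegisters loop
  assert(reg == 103);
  return 0;
}
```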
@@ -966,59 +1045,33 @@ unsigned FastISel::FastEmit_rri(MVT, MVT,
 unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
                                 unsigned Op0, bool Op0IsKill,
                                 uint64_t Imm, MVT ImmType) {
+  // If this is a multiply by a power of two, emit this as a shift left.
+  if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
+    Opcode = ISD::SHL;
+    Imm = Log2_64(Imm);
+  } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) {
+    // div x, 8 -> srl x, 3
+    Opcode = ISD::SRL;
+    Imm = Log2_64(Imm);
+  }
+
+  // Horrible hack (to be removed), check to make sure shift amounts are
+  // in-range.
+  if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) &&
+      Imm >= VT.getSizeInBits())
+    return 0;
+
   // First check if immediate type is legal. If not, we can't use the ri form.
   unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
   if (ResultReg != 0)
     return ResultReg;
   unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
-  if (MaterialReg == 0)
-    return 0;
-  return FastEmit_rr(VT, VT, Opcode,
-                     Op0, Op0IsKill,
-                     MaterialReg, /*Kill=*/true);
-}
-
-/// FastEmit_rf_ - This method is a wrapper of FastEmit_ri. It first tries
-/// to emit an instruction with a floating-point immediate operand using
-/// FastEmit_rf. If that fails, it materializes the immediate into a register
-/// and try FastEmit_rr instead.
-unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode,
-                                unsigned Op0, bool Op0IsKill,
-                                const ConstantFP *FPImm, MVT ImmType) {
-  // First check if immediate type is legal. If not, we can't use the rf form.
-  unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, Op0IsKill, FPImm);
-  if (ResultReg != 0)
-    return ResultReg;
-
-  // Materialize the constant in a register.
-  unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm);
   if (MaterialReg == 0) {
-    // If the target doesn't have a way to directly enter a floating-point
-    // value into a register, use an alternate approach.
-    // TODO: The current approach only supports floating-point constants
-    // that can be constructed by conversion from integer values. This should
-    // be replaced by code that creates a load from a constant-pool entry,
-    // which will require some target-specific work.
-    const APFloat &Flt = FPImm->getValueAPF();
-    EVT IntVT = TLI.getPointerTy();
-
-    uint64_t x[2];
-    uint32_t IntBitWidth = IntVT.getSizeInBits();
-    bool isExact;
-    (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
-                                APFloat::rmTowardZero, &isExact);
-    if (!isExact)
-      return 0;
-    APInt IntVal(IntBitWidth, 2, x);
-
-    unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(),
-                                     ISD::Constant, IntVal.getZExtValue());
-    if (IntegerReg == 0)
-      return 0;
-    MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT,
-                             ISD::SINT_TO_FP, IntegerReg, /*Kill=*/true);
-    if (MaterialReg == 0)
-      return 0;
+    // This is a bit ugly/slow, but failing here means falling out of
+    // fast-isel, which would be very slow.
+    const IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
+                                              VT.getSizeInBits());
+    MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
   }
   return FastEmit_rr(VT, VT, Opcode,
                      Op0, Op0IsKill,
@@ -1078,6 +1131,30 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
   return ResultReg;
 }
 
+unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
+                                    const TargetRegisterClass *RC,
+                                    unsigned Op0, bool Op0IsKill,
+                                    unsigned Op1, bool Op1IsKill,
+                                    unsigned Op2, bool Op2IsKill) {
+  unsigned ResultReg = createResultReg(RC);
+  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+  if (II.getNumDefs() >= 1)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+      .addReg(Op0, Op0IsKill * RegState::Kill)
+      .addReg(Op1, Op1IsKill * RegState::Kill)
+      .addReg(Op2, Op2IsKill * RegState::Kill);
+  else {
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+      .addReg(Op0, Op0IsKill * RegState::Kill)
+      .addReg(Op1, Op1IsKill * RegState::Kill)
+      .addReg(Op2, Op2IsKill * RegState::Kill);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+            ResultReg).addReg(II.ImplicitDefs[0]);
+  }
+  return ResultReg;
+}
+
 unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    unsigned Op0, bool Op0IsKill,
@@ -1183,6 +1260,23 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
   return ResultReg;
 }
 
+unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
+                                   const TargetRegisterClass *RC,
+                                   uint64_t Imm1, uint64_t Imm2) {
+  unsigned ResultReg = createResultReg(RC);
+  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+  if (II.getNumDefs() >= 1)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+      .addImm(Imm1).addImm(Imm2);
+  else {
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+            ResultReg).addReg(II.ImplicitDefs[0]);
+  }
+  return ResultReg;
+}
+
 unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
                                               unsigned Op0, bool Op0IsKill,
                                               uint32_t Idx) {
@@ -1238,7 +1332,7 @@ bool
 FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
       // Only handle legal types. Two interesting things to note here. First,
       // by bailing out early, we may leave behind some dead instructions,
       // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
-      // own moves. Second, this check is necessary becuase FastISel doesn't
+      // own moves. Second, this check is necessary because FastISel doesn't
       // use CreateRegs to create registers, so it always creates
       // exactly one register for each non-void instruction.
       EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index d8a5770..d518b5d 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -54,25 +54,6 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
   return false;
 }
 
-/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
-/// entry block, return true. This includes arguments used by switches, since
-/// the switch may expand into multiple basic blocks.
-static bool isOnlyUsedInEntryBlock(const Argument *A, bool EnableFastISel) {
-  // With FastISel active, we may be splitting blocks, so force creation
-  // of virtual registers for all non-dead arguments.
-  if (EnableFastISel)
-    return A->use_empty();
-
-  const BasicBlock *Entry = A->getParent()->begin();
-  for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
-       UI != E; ++UI) {
-    const User *U = *UI;
-    if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
-      return false;  // Use not in entry block.
-  }
-  return true;
-}
-
 FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli)
   : TLI(tli) {
 }
@@ -86,16 +67,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
   SmallVector<ISD::OutputArg, 4> Outs;
   GetReturnInfo(Fn->getReturnType(),
                 Fn->getAttributes().getRetAttributes(), Outs, TLI);
-  CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), Fn->isVarArg(),
+  CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF,
+                                      Fn->isVarArg(),
                                       Outs, Fn->getContext());
 
-  // Create a vreg for each argument register that is not dead and is used
-  // outside of the entry block for the function.
-  for (Function::const_arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end();
-       AI != E; ++AI)
-    if (!isOnlyUsedInEntryBlock(AI, EnableFastISel))
-      InitializeRegForValue(AI);
-
   // Initialize the mapping of values to registers.  This is only set up for
   // instruction values that are used outside of the block that defines
   // them.
@@ -181,6 +156,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
        const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
     if (PN->use_empty()) continue;
 
+    // Skip empty types
+    if (PN->getType()->isEmptyTy())
+      continue;
+
     DebugLoc DL = PN->getDebugLoc();
     unsigned PHIReg = ValueMap[PN];
     assert(PHIReg && "PHI node does not have an assigned virtual register!");
@@ -343,7 +322,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
       APInt Zero(BitWidth, 0);
       DestLOI.KnownZero = Zero;
       DestLOI.KnownOne = Zero;
-      return;  
+      return;
     }
 
     if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
@@ -375,18 +354,18 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
 /// setByValArgumentFrameIndex - Record frame index for the byval
 /// argument. This overrides previous frame index entry for this argument,
 /// if any.
-void FunctionLoweringInfo::setByValArgumentFrameIndex(const Argument *A, 
+void FunctionLoweringInfo::setByValArgumentFrameIndex(const Argument *A,
                                                       int FI) {
   assert (A->hasByValAttr() && "Argument does not have byval attribute!");
   ByValArgFrameIndexMap[A] = FI;
 }
-  
+
 /// getByValArgumentFrameIndex - Get frame index for the byval argument.
 /// If the argument does not have any assigned frame index then 0 is
 /// returned.
 int FunctionLoweringInfo::getByValArgumentFrameIndex(const Argument *A) {
   assert (A->hasByValAttr() && "Argument does not have byval attribute!");
-  DenseMap<const Argument *, int>::iterator I = 
+  DenseMap<const Argument *, int>::iterator I =
     ByValArgFrameIndexMap.find(A);
   if (I != ByValArgFrameIndexMap.end())
     return I->second;
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index e309def..2a65d65 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -76,6 +76,12 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
   // the CopyToReg'd destination register instead of creating a new vreg.
   bool MatchReg = true;
   const TargetRegisterClass *UseRC = NULL;
+  EVT VT = Node->getValueType(ResNo);
+
+  // Stick to the preferred register classes for legal types.
+  if (TLI->isTypeLegal(VT))
+    UseRC = TLI->getRegClassFor(VT);
+
   if (!IsClone && !IsCloned)
     for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
          UI != E; ++UI) {
@@ -121,10 +127,9 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
       break;
     }
 
-  EVT VT = Node->getValueType(ResNo);
   const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
   SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
-  
+
   // Figure out the register class to create for the destreg.
   if (VRBase) {
     DstRC = MRI->getRegClass(VRBase);
@@ -283,7 +288,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
     DstRC = II->OpInfo[IIOpNum].getRegClass(TRI);
   assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) &&
          "Don't have operand info for this instruction!");
-  if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) {
+  if (DstRC && !SrcRC->hasSuperClassEq(DstRC)) {
     unsigned NewVReg = MRI->createVirtualRegister(DstRC);
     BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
             TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
@@ -543,17 +548,18 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
 void InstrEmitter::EmitRegSequence(SDNode *Node,
                                   DenseMap<SDValue, unsigned> &VRBaseMap,
                                   bool IsClone, bool IsCloned) {
-  const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0));
+  unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+  const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
   unsigned NewVReg = MRI->createVirtualRegister(RC);
   MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
                              TII->get(TargetOpcode::REG_SEQUENCE), NewVReg);
   unsigned NumOps = Node->getNumOperands();
-  assert((NumOps & 1) == 0 &&
-         "REG_SEQUENCE must have an even number of operands!");
+  assert((NumOps & 1) == 1 &&
+         "REG_SEQUENCE must have an odd number of operands!");
   const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
-  for (unsigned i = 0; i != NumOps; ++i) {
+  for (unsigned i = 1; i != NumOps; ++i) {
     SDValue Op = Node->getOperand(i);
-    if (i & 1) {
+    if ((i & 1) == 0) {
       unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
       unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
       const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
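The even-to-odd flip in the REG_SEQUENCE assert follows from the new operand layout the hunk reads: operand 0 now carries the destination register-class index, followed by (register, subreg-index) pairs, so a well-formed node has 1 + 2k operands. A toy model of that invariant, with made-up operand names:

```cpp
#include <cassert>
#include <vector>

int main() {
  // e.g. REG_SEQUENCE rc_idx, %v0, sub_0, %v1, sub_1  -> 5 operands
  std::vector<const char*> ops = {"rc_idx", "%v0", "sub_0", "%v1", "sub_1"};
  unsigned NumOps = ops.size();
  assert((NumOps & 1) == 1);          // odd, as the new assert demands
  for (unsigned i = 1; i != NumOps; ++i)
    if ((i & 1) == 0)                 // even slots now hold subreg indices
      assert(ops[i][0] != '%');
  return 0;
}
```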
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index b837261..4b6d3ef 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -14,23 +14,16 @@
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/CallingConv.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
@@ -57,19 +50,13 @@ class SelectionDAGLegalize {
   const TargetMachine &TM;
   const TargetLowering &TLI;
   SelectionDAG &DAG;
-  CodeGenOpt::Level OptLevel;
 
   // Libcall insertion helpers.
 
-  /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
+  /// LastCALLSEQ - This keeps track of the CALLSEQ_END node that has been
   /// legalized.  We use this to ensure that calls are properly serialized
   /// against each other, including inserted libcalls.
-  SDValue LastCALLSEQ_END;
-
-  /// IsLegalizingCall - This member is used *only* for purposes of providing
-  /// helpful assertions that a libcall isn't created while another call is
-  /// being legalized (which could lead to non-serialized call sequences).
-  bool IsLegalizingCall;
+  SmallVector<SDValue, 8> LastCALLSEQ;
 
   enum LegalizeAction {
     Legal,      // The target natively supports this operation.
@@ -98,13 +85,13 @@ class SelectionDAGLegalize {
   }
 
 public:
-  SelectionDAGLegalize(SelectionDAG &DAG, CodeGenOpt::Level ol);
+  explicit SelectionDAGLegalize(SelectionDAG &DAG);
 
   /// getTypeAction - Return how we should legalize values of this type, either
   /// it is already legal or we need to expand it into multiple registers of
   /// smaller integer type, or we need to promote it to a larger type.
   LegalizeAction getTypeAction(EVT VT) const {
-    return (LegalizeAction)ValueTypeActions.getTypeAction(VT);
+    return (LegalizeAction)TLI.getTypeAction(*DAG.getContext(), VT);
   }
 
   /// isTypeLegal - Return true if this type is legal on this target.
@@ -147,6 +134,9 @@ private:
                                                  DebugLoc dl);
 
   SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+  SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
+                        unsigned NumOps, bool isSigned, DebugLoc dl);
+
   std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
                                                  SDNode *Node, bool isSigned);
   SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
@@ -158,7 +148,7 @@ private:
                            RTLIB::Libcall Call_I32,
                            RTLIB::Libcall Call_I64,
                            RTLIB::Libcall Call_I128);
-  SDValue ExpandDivRemLibCall(SDNode *Node, bool isSigned, bool isDIV);
+  void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 
   SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
   SDValue ExpandBUILD_VECTOR(SDNode *Node);
@@ -184,6 +174,15 @@ private:
 
   void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
   void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+  SDValue getLastCALLSEQ() { return LastCALLSEQ.back(); }
+  void setLastCALLSEQ(const SDValue s) { LastCALLSEQ.back() = s; }
+  void pushLastCALLSEQ(SDValue s) {
+    LastCALLSEQ.push_back(s);
+  }
+  void popLastCALLSEQ() {
+    LastCALLSEQ.pop_back();
+  }
 };
 }
 
@@ -219,18 +218,16 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
   return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
 }
 
-SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
-                                           CodeGenOpt::Level ol)
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
   : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
-    DAG(dag), OptLevel(ol),
+    DAG(dag),
     ValueTypeActions(TLI.getValueTypeActions()) {
   assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
          "Too many value types for ValueTypeActions to hold!");
 }
 
 void SelectionDAGLegalize::LegalizeDAG() {
-  LastCALLSEQ_END = DAG.getEntryNode();
-  IsLegalizingCall = false;
+  pushLastCALLSEQ(DAG.getEntryNode());
 
   // The legalize process is inherently a bottom-up recursive process (users
   // legalize their uses before themselves). Given infinite stack space, we
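Replacing the single `LastCALLSEQ_END` with a stack is what lets nested CALLSEQ_START/CALLSEQ_END regions each track their own terminator. A stripped-down model of the push/set/pop discipline the new helpers introduce:

```cpp
#include <cassert>
#include <vector>

int main() {
  std::vector<int> lastCallSeq;     // stands in for SmallVector<SDValue, 8>
  lastCallSeq.push_back(0);         // pushLastCALLSEQ(DAG.getEntryNode())
  lastCallSeq.push_back(1);         // entering a nested call sequence
  lastCallSeq.back() = 2;           // setLastCALLSEQ inside the inner region
  assert(lastCallSeq.back() == 2);  // getLastCALLSEQ sees the inner value
  lastCallSeq.pop_back();           // popLastCALLSEQ at the inner CALLSEQ_END
  assert(lastCallSeq.back() == 0);  // outer state restored
  return 0;
}
```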
@@ -258,14 +255,15 @@ void SelectionDAGLegalize::LegalizeDAG() {
 /// FindCallEndFromCallStart - Given a chained node that is part of a call
 /// sequence, find the CALLSEQ_END node that terminates the call sequence.
 static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
-  // Nested CALLSEQ_START/END constructs aren't yet legal,
-  // but we can DTRT and handle them correctly here.
+  int next_depth = depth;
   if (Node->getOpcode() == ISD::CALLSEQ_START)
-    depth++;
-  else if (Node->getOpcode() == ISD::CALLSEQ_END) {
-    depth--;
-    if (depth == 0)
+    next_depth = depth + 1;
+  if (Node->getOpcode() == ISD::CALLSEQ_END) {
+    assert(depth > 0 && "negative depth!");
+    if (depth == 1)
       return Node;
+    else
+      next_depth = depth - 1;
   }
   if (Node->use_empty())
     return 0;   // No CallSeqEnd
@@ -296,7 +294,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
     SDNode *User = *UI;
     for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
       if (User->getOperand(i) == TheChain)
-        if (SDNode *Result = FindCallEndFromCallStart(User, depth))
+        if (SDNode *Result = FindCallEndFromCallStart(User, next_depth))
           return Result;
   }
   return 0;
@@ -317,6 +315,7 @@ static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
     case ISD::CALLSEQ_START:
       if (!nested)
         return Node;
+      Node = Node->getOperand(0).getNode();
       nested--;
       break;
     case ISD::CALLSEQ_END:
@@ -324,7 +323,7 @@ static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
       break;
     }
   }
-  return 0;
+  return (Node->getOpcode() == ISD::CALLSEQ_START) ? Node : 0;
 }
 
 /// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to
@@ -433,68 +432,67 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
       SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
       return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                           ST->isVolatile(), ST->isNonTemporal(), Alignment);
-    } else {
-      // Do a (aligned) store to a stack slot, then copy from the stack slot
-      // to the final destination using (unaligned) integer loads and stores.
-      EVT StoredVT = ST->getMemoryVT();
-      EVT RegVT =
-        TLI.getRegisterType(*DAG.getContext(),
-                            EVT::getIntegerVT(*DAG.getContext(),
-                                              StoredVT.getSizeInBits()));
-      unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
-      unsigned RegBytes = RegVT.getSizeInBits() / 8;
-      unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
-
-      // Make sure the stack slot is also aligned for the register type.
-      SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
-
-      // Perform the original store, only redirected to the stack slot.
-      SDValue Store = DAG.getTruncStore(Chain, dl,
-                                        Val, StackPtr, MachinePointerInfo(),
-                                        StoredVT, false, false, 0);
-      SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
-      SmallVector<SDValue, 8> Stores;
-      unsigned Offset = 0;
-
-      // Do all but one copies using the full register width.
-      for (unsigned i = 1; i < NumRegs; i++) {
-        // Load one integer register's worth from the stack slot.
-        SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
-                                   MachinePointerInfo(),
-                                   false, false, 0);
-        // Store it to the final location.  Remember the store.
-        Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
-                                      ST->getPointerInfo().getWithOffset(Offset),
-                                      ST->isVolatile(), ST->isNonTemporal(),
-                                      MinAlign(ST->getAlignment(), Offset)));
-        // Increment the pointers.
-        Offset += RegBytes;
-        StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
-                               Increment);
-        Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
-      }
+    }
+    // Do a (aligned) store to a stack slot, then copy from the stack slot
+    // to the final destination using (unaligned) integer loads and stores.
+    EVT StoredVT = ST->getMemoryVT();
+    EVT RegVT =
+      TLI.getRegisterType(*DAG.getContext(),
+                          EVT::getIntegerVT(*DAG.getContext(),
+                                            StoredVT.getSizeInBits()));
+    unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+    unsigned RegBytes = RegVT.getSizeInBits() / 8;
+    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
 
-      // The last store may be partial.  Do a truncating store.  On big-endian
-      // machines this requires an extending load from the stack slot to ensure
-      // that the bits are in the right place.
-      EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
-                                    8 * (StoredBytes - Offset));
+    // Make sure the stack slot is also aligned for the register type.
+    SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
 
-      // Load from the stack slot.
-      SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
-                                    MachinePointerInfo(),
-                                    MemVT, false, false, 0);
+    // Perform the original store, only redirected to the stack slot.
+    SDValue Store = DAG.getTruncStore(Chain, dl,
+                                      Val, StackPtr, MachinePointerInfo(),
+                                      StoredVT, false, false, 0);
+    SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+    SmallVector<SDValue, 8> Stores;
+    unsigned Offset = 0;
 
-      Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
-                                         ST->getPointerInfo()
-                                           .getWithOffset(Offset),
-                                         MemVT, ST->isVolatile(),
-                                         ST->isNonTemporal(),
-                                         MinAlign(ST->getAlignment(), Offset)));
-      // The order of the stores doesn't matter - say it with a TokenFactor.
-      return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
-                         Stores.size());
+    // Do all but one copies using the full register width.
+    for (unsigned i = 1; i < NumRegs; i++) {
+      // Load one integer register's worth from the stack slot.
+      SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
+                                 MachinePointerInfo(),
+                                 false, false, 0);
+      // Store it to the final location.  Remember the store.
+      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+                                    ST->getPointerInfo().getWithOffset(Offset),
+                                    ST->isVolatile(), ST->isNonTemporal(),
+                                    MinAlign(ST->getAlignment(), Offset)));
+      // Increment the pointers.
+      Offset += RegBytes;
+      StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+                             Increment);
+      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
    }
 
+    // The last store may be partial.  Do a truncating store.  On big-endian
+    // machines this requires an extending load from the stack slot to ensure
+    // that the bits are in the right place.
+    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+                                  8 * (StoredBytes - Offset));
+
+    // Load from the stack slot.
+    SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+                                  MachinePointerInfo(),
+                                  MemVT, false, false, 0);
+
+    Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+                                       ST->getPointerInfo()
+                                         .getWithOffset(Offset),
+                                       MemVT, ST->isVolatile(),
+                                       ST->isNonTemporal(),
+                                       MinAlign(ST->getAlignment(), Offset)));
+    // The order of the stores doesn't matter - say it with a TokenFactor.
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); } assert(ST->getMemoryVT().isInteger() && !ST->getMemoryVT().isVector() && @@ -941,11 +939,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::BR_JT: case ISD::BR_CC: case ISD::BRCOND: - // Branches tweak the chain to include LastCALLSEQ_END + assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?"); + // Branches tweak the chain to include LastCALLSEQ Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], - LastCALLSEQ_END); + getLastCALLSEQ()); Ops[0] = LegalizeOp(Ops[0]); - LastCALLSEQ_END = DAG.getEntryNode(); + setLastCALLSEQ(DAG.getEntryNode()); break; case ISD::SHL: case ISD::SRL: @@ -1034,6 +1033,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; case ISD::CALLSEQ_START: { SDNode *CallEnd = FindCallEndFromCallStart(Node); + assert(CallEnd && "didn't find CALLSEQ_END!"); // Recursively Legalize all of the inputs of the call end that do not lead // to this call start. This ensures that any libcalls that need be inserted @@ -1050,9 +1050,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Merge in the last call to ensure that this call starts after the last // call ended. - if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { + if (getLastCALLSEQ().getOpcode() != ISD::EntryToken) { Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Tmp1, LastCALLSEQ_END); + Tmp1, getLastCALLSEQ()); Tmp1 = LegalizeOp(Tmp1); } @@ -1073,25 +1073,29 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // sequence have been legalized, legalize the call itself. During this // process, no libcalls can/will be inserted, guaranteeing that no calls // can overlap. - assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); // Note that we are selecting this call! - LastCALLSEQ_END = SDValue(CallEnd, 0); - IsLegalizingCall = true; + setLastCALLSEQ(SDValue(CallEnd, 0)); // Legalize the call, starting from the CALLSEQ_END. - LegalizeOp(LastCALLSEQ_END); - assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); + LegalizeOp(getLastCALLSEQ()); return Result; } case ISD::CALLSEQ_END: - // If the CALLSEQ_START node hasn't been legalized first, legalize it. This - // will cause this node to be legalized as well as handling libcalls right. - if (LastCALLSEQ_END.getNode() != Node) { - LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); - DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); - assert(I != LegalizedNodes.end() && - "Legalizing the call start should have legalized this node!"); - return I->second; + { + SDNode *myCALLSEQ_BEGIN = FindCallStartFromCallEnd(Node); + + // If the CALLSEQ_START node hasn't been legalized first, legalize it. + // This will cause this node to be legalized as well as handling libcalls + // right. + if (getLastCALLSEQ().getNode() != Node) { + LegalizeOp(SDValue(myCALLSEQ_BEGIN, 0)); + DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + assert(I != LegalizedNodes.end() && + "Legalizing the call start should have legalized this node!"); + return I->second; + } + + pushLastCALLSEQ(SDValue(myCALLSEQ_BEGIN, 0)); } // Otherwise, the call start has been legalized and everything is going @@ -1119,9 +1123,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result.getResNo()); } } - assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); // This finishes up call legalization. 
- IsLegalizingCall = false; + popLastCALLSEQ(); // If the CALLSEQ_END node has a flag, remember that we legalized it. AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); @@ -1371,6 +1374,91 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = LegalizeOp(Load.getValue(1)); break; } + + // If this is a promoted vector load, and the vector element types are + // legal, then scalarize it. + if (ExtType == ISD::EXTLOAD && SrcVT.isVector() && + isTypeLegal(Node->getValueType(0).getScalarType())) { + SmallVector<SDValue, 8> LoadVals; + SmallVector<SDValue, 8> LoadChains; + unsigned NumElem = SrcVT.getVectorNumElements(); + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, + Node->getValueType(0).getScalarType(), + Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + + LoadVals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, + Node->getValueType(0), &LoadVals[0], LoadVals.size()); + + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. + break; + } + + // If this is a promoted vector load, and the vector element types are + // illegal, create the promoted vector from bitcasted segments. + if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) { + EVT MemElemTy = Node->getValueType(0).getScalarType(); + EVT SrcSclrTy = SrcVT.getScalarType(); + unsigned SizeRatio = + (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits()); + + SmallVector<SDValue, 8> LoadVals; + SmallVector<SDValue, 8> LoadChains; + unsigned NumElem = SrcVT.getVectorNumElements(); + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, + SrcVT.getScalarType(), + Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + if (TLI.isBigEndian()) { + // MSB (which is garbage, comes first) + LoadVals.push_back(ScalarLoad.getValue(0)); + for (unsigned i = 0; i<SizeRatio-1; ++i) + LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType())); + } else { + // LSB (which is data, comes first) + for (unsigned i = 0; i<SizeRatio-1; ++i) + LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType())); + LoadVals.push_back(ScalarLoad.getValue(0)); + } + LoadChains.push_back(ScalarLoad.getValue(1)); + } + + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(), + SrcVT.getScalarType(), NumElem*SizeRatio); + SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, + TempWideVector, &LoadVals[0], LoadVals.size()); + + // Cast to the correct type + ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes); + + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. + break; + + } + // FIXME: This does not work for vectors on most targets. 
Sign- and
  // zero-extend operations are currently folded into extending loads,
  // whether they are legal or not, and then we end up here without any
@@ -1546,6 +1634,88 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
         Result = TLI.LowerOperation(Result, DAG);
         break;
       case Expand:
+
+        EVT WideScalarVT = Tmp3.getValueType().getScalarType();
+        EVT NarrowScalarVT = StVT.getScalarType();
+
+        // The store type is illegal, so the vector store must be scalarized.
+        SmallVector<SDValue, 8> Stores;
+        bool ScalarLegal = isTypeLegal(WideScalarVT);
+        if (!isTypeLegal(StVT) && StVT.isVector() && ScalarLegal) {
+          unsigned NumElem = StVT.getVectorNumElements();
+
+          unsigned ScalarSize = StVT.getScalarType().getSizeInBits();
+          // Round odd types up to the next power of two.
+          if (!isPowerOf2_32(ScalarSize))
+            ScalarSize = NextPowerOf2(ScalarSize);
+          // Types smaller than 8 bits are promoted to 8 bits.
+          ScalarSize = std::max<unsigned>(ScalarSize, 8);
+          // Store stride
+          unsigned Stride = ScalarSize/8;
+          assert(isPowerOf2_32(Stride) && "Stride must be a power of two");
+
+          for (unsigned Idx=0; Idx<NumElem; Idx++) {
+            SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                                     WideScalarVT, Tmp3,
+                                     DAG.getIntPtrConstant(Idx));
+
+            EVT NVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize);
+
+            Ex = DAG.getNode(ISD::TRUNCATE, dl, NVT, Ex);
+            Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                               DAG.getIntPtrConstant(Stride));
+            SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+                                 ST->getPointerInfo().getWithOffset(Idx*Stride),
+                                         isVolatile, isNonTemporal, Alignment);
+            Stores.push_back(Store);
+          }
+          Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                               &Stores[0], Stores.size());
+          break;
+        }
+
+        // The store type is illegal and the scalar type is illegal as well:
+        // bitcast the value and store it in smaller parts.
+        if (!isTypeLegal(StVT) && StVT.isVector()) {
+          unsigned WideNumElem = StVT.getVectorNumElements();
+          unsigned Stride = NarrowScalarVT.getSizeInBits()/8;
+
+          unsigned SizeRatio =
+            (WideScalarVT.getSizeInBits() / NarrowScalarVT.getSizeInBits());
+
+          EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), NarrowScalarVT,
+                                             SizeRatio*WideNumElem);
+
+          // Cast the wide element vector to a wider vector with smaller
+          // elements.  Example: <2 x i64> -> <4 x i32>
+          Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3);
+
+          for (unsigned Idx=0; Idx<WideNumElem*SizeRatio; Idx++) {
+            // Extract element i
+            SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                                     NarrowScalarVT, Tmp3,
+                                     DAG.getIntPtrConstant(Idx));
+            // Bump the pointer.
+            Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                               DAG.getIntPtrConstant(Stride));
+
+            // Store only if this element is:
+            // - the first element on big endian, or
+            // - the last element on little endian
+            if (( TLI.isBigEndian() && (Idx%SizeRatio == 0)) ||
+                ((!TLI.isBigEndian() && (Idx%SizeRatio == SizeRatio-1)))) {
+              SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+                                 ST->getPointerInfo().getWithOffset(Idx*Stride),
+                                           isVolatile, isNonTemporal,
+                                           Alignment);
+              Stores.push_back(Store);
+            }
+          }
+          Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                               &Stores[0], Stores.size());
+          break;
+        }
+
+        // TRUNCSTORE:i16 i32 -> STORE i16
         assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
         Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
@@ -2007,7 +2177,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
   // and leave the Hi part unset.
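The scalarized store path above lays element Idx down at byte offset Idx * Stride, with the stride rounded up to a power-of-two number of bytes and never below one byte. A minimal standalone sketch of that arithmetic (plain C++, not part of the patch; nextPowerOf2() is a hypothetical stand-in for llvm::NextPowerOf2):

    #include <algorithm>
    #include <cassert>
    #include <cstdio>

    // Stand-in for llvm::NextPowerOf2: smallest power of two >= N (N > 0).
    static unsigned nextPowerOf2(unsigned N) {
      unsigned P = 1;
      while (P < N)
        P <<= 1;
      return P;
    }

    int main() {
      unsigned ScalarSize = 24;                // an odd element width, in bits
      if (ScalarSize & (ScalarSize - 1))       // not already a power of two?
        ScalarSize = nextPowerOf2(ScalarSize); // 24 -> 32
      ScalarSize = std::max(ScalarSize, 8u);   // sub-byte types widen to 8 bits
      unsigned Stride = ScalarSize / 8;        // byte distance between elements
      assert((Stride & (Stride - 1)) == 0 && "Stride must be a power of two");
      for (unsigned Idx = 0; Idx != 4; ++Idx)
        std::printf("element %u stored at byte offset %u\n", Idx, Idx * Stride);
      return 0;
    }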
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. @@ -2043,9 +2212,43 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, return DAG.getRoot(); // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); + return CallInfo.first; +} + +/// ExpandLibCall - Generate a libcall taking the given operands as arguments +/// and returning a result of type RetVT. +SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, DebugLoc dl) { + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + std::pair<SDValue,SDValue> CallInfo = + TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + + // Legalize the call sequence, starting with the chain. This will advance // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). LegalizeOp(CallInfo.second); + return CallInfo.first; } @@ -2055,7 +2258,6 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; @@ -2081,7 +2283,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, Callee, Args, DAG, Node->getDebugLoc()); // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). LegalizeOp(CallInfo.second); return CallInfo; @@ -2121,10 +2323,9 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, return ExpandLibCall(LC, Node, isSigned); } -/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem -/// pairs. -SDValue SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned, - bool isDIV) { +/// isDivRemLibcallAvailable - Return true if divmod libcall is available. +static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, + const TargetLowering &TLI) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { default: assert(0 && "Unexpected request for libcall!"); @@ -2135,17 +2336,18 @@ SDValue SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned, case MVT::i128: LC= isSigned ? 
RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; } - if (!TLI.getLibcallName(LC)) - return SDValue(); + return TLI.getLibcallName(LC) != 0; +} - // Only issue divrem libcall if both quotient and remainder are needed. +/// UseDivRem - Only issue divrem libcall if both quotient and remainder are +/// needed. +static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) { unsigned OtherOpcode = 0; - if (isSigned) { + if (isSigned) OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV; - } else { + else OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV; - } - SDNode *OtherNode = 0; + SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), @@ -2155,32 +2357,28 @@ SDValue SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned, continue; if (User->getOpcode() == OtherOpcode && User->getOperand(0) == Op0 && - User->getOperand(1) == Op1) { - OtherNode = User; - break; - } + User->getOperand(1) == Op1) + return true; } - if (!OtherNode) - return SDValue(); + return false; +} - // If the libcall is already generated, no need to issue it again. - DenseMap<SDValue, SDValue>::iterator I - = LegalizedNodes.find(SDValue(OtherNode,0)); - if (I != LegalizedNodes.end()) { - OtherNode = I->second.getNode(); - SDNode *Chain = OtherNode->getOperand(0).getNode(); - for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end(); - UI != UE; ++UI) { - SDNode *User = *UI; - if (User == OtherNode) - continue; - if (isDIV) { - assert(User->getOpcode() == ISD::CopyFromReg); - } else { - assert(User->getOpcode() == ISD::LOAD); - } - return SDValue(User, 0); - } +/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem +/// pairs. +void +SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + unsigned Opcode = Node->getOpcode(); + bool isSigned = Opcode == ISD::SDIVREM; + + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: assert(0 && "Unexpected request for libcall!"); + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; } // The input chain to this libcall is the entry node of the function. @@ -2221,14 +2419,15 @@ SDValue SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). LegalizeOp(CallInfo.second); // Remainder is loaded back from the stack frame. - SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr, + SDValue Rem = DAG.getLoad(RetVT, dl, getLastCALLSEQ(), FIPtr, MachinePointerInfo(), false, false, 0); - return isDIV ? 
CallInfo.first : Rem; + Results.push_back(CallInfo.first); + Results.push_back(Rem); } /// ExpandLegalINT_TO_FP - This function is responsible for legalizing a @@ -2878,7 +3077,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, } case ISD::FP_ROUND_INREG: { // The only way we can lower this is to turn it into a TRUNCSTORE, - // EXTLOAD pair, targetting a temporary location (a stack slot). + // EXTLOAD pair, targeting a temporary location (a stack slot). // NOTE: there is a choice here between constantly creating new stack // slots and always reusing the same one. We currently always create @@ -3204,28 +3403,25 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; Tmp2 = Node->getOperand(0); Tmp3 = Node->getOperand(1); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || + (isDivRemLibcallAvailable(Node, isSigned, TLI) && + UseDivRem(Node, isSigned, false))) { Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3); Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); - } else if (isSigned) { - Tmp1 = ExpandDivRemLibCall(Node, true, false); - if (!Tmp1.getNode()) - Tmp1 = ExpandIntLibCall(Node, true, - RTLIB::SREM_I8, - RTLIB::SREM_I16, RTLIB::SREM_I32, - RTLIB::SREM_I64, RTLIB::SREM_I128); - } else { - Tmp1 = ExpandDivRemLibCall(Node, false, false); - if (!Tmp1.getNode()) - Tmp1 = ExpandIntLibCall(Node, false, - RTLIB::UREM_I8, - RTLIB::UREM_I16, RTLIB::UREM_I32, - RTLIB::UREM_I64, RTLIB::UREM_I128); - } + } else if (isSigned) + Tmp1 = ExpandIntLibCall(Node, true, + RTLIB::SREM_I8, + RTLIB::SREM_I16, RTLIB::SREM_I32, + RTLIB::SREM_I64, RTLIB::SREM_I128); + else + Tmp1 = ExpandIntLibCall(Node, false, + RTLIB::UREM_I8, + RTLIB::UREM_I16, RTLIB::UREM_I32, + RTLIB::UREM_I64, RTLIB::UREM_I128); Results.push_back(Tmp1); break; } @@ -3235,26 +3431,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned DivRemOpc = isSigned ? 
ISD::SDIVREM : ISD::UDIVREM; EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || + (isDivRemLibcallAvailable(Node, isSigned, TLI) && + UseDivRem(Node, isSigned, true))) Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), Node->getOperand(1)); - else if (isSigned) { - Tmp1 = ExpandDivRemLibCall(Node, true, true); - if (!Tmp1.getNode()) { - Tmp1 = ExpandIntLibCall(Node, true, - RTLIB::SDIV_I8, - RTLIB::SDIV_I16, RTLIB::SDIV_I32, - RTLIB::SDIV_I64, RTLIB::SDIV_I128); - } - } else { - Tmp1 = ExpandDivRemLibCall(Node, false, true); - if (!Tmp1.getNode()) { - Tmp1 = ExpandIntLibCall(Node, false, - RTLIB::UDIV_I8, - RTLIB::UDIV_I16, RTLIB::UDIV_I32, - RTLIB::UDIV_I64, RTLIB::UDIV_I128); - } - } + else if (isSigned) + Tmp1 = ExpandIntLibCall(Node, true, + RTLIB::SDIV_I8, + RTLIB::SDIV_I16, RTLIB::SDIV_I32, + RTLIB::SDIV_I64, RTLIB::SDIV_I128); + else + Tmp1 = ExpandIntLibCall(Node, false, + RTLIB::UDIV_I8, + RTLIB::UDIV_I16, RTLIB::UDIV_I32, + RTLIB::UDIV_I64, RTLIB::UDIV_I128); Results.push_back(Tmp1); break; } @@ -3271,6 +3462,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(Tmp1.getValue(1)); break; } + case ISD::SDIVREM: + case ISD::UDIVREM: + // Expand into divrem libcall + ExpandDivRemLibCall(Node, Results); + break; case ISD::MUL: { EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); @@ -3355,6 +3551,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::UMULO: case ISD::SMULO: { EVT VT = Node->getValueType(0); + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); SDValue BottomHalf; @@ -3372,7 +3569,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TopHalf = BottomHalf.getValue(1); } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2))) { - EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); @@ -3385,7 +3581,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // have a libcall big enough. // Also, we can fall back to a division in some cases, but that's a big // performance hit in the general case. - EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (WideVT == MVT::i16) LC = RTLIB::MUL_I16; @@ -3396,15 +3591,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, else if (WideVT == MVT::i128) LC = RTLIB::MUL_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); - LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); - RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); - SDValue Ret = ExpandLibCall(LC, Node, isSigned); - BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret); - TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret, - DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy())); - TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf); + // The high part is obtained by SRA'ing all but one of the bits of low + // part. 
+ unsigned LoSize = VT.getSizeInBits(); + SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + + // Here we're passing the 2 arguments explicitly as 4 arguments that are + // pre-lowered to the correct types. This all depends upon WideVT not + // being a legal type for the architecture and thus has to be split to + // two arguments. + SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; + SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); + BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, + DAG.getIntPtrConstant(0)); + TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, + DAG.getIntPtrConstant(1)); } + if (isSigned) { Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy(BottomHalf.getValueType())); @@ -3486,9 +3693,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Tmp2.getOperand(0), Tmp2.getOperand(1), Node->getOperand(2)); } else { + // We test only the i1 bit. Skip the AND if UNDEF. + Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? Tmp2 : + DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, + DAG.getConstant(1, Tmp2.getValueType())); Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, - DAG.getCondCode(ISD::SETNE), Tmp2, - DAG.getConstant(0, Tmp2.getValueType()), + DAG.getCondCode(ISD::SETNE), Tmp3, + DAG.getConstant(0, Tmp3.getValueType()), Node->getOperand(2)); } Results.push_back(Tmp1); @@ -3539,7 +3750,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl); - LastCALLSEQ_END = DAG.getEntryNode(); + assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?"); + setLastCALLSEQ(DAG.getEntryNode()); assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); @@ -3697,9 +3909,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, // SelectionDAG::Legalize - This is the entry point for the file. // -void SelectionDAG::Legalize(CodeGenOpt::Level OptLevel) { +void SelectionDAG::Legalize() { /// run - This is the main entry point to this class. 
/// - SelectionDAGLegalize(*this, OptLevel).LegalizeDAG(); + SelectionDAGLegalize(*this).LegalizeDAG(); } - diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 935aab0..da75b8a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -73,6 +73,17 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break; case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break; + case ISD::EXTRACT_SUBVECTOR: + Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break; + case ISD::VECTOR_SHUFFLE: + Res = PromoteIntRes_VECTOR_SHUFFLE(N); break; + case ISD::INSERT_VECTOR_ELT: + Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break; + case ISD::BUILD_VECTOR: + Res = PromoteIntRes_BUILD_VECTOR(N); break; + case ISD::SCALAR_TO_VECTOR: + Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break; + case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; @@ -174,24 +185,30 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { default: assert(false && "Unknown type action!"); break; - case Legal: + case TargetLowering::TypeLegal: break; - case PromoteInteger: + case TargetLowering::TypePromoteInteger: if (NOutVT.bitsEq(NInVT)) // The input promotes to the same size. Convert the promoted value. return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp)); + if (NInVT.isVector()) + // Promote vector element via memory load/store. + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + CreateStackStoreLoad(InOp, OutVT)); break; - case SoftenFloat: + case TargetLowering::TypeSoftenFloat: // Promote the integer operand by hand. return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); - case ExpandInteger: - case ExpandFloat: + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: break; - case ScalarizeVector: + case TargetLowering::TypeScalarizeVector: // Convert the element to an integer and promote it by hand. - return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, - BitConvertToInteger(GetScalarizedVector(InOp))); - case SplitVector: { + if (!NOutVT.isVector()) + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + BitConvertToInteger(GetScalarizedVector(InOp))); + break; + case TargetLowering::TypeSplitVector: { // For example, i32 = BITCAST v2i16 on alpha. Convert the split // pieces of the input into integers and reassemble in the final type. SDValue Lo, Hi; @@ -208,7 +225,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { JoinIntegers(Lo, Hi)); return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp); } - case WidenVector: + case TargetLowering::TypeWidenVector: if (OutVT.bitsEq(NInVT)) // The input is widened to the same size. Convert to the widened value. 
return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp)); @@ -342,7 +359,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); - if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) { + if (getTypeAction(N->getOperand(0).getValueType()) + == TargetLowering::TypePromoteInteger) { SDValue Res = GetPromotedInteger(N->getOperand(0)); assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!"); @@ -507,11 +525,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { switch (getTypeAction(N->getOperand(0).getValueType())) { default: llvm_unreachable("Unknown type action!"); - case Legal: - case ExpandInteger: + case TargetLowering::TypeLegal: + case TargetLowering::TypeExpandInteger: Res = N->getOperand(0); break; - case PromoteInteger: + case TargetLowering::TypePromoteInteger: Res = GetPromotedInteger(N->getOperand(0)); break; } @@ -557,9 +575,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { DebugLoc DL = N->getDebugLoc(); EVT SmallVT = LHS.getValueType(); - // To determine if the result overflowed in a larger type, we extend the input - // to the larger type, do the multiply, then check the high bits of the result - // to see if the overflow happened. + // To determine if the result overflowed in a larger type, we extend the + // input to the larger type, do the multiply, then check the high bits of + // the result to see if the overflow happened. if (N->getOpcode() == ISD::SMULO) { LHS = SExtPromotedInteger(LHS); RHS = SExtPromotedInteger(RHS); @@ -569,8 +587,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { } SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS); - // Overflow occurred iff the high part of the result does not zero/sign-extend - // the low part. + // Overflow occurred iff the high part of the result does not + // zero/sign-extend the low part. SDValue Overflow; if (N->getOpcode() == ISD::UMULO) { // Unsigned overflow occurred iff the high part is non-zero. @@ -672,6 +690,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break; case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break; case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break; + case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break; + case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONVERT_RNDSAT: Res = PromoteIntOp_CONVERT_RNDSAT(N); break; case ISD::INSERT_VECTOR_ELT: @@ -952,7 +972,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { DebugLoc dl = N->getDebugLoc(); SDValue Op = GetPromotedInteger(N->getOperand(0)); Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); - return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType()); + return DAG.getZeroExtendInReg(Op, dl, + N->getOperand(0).getValueType().getScalarType()); } @@ -1513,7 +1534,8 @@ void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, } else { // For example, extension of an i48 to an i64. The operand type necessarily // promotes to the result type, so will end up being expanded too. 
- assert(getTypeAction(Op.getValueType()) == PromoteInteger && + assert(getTypeAction(Op.getValueType()) == + TargetLowering::TypePromoteInteger && "Only know how to promote this result!"); SDValue Res = GetPromotedInteger(Op); assert(Res.getValueType() == N->getValueType(0) && @@ -2030,7 +2052,8 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, } else { // For example, extension of an i48 to an i64. The operand type necessarily // promotes to the result type, so will end up being expanded too. - assert(getTypeAction(Op.getValueType()) == PromoteInteger && + assert(getTypeAction(Op.getValueType()) == + TargetLowering::TypePromoteInteger && "Only know how to promote this result!"); SDValue Res = GetPromotedInteger(Op); assert(Res.getValueType() == N->getValueType(0) && @@ -2178,7 +2201,8 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, } else { // For example, extension of an i48 to an i64. The operand type necessarily // promotes to the result type, so will end up being expanded too. - assert(getTypeAction(Op.getValueType()) == PromoteInteger && + assert(getTypeAction(Op.getValueType()) == + TargetLowering::TypePromoteInteger && "Only know how to promote this result!"); SDValue Res = GetPromotedInteger(Op); assert(Res.getValueType() == N->getValueType(0) && @@ -2613,3 +2637,158 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { "Don't know how to expand this UINT_TO_FP!"); return MakeLibCall(LC, DstVT, &Op, 1, true, dl); } + +SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { + SDValue InOp0 = N->getOperand(0); + EVT InVT = InOp0.getValueType(); + EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + unsigned OutNumElems = N->getValueType(0).getVectorNumElements(); + EVT NOutVTElem = NOutVT.getVectorElementType(); + + DebugLoc dl = N->getDebugLoc(); + SDValue BaseIdx = N->getOperand(1); + + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i != OutNumElems; ++i) { + + // Extract the element from the original vector. + SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(), + BaseIdx, DAG.getIntPtrConstant(i)); + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + InVT.getVectorElementType(), N->getOperand(0), Index); + + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, Ext); + // Insert the converted element to the new vector. 
+    Ops.push_back(Op);
+  }
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+}
+
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
+
+  ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  unsigned NumElts = VT.getVectorNumElements();
+  SmallVector<int, 8> NewMask;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    NewMask.push_back(SV->getMaskElt(i));
+  }
+
+  SDValue V0 = GetPromotedInteger(N->getOperand(0));
+  SDValue V1 = GetPromotedInteger(N->getOperand(1));
+  EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+
+  return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]);
+}
+
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
+
+  SDValue InOp0 = N->getOperand(0);
+  EVT InVT = InOp0.getValueType();
+  EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+
+  EVT OutVT = N->getValueType(0);
+  EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+  assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+  unsigned NumElems = N->getNumOperands();
+  EVT NOutVTElem = NOutVT.getVectorElementType();
+
+  DebugLoc dl = N->getDebugLoc();
+
+  SmallVector<SDValue, 8> Ops;
+  for (unsigned i = 0; i != NumElems; ++i) {
+    SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i));
+    Ops.push_back(Op);
+  }
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
+
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue InOp0 = N->getOperand(0);
+  EVT InVT = InOp0.getValueType();
+  EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+  assert(!InVT.isVector() && "Input must not be a vector");
+
+  EVT OutVT = N->getValueType(0);
+  EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+  assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+  EVT NOutVTElem = NOutVT.getVectorElementType();
+
+  SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0));
+
+  return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
+
+  SDValue InOp0 = N->getOperand(0);
+  EVT InVT = InOp0.getValueType();
+  EVT InElVT = InVT.getVectorElementType();
+  EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+
+  EVT OutVT = N->getValueType(0);
+  EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+  assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+  EVT NOutVTElem = NOutVT.getVectorElementType();
+
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp0);
+
+  SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl,
+                                 NOutVTElem, N->getOperand(1));
+  return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NOutVT,
+                     ConvertedVector, ConvElem, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  SDValue V0 = GetPromotedInteger(N->getOperand(0));
+  SDValue V1 = N->getOperand(1);
+  SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                            V0->getValueType(0).getScalarType(), V0, V1);
+
+  return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
+
+  DebugLoc dl = N->getDebugLoc();
+
+  EVT RetSclrTy = N->getValueType(0).getVectorElementType();
+
+  SmallVector<SDValue, 8> NewOps;
+
// For each incoming vector + for (unsigned VecIdx = 0, E = N->getNumOperands(); VecIdx!= E; ++VecIdx) { + SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx)); + EVT SclrTy = Incoming->getValueType(0).getVectorElementType(); + unsigned NumElem = Incoming->getValueType(0).getVectorNumElements(); + + for (unsigned i=0; i<NumElem; ++i) { + // Extract element from incoming vector + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, + Incoming, DAG.getIntPtrConstant(i)); + SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex); + NewOps.push_back(Tr); + } + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0), + &NewOps[0], NewOps.size()); + } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index cedda7e..ba658b0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -224,38 +224,38 @@ bool DAGTypeLegalizer::run() { switch (getTypeAction(ResultVT)) { default: assert(false && "Unknown action!"); - case Legal: + case TargetLowering::TypeLegal: break; // The following calls must take care of *all* of the node's results, // not just the illegal result they were passed (this includes results // with a legal type). Results can be remapped using ReplaceValueWith, // or their promoted/expanded/etc values registered in PromotedIntegers, // ExpandedIntegers etc. - case PromoteInteger: + case TargetLowering::TypePromoteInteger: PromoteIntegerResult(N, i); Changed = true; goto NodeDone; - case ExpandInteger: + case TargetLowering::TypeExpandInteger: ExpandIntegerResult(N, i); Changed = true; goto NodeDone; - case SoftenFloat: + case TargetLowering::TypeSoftenFloat: SoftenFloatResult(N, i); Changed = true; goto NodeDone; - case ExpandFloat: + case TargetLowering::TypeExpandFloat: ExpandFloatResult(N, i); Changed = true; goto NodeDone; - case ScalarizeVector: + case TargetLowering::TypeScalarizeVector: ScalarizeVectorResult(N, i); Changed = true; goto NodeDone; - case SplitVector: + case TargetLowering::TypeSplitVector: SplitVectorResult(N, i); Changed = true; goto NodeDone; - case WidenVector: + case TargetLowering::TypeWidenVector: WidenVectorResult(N, i); Changed = true; goto NodeDone; @@ -277,36 +277,36 @@ ScanOperands: switch (getTypeAction(OpVT)) { default: assert(false && "Unknown action!"); - case Legal: + case TargetLowering::TypeLegal: continue; // The following calls must either replace all of the node's results // using ReplaceValueWith, and return "false"; or update the node's // operands in place, and return "true". 
- case PromoteInteger: + case TargetLowering::TypePromoteInteger: NeedsReanalyzing = PromoteIntegerOperand(N, i); Changed = true; break; - case ExpandInteger: + case TargetLowering::TypeExpandInteger: NeedsReanalyzing = ExpandIntegerOperand(N, i); Changed = true; break; - case SoftenFloat: + case TargetLowering::TypeSoftenFloat: NeedsReanalyzing = SoftenFloatOperand(N, i); Changed = true; break; - case ExpandFloat: + case TargetLowering::TypeExpandFloat: NeedsReanalyzing = ExpandFloatOperand(N, i); Changed = true; break; - case ScalarizeVector: + case TargetLowering::TypeScalarizeVector: NeedsReanalyzing = ScalarizeVectorOperand(N, i); Changed = true; break; - case SplitVector: + case TargetLowering::TypeSplitVector: NeedsReanalyzing = SplitVectorOperand(N, i); Changed = true; break; - case WidenVector: + case TargetLowering::TypeWidenVector: NeedsReanalyzing = WidenVectorOperand(N, i); Changed = true; break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 5409b88..06dc40f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -57,16 +57,6 @@ public: // 1+ - This is a node which has this many unprocessed operands. }; private: - enum LegalizeAction { - Legal, // The target natively supports this type. - PromoteInteger, // Replace this integer type with a larger one. - ExpandInteger, // Split this integer type into two of half the size. - SoftenFloat, // Convert this float type to a same size integer type. - ExpandFloat, // Split this float type into two of half the size. - ScalarizeVector, // Replace this one-element vector with its element type. - SplitVector, // Split this vector type into two of half the size. - WidenVector // This vector type should be widened into a larger vector. - }; /// ValueTypeActions - This is a bitvector that contains two bits for each /// simple value type, where the two bits correspond to the LegalizeAction @@ -74,41 +64,13 @@ private: TargetLowering::ValueTypeActionImpl ValueTypeActions; /// getTypeAction - Return how we should legalize values of this type. - LegalizeAction getTypeAction(EVT VT) const { - switch (ValueTypeActions.getTypeAction(VT)) { - default: - assert(false && "Unknown legalize action!"); - case TargetLowering::Legal: - return Legal; - case TargetLowering::Promote: - // Promote can mean - // 1) For integers, use a larger integer type (e.g. i8 -> i32). - // 2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32). - if (!VT.isVector()) - return PromoteInteger; - return WidenVector; - case TargetLowering::Expand: - // Expand can mean - // 1) split scalar in half, 2) convert a float to an integer, - // 3) scalarize a single-element vector, 4) split a vector in two. - if (!VT.isVector()) { - if (VT.isInteger()) - return ExpandInteger; - if (VT.getSizeInBits() == - TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits()) - return SoftenFloat; - return ExpandFloat; - } - - if (VT.getVectorNumElements() == 1) - return ScalarizeVector; - return SplitVector; - } + TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const { + return TLI.getTypeAction(*DAG.getContext(), VT); } /// isTypeLegal - Return true if this type is legal on this target. bool isTypeLegal(EVT VT) const { - return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal; + return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal; } /// IgnoreNodeResults - Pretend all of this node's results are legal. 
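The LegalizeTypes.h hunk above deletes the file-private LegalizeAction enum and the hand-rolled getTypeAction() switch, deferring instead to the shared TargetLowering::getTypeAction(). For reference, here is a self-contained sketch of the decision tree the deleted switch encoded (the types and names below are illustrative stand-ins, not the LLVM API):

    #include <cstdio>

    enum Action { Legal, PromoteInteger, ExpandInteger, SoftenFloat,
                  ExpandFloat, ScalarizeVector, SplitVector, WidenVector };

    struct Ty {
      bool Vector;        // is this a vector type?
      bool Integer;       // integer (vs floating point) scalar?
      unsigned NumElts;   // vector element count
      bool SameSizeXform; // does the transformed type keep the bit width?
    };

    // TargetLowering::Promote meant "use a bigger integer" for scalars but
    // "use a wider vector" for vectors.
    Action classifyPromote(const Ty &T) {
      return T.Vector ? WidenVector : PromoteInteger;
    }

    // TargetLowering::Expand covered four distinct cases.
    Action classifyExpand(const Ty &T) {
      if (!T.Vector) {
        if (T.Integer)
          return ExpandInteger;            // split the integer in half
        return T.SameSizeXform ? SoftenFloat   // float -> same-size integer
                               : ExpandFloat;  // split the float in half
      }
      return T.NumElts == 1 ? ScalarizeVector  // one-element vector -> scalar
                            : SplitVector;     // split the vector in half
    }

    int main() {
      Ty f32 = {false, false, 0, true};   // float softened to a same-size int
      Ty v3i32 = {true, true, 3, false};  // odd vector widened, not promoted
      std::printf("%d %d\n", classifyExpand(f32), classifyPromote(v3i32));
      return 0;
    }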
@@ -239,7 +201,7 @@ private: EVT OldVT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); Op = GetPromotedInteger(Op); - return DAG.getZeroExtendInReg(Op, dl, OldVT); + return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType()); } // Integer Result Promotion. @@ -248,6 +210,11 @@ private: SDValue PromoteIntRes_AssertZext(SDNode *N); SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); + SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); + SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); + SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_BITCAST(SDNode *N); SDValue PromoteIntRes_BSWAP(SDNode *N); SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); @@ -289,6 +256,9 @@ private: SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N); SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N); + SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntOp_MEMBARRIER(SDNode *N); SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index a75ae87..85ea6b6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -43,36 +43,36 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { switch (getTypeAction(InVT)) { default: assert(false && "Unknown type action!"); - case Legal: - case PromoteInteger: + case TargetLowering::TypeLegal: + case TargetLowering::TypePromoteInteger: break; - case SoftenFloat: + case TargetLowering::TypeSoftenFloat: // Convert the integer operand instead. SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; - case ExpandInteger: - case ExpandFloat: + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: // Convert the expanded pieces of the input. GetExpandedOp(InOp, Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; - case SplitVector: + case TargetLowering::TypeSplitVector: GetSplitVector(InOp, Lo, Hi); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; - case ScalarizeVector: + case TargetLowering::TypeScalarizeVector: // Convert the element instead. 
SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; - case WidenVector: { + case TargetLowering::TypeWidenVector: { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); InOp = GetWidenedVector(InOp); EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 0b4dd35..b5698f9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -526,13 +526,13 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, switch (getTypeAction(InVT)) { default: assert(false && "Unknown type action!"); - case Legal: - case PromoteInteger: - case SoftenFloat: - case ScalarizeVector: + case TargetLowering::TypeLegal: + case TargetLowering::TypePromoteInteger: + case TargetLowering::TypeSoftenFloat: + case TargetLowering::TypeScalarizeVector: break; - case ExpandInteger: - case ExpandFloat: + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: // A scalar to vector conversion, where the scalar needs expansion. // If the vector is being split in two then we can just convert the // expanded pieces. @@ -545,7 +545,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, return; } break; - case SplitVector: + case TargetLowering::TypeSplitVector: // If the input is a vector that needs to be split, convert each split // piece of the input now. GetSplitVector(InOp, Lo, Hi); @@ -774,7 +774,7 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, EVT InVT = N->getOperand(0).getValueType(); switch (getTypeAction(InVT)) { default: llvm_unreachable("Unexpected type action!"); - case Legal: { + case TargetLowering::TypeLegal: { EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), @@ -783,10 +783,21 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(InNVT.getVectorNumElements())); break; } - case SplitVector: + case TargetLowering::TypePromoteInteger: { + SDValue InOp = GetPromotedInteger(N->getOperand(0)); + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), + InOp.getValueType().getVectorElementType(), + LoVT.getVectorNumElements()); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + case TargetLowering::TypeSplitVector: GetSplitVector(N->getOperand(0), Lo, Hi); break; - case WidenVector: { + case TargetLowering::TypeWidenVector: { // If the result needs to be split and the input needs to be widened, // the two types must have different lengths. Use the widened result // and extract from it to do the split. 
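ExpandRes_BITCAST above repeatedly cuts a value into Lo/Hi pieces and, for the split-vector case, swaps the pieces on big-endian targets before bitcasting each one. A runnable sketch of that lo/hi split and endian swap on a plain 64-bit integer (the values are assumed examples, not part of the patch):

    #include <cstdio>
    #include <utility>

    int main() {
      unsigned long long V = 0x0123456789abcdefULL;
      unsigned long long Lo = V & 0xffffffffULL;  // low half of the value
      unsigned long long Hi = V >> 32;            // high half of the value
      bool BigEndian = true;                      // pretend TLI.isBigEndian()
      if (BigEndian)
        std::swap(Lo, Hi);  // mirrors the std::swap(Lo, Hi) in the hunk above
      std::printf("first piece 0x%08llx, second piece 0x%08llx\n", Lo, Hi);
      return 0;
    }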
@@ -1439,7 +1450,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned Opcode = N->getOpcode(); unsigned InVTNumElts = InVT.getVectorNumElements(); - if (getTypeAction(InVT) == WidenVector) { + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); InVTNumElts = InVT.getVectorNumElements(); @@ -1515,7 +1526,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { SDValue ShOp = N->getOperand(1); EVT ShVT = ShOp.getValueType(); - if (getTypeAction(ShVT) == WidenVector) { + if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) { ShOp = GetWidenedVector(ShOp); ShVT = ShOp.getValueType(); } @@ -1557,9 +1568,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { default: assert(false && "Unknown type action!"); break; - case Legal: + case TargetLowering::TypeLegal: break; - case PromoteInteger: + case TargetLowering::TypePromoteInteger: // If the InOp is promoted to the same size, convert it. Otherwise, // fall out of the switch and widen the promoted input. InOp = GetPromotedInteger(InOp); @@ -1567,13 +1578,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { if (WidenVT.bitsEq(InVT)) return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); break; - case SoftenFloat: - case ExpandInteger: - case ExpandFloat: - case ScalarizeVector: - case SplitVector: + case TargetLowering::TypeSoftenFloat: + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: + case TargetLowering::TypeScalarizeVector: + case TargetLowering::TypeSplitVector: break; - case WidenVector: + case TargetLowering::TypeWidenVector: // If the InOp is widened to the same size, convert it. Otherwise, fall // out of the switch and widen the widened input. InOp = GetWidenedVector(InOp); @@ -1653,7 +1664,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { unsigned NumOperands = N->getNumOperands(); bool InputWidened = false; // Indicates we need to widen the input. - if (getTypeAction(InVT) != WidenVector) { + if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) { if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) { // Add undef vectors to widen to correct length. 
unsigned NumConcat = WidenVT.getVectorNumElements() / @@ -1732,7 +1743,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); unsigned InVTNumElts = InVT.getVectorNumElements(); - if (getTypeAction(InVT) == WidenVector) { + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(InOp); InVT = InOp.getValueType(); InVTNumElts = InVT.getVectorNumElements(); @@ -1800,7 +1811,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue Idx = N->getOperand(1); DebugLoc dl = N->getDebugLoc(); - if (getTypeAction(InOp.getValueType()) == WidenVector) + if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) InOp = GetWidenedVector(InOp); EVT InVT = InOp.getValueType(); @@ -1882,7 +1893,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { EVT CondEltVT = CondVT.getVectorElementType(); EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenNumElts); - if (getTypeAction(CondVT) == WidenVector) + if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector) Cond1 = GetWidenedVector(Cond1); if (Cond1.getValueType() != CondWidenVT) @@ -2026,7 +2037,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { DebugLoc dl = N->getDebugLoc(); unsigned NumElts = VT.getVectorNumElements(); SDValue InOp = N->getOperand(0); - if (getTypeAction(InOp.getValueType()) == WidenVector) + if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) InOp = GetWidenedVector(InOp); EVT InVT = InOp.getValueType(); EVT InEltVT = InVT.getVectorElementType(); @@ -2081,7 +2092,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { unsigned NumOperands = N->getNumOperands(); for (unsigned i=0; i < NumOperands; ++i) { SDValue InOp = N->getOperand(i); - if (getTypeAction(InOp.getValueType()) == WidenVector) + if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, @@ -2153,6 +2164,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, if (MemVT.getSizeInBits() <= WidenEltWidth) break; if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { RetVT = MemVT; @@ -2168,6 +2180,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned MemVTWidth = MemVT.getSizeInBits(); if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index b2e9c15..f09b381 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -71,6 +71,7 @@ static cl::opt<bool> DisableSchedCycles( cl::desc("Disable cycle-level precision during preRA scheduling")); // Temporary sched=list-ilp flags until the heuristics are robust. +// Some options are also available under sched=list-hybrid. 
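Before the scheduler changes that follow, one note on the FindMemType hunks above: the added isPowerOf2_32(WidenWidth / MemVTWidth) guards accept a candidate memory type only when a power-of-two number of chunks tiles the widened width exactly. A quick standalone check of that predicate (the 96-bit width is an assumed example, not part of the patch):

    #include <cstdio>

    static bool isPow2(unsigned V) { return V && !(V & (V - 1)); }

    int main() {
      unsigned WidenWidth = 96;  // widened vector width in bits
      const unsigned Chunks[] = {8, 16, 32, 48, 64};
      for (unsigned MemVTWidth : Chunks) {
        // Both conditions from the patch: even division, power-of-two count.
        bool OK = (WidenWidth % MemVTWidth) == 0 &&
                  isPow2(WidenWidth / MemVTWidth);
        std::printf("%2u-bit chunk: %s\n", MemVTWidth,
                    OK ? "usable" : "rejected");  // only 48 passes here
      }
      return 0;
    }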
static cl::opt<bool> DisableSchedRegPressure( "disable-sched-reg-pressure", cl::Hidden, cl::init(false), cl::desc("Disable regpressure priority in sched=list-ilp")); @@ -80,6 +81,9 @@ static cl::opt<bool> DisableSchedLiveUses( static cl::opt<bool> DisableSchedVRegCycle( "disable-sched-vrcycle", cl::Hidden, cl::init(false), cl::desc("Disable virtual register cycle interference checks")); +static cl::opt<bool> DisableSchedPhysRegJoin( + "disable-sched-physreg-join", cl::Hidden, cl::init(false), + cl::desc("Disable physreg def-use affinity")); static cl::opt<bool> DisableSchedStalls( "disable-sched-stalls", cl::Hidden, cl::init(true), cl::desc("Disable no-stall priority in sched=list-ilp")); @@ -102,11 +106,11 @@ static cl::opt<unsigned> AvgIPC( #ifndef NDEBUG namespace { // For sched=list-ilp, Count the number of times each factor comes into play. - enum { FactPressureDiff, FactRegUses, FactHeight, FactDepth, FactStatic, - FactOther, NumFactors }; + enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth, + FactStatic, FactOther, NumFactors }; } static const char *FactorName[NumFactors] = -{"PressureDiff", "RegUses", "Height", "Depth","Static", "Other"}; +{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"}; static int FactorCount[NumFactors]; #endif //!NDEBUG @@ -272,6 +276,33 @@ private: }; } // end anonymous namespace +/// GetCostForDef - Looks up the register class and cost for a given definition. +/// Typically this just means looking up the representative register class, +/// but for untyped values (MVT::untyped) it means inspecting the node's +/// opcode to determine what register class is being generated. +static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, + const TargetLowering *TLI, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, + unsigned &RegClass, unsigned &Cost) { + EVT VT = RegDefPos.GetValue(); + + // Special handling for untyped values. These values can only come from + // the expansion of custom DAG-to-DAG patterns. + if (VT == MVT::untyped) { + unsigned Opcode = RegDefPos.GetNode()->getMachineOpcode(); + unsigned Idx = RegDefPos.GetIdx(); + const TargetInstrDesc Desc = TII->get(Opcode); + const TargetRegisterClass *RC = Desc.getRegClass(Idx, TRI); + RegClass = RC->getID(); + // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a + // better way to determine it. + Cost = 1; + } else { + RegClass = TLI->getRepRegClassFor(VT)->getID(); + Cost = TLI->getRepRegClassCostFor(VT); + } +} /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGRRList::Schedule() { @@ -463,6 +494,13 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { if (DisableSchedCycles) return; + // FIXME: Nodes such as CopyFromReg probably should not advance the current + // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node + // has predecessors the cycle will be advanced when they are scheduled. + // But given the crude nature of modeling latency though such nodes, we + // currently need to treat these nodes like real instructions. + // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; + unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); // Bump CurCycle to account for latency. We assume the latency of other @@ -533,6 +571,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) { } } +static void resetVRegCycle(SUnit *SU); + /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending /// count of its predecessors. 
If a predecessor pending count is zero, add it to /// the Available queue. @@ -542,7 +582,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { #ifndef NDEBUG if (CurCycle < SU->getHeight()) - DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n"); + DEBUG(dbgs() << " Height [" << SU->getHeight() + << "] pipeline stall!\n"); #endif // FIXME: Do not modify node height. It may interfere with @@ -559,7 +600,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { AvailableQueue->ScheduledNode(SU); // If HazardRec is disabled, and each inst counts as one cycle, then - // advance CurCycle before ReleasePredecessors to avoid useles pushed to + // advance CurCycle before ReleasePredecessors to avoid useless pushes to // PendingQueue for schedulers that implement HasReadyFilter. if (!HazardRec->isEnabled() && AvgIPC < 2) AdvanceToCycle(CurCycle + 1); @@ -580,20 +621,25 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { } } + resetVRegCycle(SU); + SU->isScheduled = true; // Conditions under which the scheduler should eagerly advance the cycle: // (1) No available instructions // (2) All pipelines full, so available instructions must have hazards. // - // If HazardRec is disabled, the cycle was advanced earlier. + // If HazardRec is disabled, the cycle was pre-advanced before calling + // ReleasePredecessors. In that case, IssueCount should remain 0. // // Check AvailableQueue after ReleasePredecessors in case of zero latency. - ++IssueCount; - if ((HazardRec->isEnabled() && HazardRec->atIssueLimit()) - || (!HazardRec->isEnabled() && AvgIPC > 1 && IssueCount == AvgIPC) - || AvailableQueue->empty()) - AdvanceToCycle(CurCycle + 1); + if (HazardRec->isEnabled() || AvgIPC > 1) { + if (SU->getNode() && SU->getNode()->isMachineOpcode()) + ++IssueCount; + if ((HazardRec->isEnabled() && HazardRec->atIssueLimit()) + || (!HazardRec->isEnabled() && IssueCount == AvgIPC)) + AdvanceToCycle(CurCycle + 1); + } } /// CapturePred - This does the opposite of ReleasePred. Since SU is being @@ -989,14 +1035,15 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { // Check if Ref is live. - if (!LiveRegDefs[Reg]) continue; + if (!LiveRegDefs[*AliasI]) continue; // Allow multiple uses of the same def. - if (LiveRegDefs[Reg] == SU) continue; + if (LiveRegDefs[*AliasI] == SU) continue; // Add Reg to the set of interfering live regs. - if (RegAdded.insert(Reg)) - LRegs.push_back(Reg); + if (RegAdded.insert(*AliasI)) { + LRegs.push_back(*AliasI); + } } } @@ -1220,7 +1267,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // priority. If it is not ready put it back. Schedule the node. Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty()) { - DEBUG(dbgs() << "\n*** Examining Available\n"; + DEBUG(dbgs() << "\nExamining Available:\n"; AvailableQueue->dump(this)); // Pick the best node to schedule taking all constraints into @@ -1349,6 +1396,21 @@ struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } }; +#ifndef NDEBUG +template<class SF> +struct reverse_sort : public queue_sort { + SF &SortFunc; + reverse_sort(SF &sf) : SortFunc(sf) {} + reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {} + + bool operator()(SUnit* left, SUnit* right) const { + // reverse left/right rather than simply !SortFunc(left, right) + // to expose different paths in the comparison logic. 
+ return SortFunc(right, left); + } +}; +#endif // NDEBUG + /// bu_ls_rr_sort - Priority function for bottom up register pressure // reduction scheduler. struct bu_ls_rr_sort : public queue_sort { @@ -1549,20 +1611,33 @@ protected: }; template<class SF> -class RegReductionPriorityQueue : public RegReductionPQBase { - static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) { - std::vector<SUnit *>::iterator Best = Q.begin(); - for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), - E = Q.end(); I != E; ++I) - if (Picker(*Best, *I)) - Best = I; - SUnit *V = *Best; - if (Best != prior(Q.end())) - std::swap(*Best, Q.back()); - Q.pop_back(); - return V; +static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { + std::vector<SUnit *>::iterator Best = Q.begin(); + for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), + E = Q.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != prior(Q.end())) + std::swap(*Best, Q.back()); + Q.pop_back(); + return V; +} + +template<class SF> +SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { +#ifndef NDEBUG + if (DAG->StressSched) { + reverse_sort<SF> RPicker(Picker); + return popFromQueueImpl(Q, RPicker); } +#endif + (void)DAG; + return popFromQueueImpl(Q, Picker); +} +template<class SF> +class RegReductionPriorityQueue : public RegReductionPQBase { SF Picker; public: @@ -1583,7 +1658,7 @@ public: SUnit *pop() { if (Queue.empty()) return NULL; - SUnit *V = popFromQueue(Queue, Picker); + SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); V->NodeQueueId = 0; return V; } @@ -1593,7 +1668,7 @@ public: std::vector<SUnit*> DumpQueue = Queue; SF DumpPicker = Picker; while (!DumpQueue.empty()) { - SUnit *SU = popFromQueue(DumpQueue, DumpPicker); + SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); if (isBottomUp()) dbgs() << "Height " << SU->getHeight() << ": "; else @@ -1623,6 +1698,20 @@ ILPBURRPriorityQueue; // Static Node Priority for Register Pressure Reduction //===----------------------------------------------------------------------===// +// Check for special nodes that bypass scheduling heuristics. +// Currently this pushes TokenFactor nodes down, but may be used for other +// pseudo-ops as well. +// +// Return -1 to schedule right above left, 1 for left above right. +// Return 0 if no bias exists. +static int checkSpecialNodes(const SUnit *left, const SUnit *right) { + bool LSchedLow = left->isScheduleLow; + bool RSchedLow = right->isScheduleLow; + if (LSchedLow != RSchedLow) + return LSchedLow < RSchedLow ? 1 : -1; + return 0; +} + /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. /// Smaller number is the higher priority. static unsigned @@ -1661,17 +1750,6 @@ void RegReductionPQBase::CalculateSethiUllmanNumbers() { CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); } -void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { - SUnits = &sunits; - // Add pseudo dependency edges for two-address nodes. - AddPseudoTwoAddrDeps(); - // Reroute edges to nodes with multiple uses. - if (!TracksRegPressure) - PrescheduleNodesWithMultipleUses(); - // Calculate node priorities. 
- CalculateSethiUllmanNumbers(); -} - void RegReductionPQBase::addNode(const SUnit *SU) { unsigned SUSize = SethiUllmanNumbers.size(); if (SUnits->size() > SUSize) @@ -1710,7 +1788,17 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { // If SU does not have a register def, schedule it close to its uses // because it does not lengthen any live ranges. return 0; +#if 1 return SethiUllmanNumbers[SU->NodeNum]; +#else + unsigned Priority = SethiUllmanNumbers[SU->NodeNum]; + if (SU->isCallOp) { + // FIXME: This assumes all of the defs are used as call operands. + int NP = (int)Priority - SU->getNode()->getNumValues(); + return (NP > 0) ? NP : 0; + } + return Priority; +#endif } //===----------------------------------------------------------------------===// @@ -1746,8 +1834,10 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); RegDefPos.IsValid(); RegDefPos.Advance()) { EVT VT = RegDefPos.GetValue(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); + + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) return true; } @@ -1858,9 +1948,10 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) { RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { if (SkipRegDefs) continue; - EVT VT = RegDefPos.GetValue(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + RegPressure[RCId] += Cost; break; } } @@ -1873,16 +1964,16 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) { RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { if (SkipRegDefs > 0) continue; - EVT VT = RegDefPos.GetValue(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) { + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + if (RegPressure[RCId] < Cost) { // Register pressure tracking is imprecise. This can happen. But we try // hard not to let it happen because it likely results in poor scheduling. DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); RegPressure[RCId] = 0; } else { - RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + RegPressure[RCId] -= Cost; } } dumpRegPressure(); @@ -2008,7 +2099,29 @@ static unsigned calcMaxScratches(const SUnit *SU) { return Scratches; } -/// hasOnlyLiveOutUse - Return true if SU has a single value successor that is a +/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are +/// CopyFromReg from a virtual register. +static bool hasOnlyLiveInOpers(const SUnit *SU) { + bool RetVal = false; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; + const SUnit *PredSU = I->getSUnit(); + if (PredSU->getNode() && + PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { + unsigned Reg = + cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + RetVal = true; + continue; + } + } + return false; + } + return RetVal; +} + +/// hasOnlyLiveOutUses - Return true if SU has only value successors that are /// CopyToReg to a virtual register. This SU def is probably a liveout and /// it has no other use. It should be scheduled closer to the terminator. 
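The reworked pressure updates above route both the register class and the cost through GetCostForDef, and when tracking turns out to be imprecise they clamp the counter rather than let it wrap. A minimal sketch of the clamping rule (releaseDef is a hypothetical name):

    #include <cstdio>

    static void releaseDef(unsigned &Pressure, unsigned Cost) {
      if (Pressure < Cost)
        Pressure = 0; // imprecise tracking: clamp, never wrap the counter
      else
        Pressure -= Cost;
    }

    int main() {
      unsigned P = 3;
      releaseDef(P, 2); std::printf("%u\n", P); // 1
      releaseDef(P, 2); std::printf("%u\n", P); // clamped to 0
      return 0;
    }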
static bool hasOnlyLiveOutUses(const SUnit *SU) { @@ -2030,62 +2143,71 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) { return RetVal; } -/// UnitsSharePred - Return true if the two scheduling units share a common -/// data predecessor. -static bool UnitsSharePred(const SUnit *left, const SUnit *right) { - SmallSet<const SUnit*, 4> Preds; - for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end(); +// Set isVRegCycle for a node with only live in opers and live out uses. Also +// set isVRegCycle for its CopyFromReg operands. +// +// This is only relevant for single-block loops, in which case the VRegCycle +// node is likely an induction variable in which the operand and target virtual +// registers should be coalesced (e.g. pre/post increment values). Setting the +// isVRegCycle flag helps the scheduler prioritize other uses of the same +// CopyFromReg so that this node becomes the virtual register "kill". This +// avoids interference between the values live in and out of the block and +// eliminates a copy inside the loop. +static void initVRegCycle(SUnit *SU) { + if (DisableSchedVRegCycle) + return; + + if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) + return; + + DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); + + SU->isVRegCycle = true; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain preds - Preds.insert(I->getSUnit()); + if (I->isCtrl()) continue; + I->getSUnit()->isVRegCycle = true; } - for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end(); +} + +// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of +// CopyFromReg operands. We should no longer penalize other uses of this VReg. +static void resetVRegCycle(SUnit *SU) { + if (!SU->isVRegCycle) + return; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); I != E; ++I) { if (I->isCtrl()) continue; // ignore chain preds - if (Preds.count(I->getSUnit())) - return true; + SUnit *PredSU = I->getSUnit(); + if (PredSU->isVRegCycle) { + assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && + "VRegCycle def must be CopyFromReg"); + I->getSUnit()->isVRegCycle = 0; + } } - return false; } -// Return true if the virtual register defined by VRCycleSU may interfere with -// VRUseSU. -// -// Note: We may consider two SU's that use the same value live into a loop as -// interferng even though the value is not an induction variable. This is an -// unfortunate consequence of scheduling on the selection DAG. -static bool checkVRegCycleInterference(const SUnit *VRCycleSU, - const SUnit *VRUseSU) { - for (SUnit::const_pred_iterator I = VRCycleSU->Preds.begin(), - E = VRCycleSU->Preds.end(); I != E; ++I) { +// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This +// means a node that defines the VRegCycle has not been scheduled yet. +static bool hasVRegCycleUse(const SUnit *SU) { + // If this SU also defines the VReg, don't hoist it as a "use". 
+ if (SU->isVRegCycle) + return false; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) { if (I->isCtrl()) continue; // ignore chain preds - SDNode *InNode = I->getSUnit()->getNode(); - if (!InNode || InNode->getOpcode() != ISD::CopyFromReg) - continue; - for (SUnit::const_pred_iterator II = VRUseSU->Preds.begin(), - EE = VRUseSU->Preds.end(); II != EE; ++II) { - if (II->getSUnit() == I->getSUnit()) - return true; + if (I->getSUnit()->isVRegCycle && + I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { + DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); + return true; } } return false; } -// Compare the VRegCycle properties of the nodes. -// Return -1 if left has higher priority, 1 if right has higher priority. -// Return 0 if priority is equivalent. -static int BUCompareVRegCycle(const SUnit *left, const SUnit *right) { - if (left->isVRegCycle && !right->isVRegCycle) { - if (checkVRegCycleInterference(left, right)) - return -1; - } - else if (!left->isVRegCycle && right->isVRegCycle) { - if (checkVRegCycleInterference(right, left)) - return 1; - } - return 0; -} - // Check for either a dependence (latency) or resource (hazard) stall. // // Note: The ScheduleHazardRecognizer interface requires a non-const SU. @@ -2101,23 +2223,12 @@ static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { // Return 0 if latency-based priority is equivalent. static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, RegReductionPQBase *SPQ) { - // If the two nodes share an operand and one of them has a single - // use that is a live out copy, favor the one that is live out. Otherwise - // it will be difficult to eliminate the copy if the instruction is a - // loop induction variable update. e.g. - // BB: - // sub r1, r3, #1 - // str r0, [r2, r3] - // mov r3, r1 - // cmp - // bne BB - bool SharePred = UnitsSharePred(left, right); - // FIXME: Only adjust if BB is a loop back edge. - // FIXME: What's the cost of a copy? - int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0; - int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0; - int LHeight = (int)left->getHeight() - LBonus; - int RHeight = (int)right->getHeight() - RBonus; + // Scheduling an instruction that uses a VReg whose postincrement has not yet + // been scheduled will induce a copy. Model this as an extra cycle of latency. + int LPenalty = hasVRegCycleUse(left) ? 1 : 0; + int RPenalty = hasVRegCycleUse(right) ? 1 : 0; + int LHeight = (int)left->getHeight() + LPenalty; + int RHeight = (int)right->getHeight() + RPenalty; bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) && BUHasStall(left, LHeight, SPQ); @@ -2128,48 +2239,102 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, // If scheduling either one of the node will cause a pipeline stall, sort // them according to their height. if (LStall) { - if (!RStall) + if (!RStall) { + DEBUG(++FactorCount[FactStall]); return 1; - if (LHeight != RHeight) + } + if (LHeight != RHeight) { + DEBUG(++FactorCount[FactStall]); return LHeight > RHeight ? 1 : -1; - } else if (RStall) + } + } else if (RStall) { + DEBUG(++FactorCount[FactStall]); return -1; + } // If either node is scheduling for latency, sort them by height/depth // and latency. 
if (!checkPref || (left->SchedulingPref == Sched::Latency || right->SchedulingPref == Sched::Latency)) { if (DisableSchedCycles) { - if (LHeight != RHeight) + if (LHeight != RHeight) { + DEBUG(++FactorCount[FactHeight]); return LHeight > RHeight ? 1 : -1; + } } else { // If neither instruction stalls (!LStall && !RStall) then // its height is already covered so only its depth matters. We also reach // this if both stall but have the same height. - unsigned LDepth = left->getDepth(); - unsigned RDepth = right->getDepth(); + int LDepth = left->getDepth() - LPenalty; + int RDepth = right->getDepth() - RPenalty; if (LDepth != RDepth) { + DEBUG(++FactorCount[FactDepth]); DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum << ") depth " << LDepth << " vs SU (" << right->NodeNum << ") depth " << RDepth << "\n"); return LDepth < RDepth ? 1 : -1; } } - if (left->Latency != right->Latency) + if (left->Latency != right->Latency) { + DEBUG(++FactorCount[FactOther]); return left->Latency > right->Latency ? 1 : -1; + } } return 0; } static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { + // Schedule physical register definitions close to their use. This is + // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as + // long as shortening physreg live ranges is generally good, we can defer + // creating a subtarget hook. + if (!DisableSchedPhysRegJoin) { + bool LHasPhysReg = left->hasPhysRegDefs; + bool RHasPhysReg = right->hasPhysRegDefs; + if (LHasPhysReg != RHasPhysReg) { + DEBUG(++FactorCount[FactRegUses]); + #ifndef NDEBUG + const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"}; + #endif + DEBUG(dbgs() << " SU (" << left->NodeNum << ") " + << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " + << PhysRegMsg[RHasPhysReg] << "\n"); + return LHasPhysReg < RHasPhysReg; + } + } + + // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down. unsigned LPriority = SPQ->getNodePriority(left); unsigned RPriority = SPQ->getNodePriority(right); + + // Be really careful about hoisting call operands above previous calls. + // Only allows it if it would reduce register pressure. + if (left->isCall && right->isCallOp) { + unsigned RNumVals = right->getNode()->getNumValues(); + RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0; + } + if (right->isCall && left->isCallOp) { + unsigned LNumVals = left->getNode()->getNumValues(); + LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0; + } + if (LPriority != RPriority) { DEBUG(++FactorCount[FactStatic]); return LPriority > RPriority; } - DEBUG(++FactorCount[FactOther]); + + // One or both of the nodes are calls and their sethi-ullman numbers are the + // same, then keep source order. + if (left->isCall || right->isCall) { + unsigned LOrder = SPQ->getNodeOrdering(left); + unsigned ROrder = SPQ->getNodeOrdering(right); + + // Prefer an ordering where the lower the non-zero order number, the higher + // the preference. + if ((LOrder || ROrder) && LOrder != ROrder) + return LOrder != 0 && (LOrder < ROrder || ROrder == 0); + } // Try schedule def + use closer when Sethi-Ullman numbers are the same. // e.g. @@ -2190,40 +2355,62 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { // This creates more short live intervals. 
unsigned LDist = closestSucc(left); unsigned RDist = closestSucc(right); - if (LDist != RDist) + if (LDist != RDist) { + DEBUG(++FactorCount[FactOther]); return LDist < RDist; + } // How many registers becomes live when the node is scheduled. unsigned LScratch = calcMaxScratches(left); unsigned RScratch = calcMaxScratches(right); - if (LScratch != RScratch) + if (LScratch != RScratch) { + DEBUG(++FactorCount[FactOther]); return LScratch > RScratch; + } - if (!DisableSchedCycles) { + // Comparing latency against a call makes little sense unless the node + // is register pressure-neutral. + if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0)) + return (left->NodeQueueId > right->NodeQueueId); + + // Do not compare latencies when one or both of the nodes are calls. + if (!DisableSchedCycles && + !(left->isCall || right->isCall)) { int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); if (result != 0) return result > 0; } else { - if (left->getHeight() != right->getHeight()) + if (left->getHeight() != right->getHeight()) { + DEBUG(++FactorCount[FactHeight]); return left->getHeight() > right->getHeight(); + } - if (left->getDepth() != right->getDepth()) + if (left->getDepth() != right->getDepth()) { + DEBUG(++FactorCount[FactDepth]); return left->getDepth() < right->getDepth(); + } } assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); + DEBUG(++FactorCount[FactOther]); return (left->NodeQueueId > right->NodeQueueId); } // Bottom up bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + return BURRSort(left, right, SPQ); } // Source order, otherwise bottom up. bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + unsigned LOrder = SPQ->getNodeOrdering(left); unsigned ROrder = SPQ->getNodeOrdering(right); @@ -2255,6 +2442,9 @@ bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { // Return true if right should be scheduled with higher priority than left. bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + if (left->isCall || right->isCall) // No way to compute latency of calls. return BURRSort(left, right, SPQ); @@ -2264,24 +2454,22 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { // Avoid causing spills. If register pressure is high, schedule for // register pressure reduction. if (LHigh && !RHigh) { + DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" << right->NodeNum << ")\n"); return true; } else if (!LHigh && RHigh) { + DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" << left->NodeNum << ")\n"); return false; } - int result = 0; - if (!DisableSchedVRegCycle) { - result = BUCompareVRegCycle(left, right); - } - if (result == 0 && !LHigh && !RHigh) { - result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); + if (!LHigh && !RHigh) { + int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); + if (result != 0) + return result > 0; } - if (result != 0) - return result > 0; return BURRSort(left, right, SPQ); } @@ -2322,6 +2510,9 @@ static bool canEnableCoalescing(SUnit *SU) { // list-ilp is currently an experimental scheduler that allows various // heuristics to be enabled prior to the normal register reduction logic. 
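Every priority comparator above now opens with checkSpecialNodes before its regular heuristics run. A standalone sketch of that preamble, with Unit as a hypothetical stand-in for SUnit:

    #include <cstdio>

    struct Unit { bool isScheduleLow; unsigned Priority; };

    static int checkSpecialNodes(const Unit *left, const Unit *right) {
      if (left->isScheduleLow != right->isScheduleLow)
        return left->isScheduleLow < right->isScheduleLow ? 1 : -1;
      return 0; // no bias: defer to the normal heuristics
    }

    // Bottom-up comparator convention: returning true pops `right` first.
    static bool buCompare(const Unit *left, const Unit *right) {
      if (int res = checkSpecialNodes(left, right))
        return res > 0;
      return left->Priority > right->Priority; // stand-in for BURRSort
    }

    int main() {
      Unit TF = { true, 5 };  // TokenFactor-like: isScheduleLow
      Unit Op = { false, 1 };
      // Prints 0: the isScheduleLow unit pops first and lands at the bottom.
      std::printf("%d\n", buCompare(&TF, &Op));
      return 0;
    }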
bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + if (left->isCall || right->isCall) // No way to compute latency of calls. return BURRSort(left, right, SPQ); @@ -2347,12 +2538,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { if (RReduce && !LReduce) return true; } - if (!DisableSchedVRegCycle) { - int result = BUCompareVRegCycle(left, right); - if (result != 0) - return result > 0; - } - if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); @@ -2391,6 +2576,24 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { return BURRSort(left, right, SPQ); } +void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + // Add pseudo dependency edges for two-address nodes. + AddPseudoTwoAddrDeps(); + // Reroute edges to nodes with multiple uses. + if (!TracksRegPressure) + PrescheduleNodesWithMultipleUses(); + // Calculate node priorities. + CalculateSethiUllmanNumbers(); + + // For single block loops, mark nodes that look like canonical IV increments. + if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) { + for (unsigned i = 0, e = sunits.size(); i != e; ++i) { + initVRegCycle(&sunits[i]); + } + } +} + //===----------------------------------------------------------------------===// // Preschedule for Register Pressure //===----------------------------------------------------------------------===// @@ -2668,6 +2871,9 @@ static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, // Top down bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res < 0; + unsigned LPriority = SPQ->getNodePriority(left); unsigned RPriority = SPQ->getNodePriority(right); bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 24a1937..0d656ef 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -83,10 +83,13 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { SU->Latency = Old->Latency; SU->isVRegCycle = Old->isVRegCycle; SU->isCall = Old->isCall; + SU->isCallOp = Old->isCallOp; SU->isTwoAddress = Old->isTwoAddress; SU->isCommutable = Old->isCommutable; SU->hasPhysRegDefs = Old->hasPhysRegDefs; SU->hasPhysRegClobbers = Old->hasPhysRegClobbers; + SU->isScheduleHigh = Old->isScheduleHigh; + SU->isScheduleLow = Old->isScheduleLow; SU->SchedulingPref = Old->SchedulingPref; Old->isCloned = true; return SU; @@ -283,6 +286,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { Worklist.push_back(DAG->getRoot().getNode()); Visited.insert(DAG->getRoot().getNode()); + SmallVector<SUnit*, 8> CallSUnits; while (!Worklist.empty()) { SDNode *NI = Worklist.pop_back_val(); @@ -335,6 +339,15 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { if (!HasGlueUse) break; } + if (NodeSUnit->isCall) + CallSUnits.push_back(NodeSUnit); + + // Schedule zero-latency TokenFactor below any nodes that may increase the + // schedule height. Otherwise, ancestors of the TokenFactor may appear to + // have false stalls. 
+ if (NI->getOpcode() == ISD::TokenFactor) + NodeSUnit->isScheduleLow = true; + // If there are glue operands involved, N is now the bottom-most node // of the sequence of nodes that are glued together. // Update the SUnit. @@ -342,16 +355,26 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); - // Set isVRegCycle if the node operands are live into and value is live out - // of a single block loop. - InitVRegCycleFlag(NodeSUnit); - // Compute NumRegDefsLeft. This must be done before AddSchedEdges. InitNumRegDefsLeft(NodeSUnit); // Assign the Latency field of NodeSUnit using target-provided information. ComputeLatency(NodeSUnit); } + + // Find all call operands. + while (!CallSUnits.empty()) { + SUnit *SU = CallSUnits.pop_back_val(); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->getOpcode() != ISD::CopyToReg) + continue; + SDNode *SrcN = SUNode->getOperand(2).getNode(); + if (isPassiveNode(SrcN)) continue; // Not scheduled. + SUnit *SrcSU = &SUnits[SrcN->getNodeId()]; + SrcSU->isCallOp = true; + } + } } void ScheduleDAGSDNodes::AddSchedEdges() { @@ -412,11 +435,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() { // it requires a cross class copy (cost < 0). That means we are only // treating "expensive to copy" register dependency as physical register // dependency. This may change in the future though. - if (Cost >= 0) + if (Cost >= 0 && !StressSched) PhysReg = 0; // If this is a ctrl dep, latency is 1. unsigned OpLatency = isChain ? 1 : OpSU->Latency; + // Special-case TokenFactor chains as zero-latency. + if(isChain && OpN->getOpcode() == ISD::TokenFactor) + OpLatency = 0; + const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, OpLatency, PhysReg); if (!isChain && !UnitLatencies) { @@ -512,47 +539,6 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() { } } -// Set isVRegCycle if this node's single use is CopyToReg and its only active -// data operands are CopyFromReg. -// -// This is only relevant for single-block loops, in which case the VRegCycle -// node is likely an induction variable in which the operand and target virtual -// registers should be coalesced (e.g. pre/post increment values). Setting the -// isVRegCycle flag helps the scheduler prioritize other uses of the same -// CopyFromReg so that this node becomes the virtual register "kill". This -// avoids interference between the values live in and out of the block and -// eliminates a copy inside the loop. 
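The CallSUnits loop above flags argument producers once all SUnits exist, walking each call back through its glued CopyToReg operands. A standalone model of that marking pass (all names hypothetical):

    #include <cstddef>
    #include <vector>

    struct Unit {
      bool isCall;
      bool isCallOp;
      std::vector<Unit*> ArgSrcs; // stand-in for the glued CopyToReg sources
    };

    static void markCallOperands(std::vector<Unit*> &CallSUnits) {
      while (!CallSUnits.empty()) {
        Unit *Call = CallSUnits.back();
        CallSUnits.pop_back();
        for (size_t i = 0, e = Call->ArgSrcs.size(); i != e; ++i)
          Call->ArgSrcs[i]->isCallOp = true; // unit computes a call argument
      }
    }

    int main() {
      Unit Arg = { false, false, std::vector<Unit*>() };
      Unit Call = { true, false, std::vector<Unit*>() };
      Call.ArgSrcs.push_back(&Arg);
      std::vector<Unit*> Calls(1, &Call);
      markCallOperands(Calls);
      return Arg.isCallOp ? 0 : 1; // exits 0: the producer was marked
    }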
-void ScheduleDAGSDNodes::InitVRegCycleFlag(SUnit *SU) { - if (!BB->isSuccessor(BB)) - return; - - SDNode *N = SU->getNode(); - if (N->getGluedNode()) - return; - - if (!N->hasOneUse() || N->use_begin()->getOpcode() != ISD::CopyToReg) - return; - - bool FoundLiveIn = false; - for (SDNode::op_iterator OI = N->op_begin(), E = N->op_end(); OI != E; ++OI) { - EVT OpVT = OI->getValueType(); - assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!"); - - if (OpVT == MVT::Other) - continue; // ignore chain operands - - if (isPassiveNode(OI->getNode())) - continue; // ignore constants and such - - if (OI->getNode()->getOpcode() != ISD::CopyFromReg) - return; - - FoundLiveIn = true; - } - if (FoundLiveIn) - SU->isVRegCycle = true; -} - void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { assert(SU->NumRegDefsLeft == 0 && "expect a new node"); for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) { @@ -562,6 +548,16 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { } void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { + SDNode *N = SU->getNode(); + + // TokenFactor operands are considered zero latency, and some schedulers + // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero + // whenever node latency is nonzero. + if (N && N->getOpcode() == ISD::TokenFactor) { + SU->Latency = 0; + return; + } + // Check to see if the scheduler cares about latencies. if (ForceUnitLatencies()) { SU->Latency = 1; @@ -569,7 +565,6 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { } if (!InstrItins || InstrItins->isEmpty()) { - SDNode *N = SU->getNode(); if (N && N->isMachineOpcode() && TII->isHighLatencyDef(N->getMachineOpcode())) SU->Latency = HighLatencyCycles; @@ -641,7 +636,7 @@ namespace { }; } -/// ProcessSDDbgValues - Process SDDbgValues assoicated with this node. +/// ProcessSDDbgValues - Process SDDbgValues associated with this node. static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index b5f68f3..3ad2bd6 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -135,6 +135,14 @@ namespace llvm { return ValueType; } + const SDNode *GetNode() const { + return Node; + } + + unsigned GetIdx() const { + return DefIdx; + } + void Advance(); private: void InitNodeNumDefs(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c2711c8..68eeb60 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2050,14 +2050,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, break; default: - // Allow the target to implement this method for its nodes. - if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { + if (Op.getOpcode() < ISD::BUILTIN_OP_END) + break; + // Fallthrough case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: - TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this, - Depth); - } + // Allow the target to implement this method for its nodes. + TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this, + Depth); return; } } @@ -2322,6 +2323,13 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op) const { return !C->isZero(); // TODO: Recognize more cases here. 
+ switch (Op.getOpcode()) { + default: break; + case ISD::OR: + if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + return !C->isNullValue(); + break; + } return false; } @@ -2339,16 +2347,6 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { return false; } -bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { - GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); - if (!GA) return false; - if (GA->getOffset() != 0) return false; - const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()); - if (!GV) return false; - return MF->getMMI().hasDebugInfo(); -} - - /// getNode - Gets or creates the specified node. /// SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { @@ -6304,7 +6302,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, - getConstant(i, MVT::i32)); + getConstant(i, TLI.getPointerTy())); } else { // A scalar operand; just use it as is. Operands[j] = Operand; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 24c325e..d79a5ae 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -280,12 +280,35 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, } // Vector/Vector bitcast. - return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + if (ValueVT.getSizeInBits() == PartVT.getSizeInBits()) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + + assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() && + "Cannot handle this kind of promotion"); + // Promoted vector extract + bool Smaller = ValueVT.bitsLE(PartVT); + return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, ValueVT, Val); + } - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && + // Trivial bitcast if the types are the same size and the destination + // vector type is legal. + if (PartVT.getSizeInBits() == ValueVT.getSizeInBits() && + TLI.isTypeLegal(ValueVT)) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + + // Handle cases such as i8 -> <1 x i1> + assert(ValueVT.getVectorNumElements() == 1 && "Only trivial scalar-to-vector conversions should get here!"); + + if (ValueVT.getVectorNumElements() == 1 && + ValueVT.getVectorElementType() != PartVT) { + bool Smaller = ValueVT.bitsLE(PartVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, ValueVT.getScalarType(), Val); + } + return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } @@ -426,7 +449,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, // Bitconvert vector->vector case. Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (PartVT.isVector() && - PartVT.getVectorElementType() == ValueVT.getVectorElementType()&& + PartVT.getVectorElementType() == ValueVT.getVectorElementType() && PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { EVT ElementVT = PartVT.getVectorElementType(); // Vector widening case, e.g. <2 x float> -> <4 x float>. 
Shuffle in @@ -446,13 +469,33 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, //SDValue UndefElts = DAG.getUNDEF(VectorTy); //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); - } else { + } else if (PartVT.isVector() && + PartVT.getVectorElementType().bitsGE( + ValueVT.getVectorElementType()) && + PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { + + // Promoted vector extract + unsigned NumElts = ValueVT.getVectorNumElements(); + SmallVector<SDValue, 8> NewOps; + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + ValueVT.getScalarType(), Val ,DAG.getIntPtrConstant(i)); + SDValue Cast = DAG.getNode(ISD::ANY_EXTEND, + DL, PartVT.getScalarType(), Ext); + NewOps.push_back(Cast); + } + Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, + &NewOps[0], NewOps.size()); + } else{ // Vector -> scalar conversion. - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && + assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getIntPtrConstant(0)); + + bool Smaller = ValueVT.bitsLE(PartVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, PartVT, Val); } Parts[0] = Val; @@ -783,11 +826,20 @@ void SelectionDAGBuilder::clear() { UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); - DanglingDebugInfoMap.clear(); CurDebugLoc = DebugLoc(); HasTailCall = false; } +/// clearDanglingDebugInfo - Clear the dangling debug information +/// map. This function is seperated from the clear so that debug +/// information that is dangling in a basic block can be properly +/// resolved in a different basic block. This allows the +/// SelectionDAG to resolve dangling debug information attached +/// to PHI nodes. +void SelectionDAGBuilder::clearDanglingDebugInfo() { + DanglingDebugInfoMap.clear(); +} + /// getRoot - Return the current virtual root of the Selection DAG, /// flushing any PendingLoad items. This must be done before emitting /// a store or any other node that may need to be ordered after any @@ -1175,6 +1227,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { /// created for it, emit nodes to copy the value into the virtual /// registers. void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { + // Skip empty types + if (V->getType()->isEmptyTy()) + return; + DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { assert(!V->use_empty() && "Unused value assigned virtual registers!"); @@ -1223,6 +1279,24 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, return true; } +/// Return branch probability calculated by BranchProbabilityInfo for IR blocks. 
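The getEdgeWeight/addSuccessorWithWeight helpers introduced at this point funnel every successor edge through one weight lookup. A standalone model, assuming hypothetical Block and BPIModel types in place of MachineBasicBlock and BranchProbabilityInfo:

    #include <utility>
    #include <vector>

    struct Block {
      std::vector<std::pair<Block*, unsigned> > Succs;
      void addSuccessor(Block *B, unsigned W) {
        Succs.push_back(std::make_pair(B, W));
      }
    };

    struct BPIModel { // stand-in for BranchProbabilityInfo
      unsigned getEdgeWeight(const Block *, const Block *) const { return 16; }
    };

    static unsigned getEdgeWeight(const BPIModel *BPI, Block *Src, Block *Dst) {
      if (!BPI)
        return 0; // analysis not run (e.g. -O0): unweighted edge
      return BPI->getEdgeWeight(Src, Dst);
    }

    static void addSuccessorWithWeight(const BPIModel *BPI,
                                       Block *Src, Block *Dst) {
      Src->addSuccessor(Dst, getEdgeWeight(BPI, Src, Dst));
    }

    int main() {
      Block A, B;
      addSuccessorWithWeight(0, &A, &B); // no BPI available
      return (int)A.Succs[0].second;     // 0: fell back to an unweighted edge
    }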
+uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src, + MachineBasicBlock *Dst) { + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (!BPI) + return 0; + BasicBlock *SrcBB = const_cast<BasicBlock*>(Src->getBasicBlock()); + BasicBlock *DstBB = const_cast<BasicBlock*>(Dst->getBasicBlock()); + return BPI->getEdgeWeight(SrcBB, DstBB); +} + +void SelectionDAGBuilder::addSuccessorWithWeight(MachineBasicBlock *Src, + MachineBasicBlock *Dst) { + uint32_t weight = getEdgeWeight(Src, Dst); + Src->addSuccessor(Dst, weight); +} + + static bool InBlock(const Value *V, const BasicBlock *BB) { if (const Instruction *I = dyn_cast<Instruction>(V)) return I->getParent() == BB; @@ -1492,8 +1566,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, } // Update successor info - SwitchBB->addSuccessor(CB.TrueBB); - SwitchBB->addSuccessor(CB.FalseBB); + addSuccessorWithWeight(SwitchBB, CB.TrueBB); + addSuccessorWithWeight(SwitchBB, CB.FalseBB); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -1637,8 +1711,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - SwitchBB->addSuccessor(B.Default); - SwitchBB->addSuccessor(MBB); + addSuccessorWithWeight(SwitchBB, B.Default); + addSuccessorWithWeight(SwitchBB, MBB); SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, CopyTo, RangeCmp, @@ -1683,8 +1757,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, ISD::SETNE); } - SwitchBB->addSuccessor(B.TargetBB); - SwitchBB->addSuccessor(NextMBB); + addSuccessorWithWeight(SwitchBB, B.TargetBB); + addSuccessorWithWeight(SwitchBB, NextMBB); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -1924,8 +1998,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, // table. MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); CurMF->insert(BBI, JumpTableBB); - CR.CaseBB->addSuccessor(Default); - CR.CaseBB->addSuccessor(JumpTableBB); + + addSuccessorWithWeight(CR.CaseBB, Default); + addSuccessorWithWeight(CR.CaseBB, JumpTableBB); // Build a vector of destination BBs, corresponding to each target // of the jump table. If the value of the jump table slot corresponds to @@ -1952,7 +2027,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, E = DestBBs.end(); I != E; ++I) { if (!SuccsHandled[(*I)->getNumber()]) { SuccsHandled[(*I)->getNumber()] = true; - JumpTableBB->addSuccessor(*I); + addSuccessorWithWeight(JumpTableBB, *I); } } @@ -2019,9 +2094,13 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, APInt Range = ComputeRange(LEnd, RBegin); assert((Range - 2ULL).isNonNegative() && "Invalid case distance"); - double LDensity = (double)LSize.roundToDouble() / + // Use volatile double here to avoid excess precision issues on some hosts, + // e.g. that use 80-bit X87 registers. 
+ volatile double LDensity = + (double)LSize.roundToDouble() / (LEnd - First + 1ULL).roundToDouble(); - double RDensity = (double)RSize.roundToDouble() / + volatile double RDensity = + (double)RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble(); double Metric = Range.logBase2()*(LDensity+RDensity); // Should always split in some non-trivial place @@ -2367,8 +2446,10 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { succs.push_back(I.getSuccessor(i)); array_pod_sort(succs.begin(), succs.end()); succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); - for (unsigned i = 0, e = succs.size(); i != e; ++i) - IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); + for (unsigned i = 0, e = succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]]; + addSuccessorWithWeight(IndirectBrMBB, Succ); + } DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -2806,16 +2887,18 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { SmallVector<SDValue, 4> Values(NumAggValues); SDValue Agg = getValue(Op0); - SDValue Val = getValue(Op1); unsigned i = 0; // Copy the beginning value(s) from the original aggregate. for (; i != LinearIndex; ++i) Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : SDValue(Agg.getNode(), Agg.getResNo() + i); // Copy values from the inserted value(s). - for (; i != LinearIndex + NumValValues; ++i) - Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : - SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); + if (NumValValues) { + SDValue Val = getValue(Op1); + for (; i != LinearIndex + NumValValues; ++i) + Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); + } // Copy remaining value(s) from the original aggregate. for (; i != NumAggValues; ++i) Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : @@ -2838,6 +2921,13 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { ComputeValueVTs(TLI, ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); + + // Ignore a extractvalue that produces an empty object + if (!NumValValues) { + setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); + return; + } + SmallVector<SDValue, 4> Values(NumValValues); SDValue Agg = getValue(Op0); @@ -4009,6 +4099,24 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } +// getTruncatedArgReg - Find underlying register used for an truncated +// argument. +static unsigned getTruncatedArgReg(const SDValue &N) { + if (N.getOpcode() != ISD::TRUNCATE) + return 0; + + const SDValue &Ext = N.getOperand(0); + if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext){ + const SDValue &CFR = Ext.getOperand(0); + if (CFR.getOpcode() == ISD::CopyFromReg) + return cast<RegisterSDNode>(CFR.getOperand(1))->getReg(); + else + if (CFR.getOpcode() == ISD::TRUNCATE) + return getTruncatedArgReg(CFR); + } + return 0; +} + /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. 
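The volatile LDensity/RDensity change in handleBTSplitSwitchCase above deserves a note: on hosts that keep doubles in 80-bit x87 registers, a quotient compared straight out of a register can carry more precision than one that round-tripped through memory, so the same split metric could order differently across hosts. Forcing each quotient into a volatile double rounds it to 64 bits first. A rough standalone illustration (density is a hypothetical helper):

    #include <cstdio>

    static double density(double Size, double Range) {
      volatile double D = Size / Range; // store/reload drops x87 excess bits
      return D;
    }

    int main() {
      std::printf("%g %g\n", density(3.0, 7.0), density(5.0, 11.0));
      return 0;
    }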
@@ -4029,10 +4137,6 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, if (DV.isInlinedFnArgument(MF.getFunction())) return false; - MachineBasicBlock *MBB = FuncInfo.MBB; - if (MBB != &MF.front()) - return false; - unsigned Reg = 0; if (Arg->hasByValAttr()) { // Byval arguments' frame index is recorded during argument lowering. @@ -4044,9 +4148,12 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, Reg = 0; } - if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) { - Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (N.getNode()) { + if (N.getOpcode() == ISD::CopyFromReg) + Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); + else + Reg = getTruncatedArgReg(N); + if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); unsigned PR = RegInfo.getLiveInPhysReg(Reg); if (PR) @@ -4208,9 +4315,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDV = DAG.getDbgValue(Variable, FINode->getIndex(), 0, dl, SDNodeOrder); else { - // Can't do anything with other non-AI cases yet. This might be a - // parameter of a callee function that got inlined, for example. - DEBUG(dbgs() << "Dropping debug info for " << DI); + // Address is an argument, so try to emit its dbg value using + // virtual register info from the FuncInfo.ValueMap. + EmitFuncArgumentDbgValue(Address, Variable, 0, N); return 0; } } else if (AI) @@ -4403,7 +4510,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_dispatch_setup: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, - getRoot())); + getRoot(), getValue(I.getArgOperand(0)))); return 0; } @@ -4672,9 +4779,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; - case Intrinsic::trap: - DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + case Intrinsic::trap: { + StringRef TrapFuncName = getTrapFunctionName(); + if (TrapFuncName.empty()) { + DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + return 0; + } + TargetLowering::ArgListTy Args; + std::pair<SDValue, SDValue> Result = + TLI.LowerCallTo(getRoot(), I.getType(), + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, /*isReturnValueUsed=*/true, + DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), + Args, DAG, getCurDebugLoc()); + DAG.setRoot(Result.second); return 0; + } case Intrinsic::uadd_with_overflow: return implVisitAluOverflow(I, ISD::UADDO); case Intrinsic::sadd_with_overflow: @@ -4689,15 +4809,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return implVisitAluOverflow(I, ISD::SMULO); case Intrinsic::prefetch: { - SDValue Ops[4]; + SDValue Ops[5]; unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); + Ops[4] = getValue(I.getArgOperand(3)); DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl, DAG.getVTList(MVT::Other), - &Ops[0], 4, + &Ops[0], 5, EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), 0, /* align */ @@ -4784,7 +4905,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Outs, TLI, &Offsets); bool 
CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - FTy->isVarArg(), Outs, FTy->getContext()); + DAG.getMachineFunction(), + FTy->isVarArg(), Outs, + FTy->getContext()); SDValue DemoteStackSlot; int DemoteStackIdx = -100; @@ -4814,8 +4937,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { - SDValue ArgNode = getValue(*i); - Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); + const Value *V = *i; + + // Skip empty types + if (V->getType()->isEmptyTy()) + continue; + + SDValue ArgNode = getValue(V); + Entry.Node = ArgNode; Entry.Ty = V->getType(); unsigned attrInd = i - CS.arg_begin() + 1; Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); @@ -5255,6 +5384,7 @@ public: const llvm::Type *OpTy = CallOperandVal->getType(); + // FIXME: code duplicated from TargetLowering::ParseConstraints(). // If this is an indirect operand, the operand is a pointer to the // accessed type. if (isIndirect) { @@ -5264,6 +5394,11 @@ public: OpTy = PtrTy->getElementType(); } + // Look for vector wrapped in a struct. e.g. { <16 x i8> }. + if (const StructType *STy = dyn_cast<StructType>(OpTy)) + if (STy->getNumElements() == 1) + OpTy = STy->getElementType(0); + // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { @@ -5315,6 +5450,8 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF, EVT ThisVT = MVT::Other; const TargetRegisterClass *RC = *RCI; + if (!RC->isAllocatable()) + continue; // If none of the value types for this register class are valid, we // can't use it. For example, 64-bit reg classes on 32-bit targets. for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); @@ -5336,15 +5473,14 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF, // frame pointer in functions that need it (due to them not being taken // out of allocation, because a variable sized allocation hasn't been seen // yet). This is a slight code pessimization, but should still work. - for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), - E = RC->allocation_order_end(MF); I != E; ++I) - if (*I == Reg) { - // We found a matching register class. Keep looking at others in case - // we find one with larger registers that this physreg is also in. - FoundRC = RC; - FoundVT = ThisVT; - break; - } + ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(MF); + if (std::find(RawOrder.begin(), RawOrder.end(), Reg) != RawOrder.end()) { + // We found a matching register class. Keep looking at others in case + // we find one with larger registers that this physreg is also in. + FoundRC = RC; + FoundVT = ThisVT; + break; + } } return FoundRC; } @@ -5491,9 +5627,15 @@ static void GetRegistersForValue(SelectionDAG &DAG, OpInfo.ConstraintVT); const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); + BitVector Reserved = TRI->getReservedRegs(MF); unsigned NumAllocated = 0; for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) { unsigned Reg = RegClassRegs[i]; + // Filter out the reserved registers, but note that reserved registers are + // not fully determined at this point. We may still decide we need a frame + // pointer. + if (Reserved.test(Reg)) + continue; // See if this register is available. if ((isOutReg && OutputRegs.count(Reg)) || // Already used. (isInReg && InputRegs.count(Reg))) { // Already used. 
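The reserved-register screen added to GetRegistersForValue above filters constraint candidates before inline asm can claim them, with the caveat from the patch that reservations (the frame pointer in particular) are not fully settled this early. A standalone sketch (filterCandidates is hypothetical; std::vector<bool> stands in for BitVector):

    #include <vector>

    static void filterCandidates(const std::vector<bool> &Reserved,
                                 const std::vector<unsigned> &Candidates,
                                 std::vector<unsigned> &Usable) {
      for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
        unsigned Reg = Candidates[i];
        if (Reserved[Reg])
          continue; // never hand a reserved register to inline asm
        Usable.push_back(Reg);
      }
    }

    int main() {
      std::vector<bool> Reserved(4, false);
      Reserved[1] = true; // pretend register 1 is the frame pointer
      std::vector<unsigned> Cands, Usable;
      Cands.push_back(0); Cands.push_back(1); Cands.push_back(2);
      filterCandidates(Reserved, Cands, Usable);
      return (int)Usable.size(); // 2: the reserved register was screened out
    }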
@@ -5542,7 +5684,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { std::set<unsigned> OutputRegs, InputRegs; - TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS); + TargetLowering::AsmOperandInfoVector + TargetConstraints = TLI.ParseConstraints(CS); + bool hasMemory = false; unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. @@ -5601,7 +5745,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { hasMemory = true; else { for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { - TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[j]); + TargetLowering::ConstraintType + CType = TLI.getConstraintType(OpInfo.Codes[j]); if (CType == TargetLowering::C_Memory) { hasMemory = true; break; @@ -5651,12 +5796,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // need to to provide an address for the memory input. if (OpInfo.ConstraintType == TargetLowering::C_Memory && !OpInfo.isIndirect) { - assert((OpInfo.isMultipleAlternative || (OpInfo.Type == InlineAsm::isInput)) && + assert((OpInfo.isMultipleAlternative || + (OpInfo.Type == InlineAsm::isInput)) && "Can only indirectify direct input operands!"); // Memory operands really want the address of the value. If we don't have // an indirect input, put it in the constpool if we can, otherwise spill // it to a stack slot. + // TODO: This isn't quite right. We need to handle these according to + // the addressing mode that the constraint wants. Also, this may take + // an additional register for the computation and we don't want that + // either. // If the operand is a float, integer, or vector constant, spill to a // constant pool entry to get its address. @@ -5858,7 +6008,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector<SDValue> Ops; - TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], + TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) report_fatal_error("Invalid operand for inline asm constraint '" + @@ -6067,14 +6217,15 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, Flags.setByVal(); const PointerType *Ty = cast<PointerType>(Args[i].Ty); const Type *ElementTy = Ty->getElementType(); - unsigned FrameAlign = getByValTypeAlignment(ElementTy); - unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy); + Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy)); // For ByVal, alignment should come from FE. BE will guess if this // info is not there but there are cases it cannot get right. + unsigned FrameAlign; if (Args[i].Alignment) FrameAlign = Args[i].Alignment; + else + FrameAlign = getByValTypeAlignment(ElementTy); Flags.setByValAlign(FrameAlign); - Flags.setByValSize(FrameSize); } if (Args[i].isNest) Flags.setNest(); @@ -6180,7 +6331,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, // For a function returning void, there is no return value. We can't create // such a node, so we just return a null return value in that case. In - // that case, nothing will actualy look at the value. + // that case, nothing will actually look at the value. 
if (ReturnValues.empty()) return std::make_pair(SDValue(), Chain); @@ -6219,6 +6370,25 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { #include "llvm/CodeGen/SelectionDAGISel.h" +/// isOnlyUsedInEntryBlock - If the specified argument is only used in the +/// entry block, return true. This includes arguments used by switches, since +/// the switch may expand into multiple basic blocks. +static bool isOnlyUsedInEntryBlock(const Argument *A) { + // With FastISel active, we may be splitting blocks, so force creation + // of virtual registers for all non-dead arguments. + if (EnableFastISel) + return A->use_empty(); + + const BasicBlock *Entry = A->getParent()->begin(); + for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); + UI != E; ++UI) { + const User *U = *UI; + if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U)) + return false; // Use not in entry block. + } + return true; +} + void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // If this is the entry block, emit arguments. const Function &F = *LLVMBB->getParent(); @@ -6273,14 +6443,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { Flags.setByVal(); const PointerType *Ty = cast<PointerType>(I->getType()); const Type *ElementTy = Ty->getElementType(); - unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy); - unsigned FrameSize = TD->getTypeAllocSize(ElementTy); + Flags.setByValSize(TD->getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. + unsigned FrameAlign; if (F.getParamAlignment(Idx)) FrameAlign = F.getParamAlignment(Idx); + else + FrameAlign = TLI.getByValTypeAlignment(ElementTy); Flags.setByValAlign(FrameAlign); - Flags.setByValSize(FrameSize); } if (F.paramHasAttr(Idx, Attribute::Nest)) Flags.setNest(); @@ -6362,8 +6533,8 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { if (I->use_empty() && NumValues) SDB->setUnusedArgValue(I, InVals[i]); - for (unsigned Value = 0; Value != NumValues; ++Value) { - EVT VT = ValueVTs[Value]; + for (unsigned Val = 0; Val != NumValues; ++Val) { + EVT VT = ValueVTs[Val]; EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT); @@ -6382,21 +6553,35 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { i += NumParts; } + // We don't need to do anything else for unused arguments. + if (ArgValues.empty()) + continue; + // Note down frame index for byval arguments. - if (I->hasByValAttr() && !ArgValues.empty()) + if (I->hasByValAttr()) if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex()); - if (!I->use_empty()) { - SDValue Res; - if (!ArgValues.empty()) - Res = DAG.getMergeValues(&ArgValues[0], NumValues, - SDB->getCurDebugLoc()); - SDB->setValue(I, Res); - - // If this argument is live outside of the entry block, insert a copy from - // whereever we got it to the vreg that other BB's will reference it as. + SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues, + SDB->getCurDebugLoc()); + SDB->setValue(I, Res); + + // If this argument is live outside of the entry block, insert a copy from + // wherever we got it to the vreg that other BB's will reference it as. + if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { + // If we can, though, try to skip creating an unnecessary vreg. 
+ // FIXME: This isn't very clean... it would be nice to make this more + // general. It's also subtly incompatible with the hacks FastISel + // uses with vregs. + unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + FuncInfo->ValueMap[I] = Reg; + continue; + } + } + if (!isOnlyUsedInEntryBlock(I)) { + FuncInfo->InitializeRegForValue(I); SDB->CopyToExportRegsIfNeeded(I); } } @@ -6442,6 +6627,10 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Ignore dead phi's. if (PN->use_empty()) continue; + // Skip empty types + if (PN->getType()->isEmptyTy()) + continue; + unsigned Reg; const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index f546612..a1ca891 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -23,7 +23,6 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include <vector> -#include <set> namespace llvm { @@ -333,6 +332,14 @@ public: /// consumed. void clear(); + /// clearDanglingDebugInfo - Clear the dangling debug information + /// map. This function is seperated from the clear so that debug + /// information that is dangling in a basic block can be properly + /// resolved in a different basic block. This allows the + /// SelectionDAG to resolve dangling debug information attached + /// to PHI nodes. + void clearDanglingDebugInfo(); + /// getRoot - Return the current virtual root of the Selection DAG, /// flushing any PendingLoad items. This must be done before emitting /// a store or any other node that may need to be ordered after any @@ -427,6 +434,9 @@ private: const Value* SV, MachineBasicBlock* Default, MachineBasicBlock *SwitchBB); + + uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); + void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 4b6066e..dc8044b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/Function.h" @@ -55,17 +56,11 @@ using namespace llvm; STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); +STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected"); STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); -#ifndef NDEBUG -STATISTIC(NumBBWithOutOfOrderLineInfo, - "Number of blocks with out of order line number info"); -STATISTIC(NumMBBWithOutOfOrderLineInfo, - "Number of machine blocks with out of order line number info"); -#endif - static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable verbose messages in the \"fast\" " @@ -74,6 +69,11 @@ static cl::opt<bool> EnableFastISelAbort("fast-isel-abort", cl::Hidden, cl::desc("Enable abort calls when 
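getEdgeWeight and addSuccessorWithWeight, declared in the header hunk above, are the bridge from BranchProbabilityInfo to machine CFG successor lists. Their bodies are not part of this hunk; a plausible sketch based on the FuncInfo->BPI wiring elsewhere in the commit (an assumption, not a quote from the patch):

    uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src,
                                                MachineBasicBlock *Dst) {
      BranchProbabilityInfo *BPI = FuncInfo.BPI; // null at -O0 or -use-mbpi=false
      if (!BPI)
        return 0;                                // 0 = "no weight information"
      return BPI->getEdgeWeight(Src->getBasicBlock(), Dst->getBasicBlock());
    }

    void SelectionDAGBuilder::addSuccessorWithWeight(MachineBasicBlock *Src,
                                                     MachineBasicBlock *Dst) {
      Src->addSuccessor(Dst, getEdgeWeight(Src, Dst));
    }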
\"fast\" instruction fails")); +static cl::opt<bool> +UseMBPI("use-mbpi", + cl::desc("use Machine Branch Probability Info"), + cl::init(true), cl::Hidden); + #ifndef NDEBUG static cl::opt<bool> ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden, @@ -192,6 +192,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, DAGSize(0) { initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); } SelectionDAGISel::~SelectionDAGISel() { @@ -205,43 +206,11 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AliasAnalysis>(); AU.addRequired<GCModuleInfo>(); AU.addPreserved<GCModuleInfo>(); + if (UseMBPI && OptLevel != CodeGenOpt::None) + AU.addRequired<BranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } -/// FunctionCallsSetJmp - Return true if the function has a call to setjmp or -/// other function that gcc recognizes as "returning twice". This is used to -/// limit code-gen optimizations on the machine function. -/// -/// FIXME: Remove after <rdar://problem/8031714> is fixed. -static bool FunctionCallsSetJmp(const Function *F) { - const Module *M = F->getParent(); - static const char *ReturnsTwiceFns[] = { - "_setjmp", - "setjmp", - "sigsetjmp", - "setjmp_syscall", - "savectx", - "qsetjmp", - "vfork", - "getcontext" - }; -#define NUM_RETURNS_TWICE_FNS sizeof(ReturnsTwiceFns) / sizeof(const char *) - - for (unsigned I = 0; I < NUM_RETURNS_TWICE_FNS; ++I) - if (const Function *Callee = M->getFunction(ReturnsTwiceFns[I])) { - if (!Callee->use_empty()) - for (Value::const_use_iterator - I = Callee->use_begin(), E = Callee->use_end(); - I != E; ++I) - if (const CallInst *CI = dyn_cast<CallInst>(*I)) - if (CI->getParent()->getParent() == F) - return true; - } - - return false; -#undef NUM_RETURNS_TWICE_FNS -} - /// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that /// may trap on it. In this case we have to split the edge so that the path /// through the predecessor block that doesn't go to the phi block doesn't @@ -302,6 +271,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { CurDAG->init(*MF); FuncInfo->set(Fn, *MF); + + if (UseMBPI && OptLevel != CodeGenOpt::None) + FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>(); + else + FuncInfo->BPI = 0; + SDB->init(GFI, *AA); SelectAllBasicBlocks(Fn); @@ -392,7 +367,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } // Determine if there is a call to setjmp in the machine function. - MF->setCallsSetJmp(FunctionCallsSetJmp(&Fn)); + MF->setCallsSetJmp(Fn.callsFunctionThatReturnsTwice()); // Replace forward-declared registers with the registers containing // the desired value. @@ -421,10 +396,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { return true; } -void -SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, - BasicBlock::const_iterator End, - bool &HadTailCall) { +void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End, + bool &HadTailCall) { // Lower all of the non-terminator instructions. If a call is emitted // as a tail call, cease emitting nodes for this block. Terminators // are handled below. @@ -438,7 +412,6 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, // Final step, emit the lowered DAG as machine code. 
CodeGenAndEmitDAG(); - return; } void SelectionDAGISel::ComputeLiveOutVRegInfo() { @@ -572,7 +545,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled); - CurDAG->Legalize(OptLevel); + CurDAG->Legalize(); } DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber @@ -748,16 +721,49 @@ void SelectionDAGISel::PrepareEHLandingPad() { - +/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified +/// load into the specified FoldInst. Note that we could have a sequence where +/// multiple LLVM IR instructions are folded into the same machineinstr. For +/// example we could have: +/// A: x = load i32 *P +/// B: y = icmp A, 42 +/// C: br y, ... +/// +/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and +/// any other folded instructions) because it is between A and C. +/// +/// If we succeed in folding the load into the operation, return true. +/// bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, + const Instruction *FoldInst, FastISel *FastIS) { + // We know that the load has a single use, but don't know what it is. If it + // isn't one of the folded instructions, then we can't succeed here. Handle + // this by scanning the single-use users of the load until we get to FoldInst. + unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. + + const Instruction *TheUser = LI->use_back(); + while (TheUser != FoldInst && // Scan up until we find FoldInst. + // Stay in the right block. + TheUser->getParent() == FoldInst->getParent() && + --MaxUsers) { // Don't scan too far. + // If there are multiple or no uses of this instruction, then bail out. + if (!TheUser->hasOneUse()) + return false; + + TheUser = TheUser->use_back(); + } + // Don't try to fold volatile loads. Target has to deal with alignment // constraints. if (LI->isVolatile()) return false; - // Figure out which vreg this is going into. + // Figure out which vreg this is going into. If there is no assigned vreg yet + // then there actually was no reference to it. Perhaps the load is referenced + // by a dead instruction. unsigned LoadReg = FastIS->getRegForValue(LI); - assert(LoadReg && "Load isn't already assigned a vreg? "); + if (LoadReg == 0) + return false; // Check to see what the uses of this vreg are. If it has no uses, or more // than one use (at the machine instr level) then we can't fold it. @@ -788,48 +794,17 @@ bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI); } -#ifndef NDEBUG -/// CheckLineNumbers - Check if basic block instructions follow source order -/// or not. -static void CheckLineNumbers(const BasicBlock *BB) { - unsigned Line = 0; - unsigned Col = 0; - for (BasicBlock::const_iterator BI = BB->begin(), - BE = BB->end(); BI != BE; ++BI) { - const DebugLoc DL = BI->getDebugLoc(); - if (DL.isUnknown()) continue; - unsigned L = DL.getLine(); - unsigned C = DL.getCol(); - if (L < Line || (L == Line && C < Col)) { - ++NumBBWithOutOfOrderLineInfo; - return; - } - Line = L; - Col = C; - } +/// isFoldedOrDeadInstruction - Return true if the specified instruction is +/// side-effect free and is either dead or folded into a generated instruction. +/// Return false if it needs to be emitted. +static bool isFoldedOrDeadInstruction(const Instruction *I, + FunctionLoweringInfo *FuncInfo) { + return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. 
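The scan at the top of TryToFoldFastISelLoad is what lets folded instructions sit between the load and its ultimate consumer. Reduced to its skeleton, with the exit conditions annotated:

    unsigned MaxUsers = 6;                        // bound the walk
    const Instruction *TheUser = LI->use_back();  // the load's sole user
    while (TheUser != FoldInst &&                 // not yet at the target
           TheUser->getParent() == FoldInst->getParent() && // same block only
           --MaxUsers) {                          // chain too long: give up
      if (!TheUser->hasOneUse())
        return false;                             // fan-out: cannot fold
      TheUser = TheUser->use_back();              // hop to the next user
    }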
+ !isa<TerminatorInst>(I) && // Terminators aren't folded. + !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. + !FuncInfo->isExportedInst(I); // Exported instrs must be computed. } -/// CheckLineNumbers - Check if machine basic block instructions follow source -/// order or not. -static void CheckLineNumbers(const MachineBasicBlock *MBB) { - unsigned Line = 0; - unsigned Col = 0; - for (MachineBasicBlock::const_iterator MBI = MBB->begin(), - MBE = MBB->end(); MBI != MBE; ++MBI) { - const DebugLoc DL = MBI->getDebugLoc(); - if (DL.isUnknown()) continue; - unsigned L = DL.getLine(); - unsigned C = DL.getCol(); - if (L < Line || (L == Line && C < Col)) { - ++NumMBBWithOutOfOrderLineInfo; - return; - } - Line = L; - Col = C; - } -} -#endif - void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; @@ -841,9 +816,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { for (ReversePostOrderTraversal<const Function*>::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { const BasicBlock *LLVMBB = *I; -#ifndef NDEBUG - CheckLineNumbers(LLVMBB); -#endif if (OptLevel != CodeGenOpt::None) { bool AllPredsVisited = true; @@ -856,15 +828,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } if (AllPredsVisited) { - for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end(); - I != E && isa<PHINode>(I); ++I) { + for (BasicBlock::const_iterator I = LLVMBB->begin(); + isa<PHINode>(I); ++I) FuncInfo->ComputePHILiveOutRegInfo(cast<PHINode>(I)); - } } else { - for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end(); - I != E && isa<PHINode>(I); ++I) { + for (BasicBlock::const_iterator I = LLVMBB->begin(); + isa<PHINode>(I); ++I) FuncInfo->InvalidatePHILiveOutRegInfo(cast<PHINode>(I)); - } } FuncInfo->VisitedBBs.insert(LLVMBB); @@ -912,10 +882,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { const Instruction *Inst = llvm::prior(BI); // If we no longer require this instruction, skip it. - if (!Inst->mayWriteToMemory() && - !isa<TerminatorInst>(Inst) && - !isa<DbgInfoIntrinsic>(Inst) && - !FuncInfo->isExportedInst(Inst)) + if (isFoldedOrDeadInstruction(Inst, FuncInfo)) continue; // Bottom-up: reset the insert pos at the top, after any local-value @@ -924,16 +891,21 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to select the instruction with FastISel. if (FastIS->SelectInstruction(Inst)) { - // If fast isel succeeded, check to see if there is a single-use - // non-volatile load right before the selected instruction, and see if - // the load is used by the instruction. If so, try to fold it. - const Instruction *BeforeInst = 0; - if (Inst != Begin) - BeforeInst = llvm::prior(llvm::prior(BI)); - if (BeforeInst && isa<LoadInst>(BeforeInst) && - BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst && - TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS)) - --BI; // If we succeeded, don't re-select the load. + ++NumFastIselSuccess; + // If fast isel succeeded, skip over all the folded instructions, and + // then see if there is a load right before the selected instructions. + // Try to fold the load if so. 
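SelectAllBasicBlocks visits blocks in reverse post-order precisely so that, back edges aside, predecessors are selected before their successors; that ordering is what makes the ComputePHILiveOutRegInfo propagation above sound. The traversal idiom in isolation (visitBlock is a placeholder, not an LLVM API):

    #include "llvm/ADT/PostOrderIterator.h"

    ReversePostOrderTraversal<const Function*> RPOT(&Fn);
    for (ReversePostOrderTraversal<const Function*>::rpo_iterator
           I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
      visitBlock(*I);   // per-block selection work goes here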
+ const Instruction *BeforeInst = Inst; + while (BeforeInst != Begin) { + BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst)); + if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) + break; + } + if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && + BeforeInst->hasOneUse() && + TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) + // If we succeeded, don't re-select the load. + BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); continue; } @@ -963,9 +935,14 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { continue; } - // Otherwise, give up on FastISel for the rest of the block. - // For now, be a little lenient about non-branch terminators. - if (!isa<TerminatorInst>(Inst) || isa<BranchInst>(Inst)) { + if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) { + // Don't abort, and use a different message for terminator misses. + ++NumFastIselFailures; + if (EnableFastISelVerbose || EnableFastISelAbort) { + dbgs() << "FastISel missed terminator: "; + Inst->dump(); + } + } else { ++NumFastIselFailures; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; @@ -987,22 +964,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { else ++NumFastIselBlocks; - // Run SelectionDAG instruction selection on the remainder of the block - // not handled by FastISel. If FastISel is not run, this is the entire - // block. - bool HadTailCall; - SelectBasicBlock(Begin, BI, HadTailCall); + if (Begin != BI) { + // Run SelectionDAG instruction selection on the remainder of the block + // not handled by FastISel. If FastISel is not run, this is the entire + // block. + bool HadTailCall; + SelectBasicBlock(Begin, BI, HadTailCall); + } FinishBasicBlock(); FuncInfo->PHINodesToUpdate.clear(); } delete FastIS; -#ifndef NDEBUG - for (MachineFunction::const_iterator MBI = MF->begin(), MBE = MF->end(); - MBI != MBE; ++MBI) - CheckLineNumbers(MBI); -#endif + SDB->clearDanglingDebugInfo(); } void @@ -2634,11 +2609,45 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // instructions that access memory and for ComplexPatterns that match // loads. if (EmitNodeInfo & OPFL_MemRefs) { + // Only attach load or store memory operands if the generated + // instruction may load or store. 
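Instead of peeking exactly one instruction back, the new code walks BeforeInst backwards over everything isFoldedOrDeadInstruction accepts before testing for a foldable load; llvm::prior and llvm::next are the bidirectional-iterator steppers from ADT/STLExtras.h. The backward scan by itself:

    // Skip back over folded/dead instructions until something real (or
    // the start of the unselected range) is reached.
    const Instruction *BeforeInst = Inst;
    while (BeforeInst != Begin) {
      BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst));
      if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
        break;
    }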
+ const TargetInstrDesc &TID = TM.getInstrInfo()->get(TargetOpc); + bool mayLoad = TID.mayLoad(); + bool mayStore = TID.mayStore(); + + unsigned NumMemRefs = 0; + for (SmallVector<MachineMemOperand*, 2>::const_iterator I = + MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { + if ((*I)->isLoad()) { + if (mayLoad) + ++NumMemRefs; + } else if ((*I)->isStore()) { + if (mayStore) + ++NumMemRefs; + } else { + ++NumMemRefs; + } + } + MachineSDNode::mmo_iterator MemRefs = - MF->allocateMemRefsArray(MatchedMemRefs.size()); - std::copy(MatchedMemRefs.begin(), MatchedMemRefs.end(), MemRefs); + MF->allocateMemRefsArray(NumMemRefs); + + MachineSDNode::mmo_iterator MemRefsPos = MemRefs; + for (SmallVector<MachineMemOperand*, 2>::const_iterator I = + MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { + if ((*I)->isLoad()) { + if (mayLoad) + *MemRefsPos++ = *I; + } else if ((*I)->isStore()) { + if (mayStore) + *MemRefsPos++ = *I; + } else { + *MemRefsPos++ = *I; + } + } + cast<MachineSDNode>(Res) - ->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size()); + ->setMemRefs(MemRefs, MemRefs + NumMemRefs); } DEBUG(errs() << " " diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 4b0822d..efbfaa4 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -26,11 +26,19 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include <cctype> using namespace llvm; +/// We are in the process of implementing a new TypeLegalization action +/// - the promotion of vector elements. This feature is disabled by default +/// and only enabled using this flag. +static cl::opt<bool> +AllowPromoteIntElem("promote-elements", cl::Hidden, + cl::desc("Allow promotion of integer vector element types")); + namespace llvm { TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) { bool isLocal = GV->hasLocalLinkage(); @@ -528,7 +536,8 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { /// NOTE: The constructor takes ownership of TLOF. TargetLowering::TargetLowering(const TargetMachine &tm, const TargetLoweringObjectFile *tlof) - : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) { + : TM(tm), TD(TM.getTargetData()), TLOF(*tlof), + mayPromoteElements(AllowPromoteIntElem) { // All operations default to being supported. memset(OpActions, 0, sizeof(OpActions)); memset(LoadExtActions, 0, sizeof(LoadExtActions)); @@ -596,6 +605,8 @@ TargetLowering::TargetLowering(const TargetMachine &tm, SchedPreferenceInfo = Sched::Latency; JumpBufSize = 0; JumpBufAlignment = 0; + MinFunctionAlignment = 0; + PrefFunctionAlignment = 0; PrefLoopAlignment = 0; MinStackArgumentAlignment = 1; ShouldFoldAtomicFences = false; @@ -662,10 +673,16 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, NewVT = EltTy; IntermediateVT = NewVT; + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + EVT DestVT = TLI->getRegisterType(NewVT); RegisterVT = DestVT; if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. 
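The memref fix above is a two-pass filter: count what survives the mayLoad/mayStore test, allocate exactly that many slots, then copy using the same predicate, so the array size and its contents can never disagree. Abstracted from the hunk (keepMMO stands in for the load/store test and is not a real API):

    SmallVector<MachineMemOperand*, 2>::const_iterator I, E;

    unsigned NumMemRefs = 0;                     // pass 1: count survivors
    for (I = MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I)
      if (keepMMO(*I))
        ++NumMemRefs;

    MachineSDNode::mmo_iterator MemRefs =        // exact-sized allocation
        MF->allocateMemRefsArray(NumMemRefs);
    MachineSDNode::mmo_iterator Pos = MemRefs;   // pass 2: copy survivors
    for (I = MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I)
      if (keepMMO(*I))
        *Pos++ = *I;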
- return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. @@ -747,7 +764,7 @@ void TargetLowering::computeRegisterProperties() { NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); - ValueTypeActions.setTypeAction(ExpandedVT, Expand); + ValueTypeActions.setTypeAction(ExpandedVT, TypeExpandInteger); } // Inspect all of the ValueType's smaller than the largest integer @@ -761,7 +778,7 @@ void TargetLowering::computeRegisterProperties() { } else { RegisterTypeForVT[IntReg] = TransformToType[IntReg] = (MVT::SimpleValueType)LegalIntReg; - ValueTypeActions.setTypeAction(IVT, Promote); + ValueTypeActions.setTypeAction(IVT, TypePromoteInteger); } } @@ -770,7 +787,7 @@ void TargetLowering::computeRegisterProperties() { NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; RegisterTypeForVT[MVT::ppcf128] = MVT::f64; TransformToType[MVT::ppcf128] = MVT::f64; - ValueTypeActions.setTypeAction(MVT::ppcf128, Expand); + ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat); } // Decide how to handle f64. If the target does not have native f64 support, @@ -779,7 +796,7 @@ void TargetLowering::computeRegisterProperties() { NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; TransformToType[MVT::f64] = MVT::i64; - ValueTypeActions.setTypeAction(MVT::f64, Expand); + ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat); } // Decide how to handle f32. If the target does not have native support for @@ -789,12 +806,12 @@ void TargetLowering::computeRegisterProperties() { NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; TransformToType[MVT::f32] = MVT::f64; - ValueTypeActions.setTypeAction(MVT::f32, Promote); + ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger); } else { NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; TransformToType[MVT::f32] = MVT::i32; - ValueTypeActions.setTypeAction(MVT::f32, Expand); + ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); } } @@ -810,6 +827,30 @@ void TargetLowering::computeRegisterProperties() { unsigned NElts = VT.getVectorNumElements(); if (NElts != 1) { bool IsLegalWiderType = false; + // If we allow the promotion of vector elements using a flag, + // then return TypePromoteInteger on vector elements. + // First try to promote the elements of integer vectors. If no legal + // promotion was found, fallback to the widen-vector method. + if (mayPromoteElements) + for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + EVT SVT = (MVT::SimpleValueType)nVT; + // Promote vectors of integers to vectors with the same number + // of elements, with a wider element type. 
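The NextPowerOf2 adjustment matters for odd-sized integer elements: an i33 costs an i64's worth of registers, not 33/32 of one. A small worked check (note that MathExtras' NextPowerOf2 returns the power of two strictly greater than its input, which is why the callers guard with isPowerOf2_32 first; NextPowerOf2(32) would be 64):

    #include "llvm/Support/MathExtras.h"
    using namespace llvm;

    unsigned NewVTSize = 33;                  // e.g. an i33 element
    if (!isPowerOf2_32(NewVTSize))
      NewVTSize = NextPowerOf2(NewVTSize);    // 33 -> 64
    // With 32-bit registers: 64/32 = 2 registers per element, where the
    // old NewVT.getSizeInBits()/DestVT.getSizeInBits() gave 33/32 = 1.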
+ if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() + && SVT.getVectorNumElements() == NElts && + isTypeLegal(SVT) && SVT.getScalarType().isInteger()) { + TransformToType[i] = SVT; + RegisterTypeForVT[i] = SVT; + NumRegistersForVT[i] = 1; + ValueTypeActions.setTypeAction(VT, TypePromoteInteger); + IsLegalWiderType = true; + break; + } + } + + if (IsLegalWiderType) continue; + + // Try to widen the vector. for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { EVT SVT = (MVT::SimpleValueType)nVT; if (SVT.getVectorElementType() == EltVT && @@ -818,7 +859,7 @@ void TargetLowering::computeRegisterProperties() { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; - ValueTypeActions.setTypeAction(VT, Promote); + ValueTypeActions.setTypeAction(VT, TypeWidenVector); IsLegalWiderType = true; break; } @@ -838,10 +879,12 @@ void TargetLowering::computeRegisterProperties() { if (NVT == VT) { // Type is already a power of 2. The default action is to split. TransformToType[i] = MVT::Other; - ValueTypeActions.setTypeAction(VT, Expand); + unsigned NumElts = VT.getVectorNumElements(); + ValueTypeActions.setTypeAction(VT, + NumElts > 1 ? TypeSplitVector : TypeScalarizeVector); } else { TransformToType[i] = NVT; - ValueTypeActions.setTypeAction(VT, Promote); + ValueTypeActions.setTypeAction(VT, TypeWidenVector); } } @@ -890,7 +933,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, // If there is a wider vector type with the same element type as this one, // we should widen to that legal vector type. This handles things like // <2 x float> -> <4 x float>. - if (NumElts != 1 && getTypeAction(VT) == Promote) { + if (NumElts != 1 && getTypeAction(Context, VT) == TypeWidenVector) { RegisterVT = getTypeToTransformTo(Context, VT); if (isTypeLegal(RegisterVT)) { IntermediateVT = RegisterVT; @@ -928,8 +971,14 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, EVT DestVT = getRegisterType(Context, NewVT); RegisterVT = DestVT; + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. - return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. @@ -1678,6 +1727,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1)); if (!ShAmt) break; + SDValue Shift = In.getOperand(1); + if (TLO.LegalTypes()) { + uint64_t ShVal = ShAmt->getZExtValue(); + Shift = + TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType())); + } + APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth); @@ -1691,7 +1747,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), NewTrunc, - In.getOperand(1))); + Shift)); } break; } @@ -1716,26 +1772,28 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::BITCAST: -#if 0 - // If this is an FP->Int bitcast and if the sign bit is the only thing that - // is demanded, turn this into a FGETSIGN. 
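The difference between the two strategies is clearest on a concrete type. On a hypothetical target whose only legal 128-bit integer vectors are v16i8/v8i16/v4i32/v2i64, an illegal <4 x i8> can legalize either way (illustration, not compiler output):

    // -promote-elements: keep the lane count, widen each lane:
    //     <4 x i8>  -->  <4 x i32>     action = TypePromoteInteger
    // default path:      keep the lane type, add lanes:
    //     <4 x i8>  -->  <16 x i8>     action = TypeWidenVector
    // The promotion scan runs first; widening is the fallback when no
    // legal type with the same lane count exists.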
- if (NewMask == EVT::getIntegerVTSignBit(Op.getValueType()) && - MVT::isFloatingPoint(Op.getOperand(0).getValueType()) && - !MVT::isVector(Op.getOperand(0).getValueType())) { - // Only do this xform if FGETSIGN is valid or if before legalize. - if (!TLO.AfterLegalize || - isOperationLegal(ISD::FGETSIGN, Op.getValueType())) { + // If this is an FP->Int bitcast and if the sign bit is the only + // thing demanded, turn this into a FGETSIGN. + if (!Op.getOperand(0).getValueType().isVector() && + NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && + Op.getOperand(0).getValueType().isFloatingPoint()) { + bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); + bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); + if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) { + EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32; // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. - SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(), - Op.getOperand(0)); + SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0)); + unsigned OpVTSizeInBits = Op.getValueType().getSizeInBits(); + if (!OpVTLegal && OpVTSizeInBits > 32) + Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign); unsigned ShVal = Op.getValueType().getSizeInBits()-1; - SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy()); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(), + SDValue ShAmt = TLO.DAG.getConstant(ShVal, Op.getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), Sign, ShAmt)); } } -#endif break; case ISD::ADD: case ISD::MUL: @@ -1842,7 +1900,6 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, DebugLoc dl) const { SelectionDAG &DAG = DCI.DAG; - LLVMContext &Context = *DAG.getContext(); // These setcc operations always fold. switch (Cond) { @@ -1853,12 +1910,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, case ISD::SETTRUE2: return DAG.getConstant(1, VT); } - if (isa<ConstantSDNode>(N0.getNode())) { - // Ensure that the constant occurs on the RHS, and fold constant - // comparisons. + // Ensure that the constant occurs on the RHS, and fold constant + // comparisons. + if (isa<ConstantSDNode>(N0.getNode())) return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); - } - + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); @@ -1911,6 +1967,42 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal. 
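The resurrected FGETSIGN transform fires when an FP-to-int bitcast has only its sign bit demanded. Before and after, for an f32 (a sketch of the DAG shapes, not compiler output):

    // before:  t1 = bitcast f32 %x to i32      ; only bit 31 demanded
    // after:   t1 = shl (fgetsign %x), 31      ; FGETSIGN yields 0 or 1
    // If FGETSIGN is legal only at i32 and the integer type is wider than
    // 32 bits, a zero_extend is inserted first, as the code above does.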
} + // (zext x) == C --> x == (trunc C) + if (DCI.isBeforeLegalize() && N0->hasOneUse() && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + unsigned MinBits = N0.getValueSizeInBits(); + SDValue PreZExt; + if (N0->getOpcode() == ISD::ZERO_EXTEND) { + // ZExt + MinBits = N0->getOperand(0).getValueSizeInBits(); + PreZExt = N0->getOperand(0); + } else if (N0->getOpcode() == ISD::AND) { + // DAGCombine turns costly ZExts into ANDs + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) + if ((C->getAPIntValue()+1).isPowerOf2()) { + MinBits = C->getAPIntValue().countTrailingOnes(); + PreZExt = N0->getOperand(0); + } + } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) { + // ZEXTLOAD + if (LN0->getExtensionType() == ISD::ZEXTLOAD) { + MinBits = LN0->getMemoryVT().getSizeInBits(); + PreZExt = N0; + } + } + + // Make sure we're not loosing bits from the constant. + if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) { + EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits); + if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { + // Will get folded away. + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt); + SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT); + return DAG.getSetCC(dl, VT, Trunc, C, Cond); + } + } + } + // If the LHS is '(and load, const)', the RHS is 0, // the test is for equality or unsigned, and all 1 bits of the const are // in the same partial word, see if we can shorten the load. @@ -1949,7 +2041,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } if (bestWidth) { - EVT newVT = EVT::getIntegerVT(Context, bestWidth); + EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth); if (newVT.isRound()) { EVT PtrType = Lod->getOperand(1).getValueType(); SDValue Ptr = Lod->getBasePtr(); @@ -2578,9 +2670,13 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, - char ConstraintLetter, + std::string &Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const { + + if (Constraint.length() > 1) return; + + char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: break; case 'X': // Allows any operand; labels (basic block) use this. @@ -2769,6 +2865,12 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( report_fatal_error("Indirect operand for inline asm not a pointer!"); OpTy = PtrTy->getElementType(); } + + // Look for vector wrapped in a struct. e.g. { <16 x i8> }. + if (const StructType *STy = dyn_cast<StructType>(OpTy)) + if (STy->getNumElements() == 1) + OpTy = STy->getElementType(0); + // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { @@ -3013,7 +3115,7 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, assert(OpInfo.Codes[i].size() == 1 && "Unhandled multi-letter 'other' constraint"); std::vector<SDValue> ResultOps; - TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], + TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i], ResultOps, *DAG); if (!ResultOps.empty()) { BestType = CType; |
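The new (zext x) == C narrowing only fires when the constant provably fits in the narrow type; the MinBits/ActiveBits comparison is that proof. A worked instance (values chosen for illustration):

    //   %z = zext i8 %x to i32
    //   %c = icmp eq i32 %z, 42    ; C1 = 42, C1.getBitWidth() = 32
    // MinBits = 8 (the zext source), C1.getActiveBits() = 6 (42 = 0b101010),
    // so MinBits < 32 and MinBits > 6 both hold and the compare narrows to:
    //   %c = icmp eq i8 %x, 42
    // With C1 = 300 (ActiveBits = 9 > MinBits = 8) the fold is skipped,
    // since truncating 300 to i8 would change the comparison's meaning.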