Diffstat (limited to 'lib/CodeGen/SelectionDAG')
29 files changed, 2723 insertions, 1412 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d7fa009..ff00d0d 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18,22 +18,23 @@
 #define DEBUG_TYPE "dagcombine"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
 #include <algorithm>
 using namespace llvm;
@@ -194,6 +195,7 @@ namespace {
     SDValue visitOR(SDNode *N);
     SDValue visitXOR(SDNode *N);
     SDValue SimplifyVBinOp(SDNode *N);
+    SDValue SimplifyVUnaryOp(SDNode *N);
     SDValue visitSHL(SDNode *N);
     SDValue visitSRA(SDNode *N);
     SDValue visitSRL(SDNode *N);
@@ -269,6 +271,8 @@ namespace {
     SDValue ReduceLoadWidth(SDNode *N);
     SDValue ReduceLoadOpStoreWidth(SDNode *N);
     SDValue TransformFPLoadStorePair(SDNode *N);
+    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
+    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);

     SDValue GetDemandedBits(SDValue V, const APInt &Mask);
@@ -288,6 +292,10 @@ namespace {
                  unsigned SrcValueAlign2,
                  const MDNode *TBAAInfo2) const;

+    /// isAlias - Return true if there is any possibility that the two addresses
+    /// overlap.
+    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1);
+
     /// FindAliasInfo - Extracts the relevant alias information from the memory
     /// node. Returns true if the operand was a load.
     bool FindAliasInfo(SDNode *N,
@@ -300,6 +308,11 @@ namespace {
     /// looking for a better chain (aliasing node.)
     SDValue FindBetterChain(SDNode *N, SDValue Chain);

+    /// Merge consecutive store operations into a wide store.
+    /// This optimization uses wide integers or vectors when possible.
+    /// \return True if some memory operations were changed.
+    bool MergeConsecutiveStores(StoreSDNode *N);
+
   public:
     DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
       : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
@@ -385,10 +398,6 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
                                const TargetLowering &TLI,
                                const TargetOptions *Options,
                                unsigned Depth = 0) {
-  // No compile time optimizations on this type.
-  if (Op.getValueType() == MVT::ppcf128)
-    return 0;
-
   // fneg is removable even if it has multiple uses.
   if (Op.getOpcode() == ISD::FNEG) return 2;
@@ -1174,7 +1183,7 @@ SDValue DAGCombiner::combine(SDNode *N) {
     // Expose the DAG combiner to the target combiner impls.
     TargetLowering::DAGCombinerInfo
-      DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
+      DagCombineInfo(DAG, Level, false, this);

     RV = TLI.PerformDAGCombine(N, DagCombineInfo);
   }
@@ -1373,6 +1382,12 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (add x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N1;
   }

   // fold (add x, undef) -> undef
@@ -1616,6 +1631,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (sub x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
   }

   // fold (sub x, x) -> 0
@@ -1643,7 +1662,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     return N0.getOperand(0);
   // fold C2-(A+C1) -> (C2-C1)-A
   if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
-    SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT);
+    SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
+                                   VT);
     return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
                        N1.getOperand(0));
   }
@@ -2345,16 +2365,19 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
   // we don't want to undo this promotion.
   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
   // on scalars.
-  if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
-      && Level == AfterLegalizeTypes) {
+  if ((N0.getOpcode() == ISD::BITCAST ||
+       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
+      Level == AfterLegalizeTypes) {
     SDValue In0 = N0.getOperand(0);
     SDValue In1 = N1.getOperand(0);
     EVT In0Ty = In0.getValueType();
     EVT In1Ty = In1.getValueType();
-    // If both incoming values are integers, and the original types are the same.
+    DebugLoc DL = N->getDebugLoc();
+    // If both incoming values are integers, and the original types are the
+    // same.
     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
-      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1);
-      SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op);
+      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
+      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
       AddToWorkList(Op.getNode());
       return BC;
     }
@@ -2415,6 +2438,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (and x, 0) -> 0, vector edition
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N0;
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N1;
+
+    // fold (and x, -1) -> x, vector edition
+    if (ISD::isBuildVectorAllOnes(N0.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllOnes(N1.getNode()))
+      return N0;
   }

   // fold (and x, undef) -> 0
@@ -2598,7 +2633,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
       bool isInteger = LL.getValueType().isInteger();
       ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
       if (Result != ISD::SETCC_INVALID &&
-          (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+          (!LegalOperations ||
+           TLI.isCondCodeLegal(Result, LL.getSimpleValueType())))
         return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
                             LL, LR, Result);
     }
@@ -2758,7 +2794,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
       }
     }
   }
-
   return SDValue();
 }
@@ -2951,7 +2986,8 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
   SDValue N00 = N0.getOperand(0);
   SDValue N01 = N0.getOperand(1);

-  if (N1.getOpcode() == ISD::OR) {
+  if (N1.getOpcode() == ISD::OR &&
+      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
     // (or (or (and), (and)), (or (and), (and)))
     SDValue N000 = N00.getOperand(0);
     if (!isBSwapHWordElement(N000, Parts))
@@ -2994,7 +3030,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
   SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
     return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt);
-  else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
     return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt);
   return DAG.getNode(ISD::OR, N->getDebugLoc(), VT,
                      DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt),
@@ -3013,6 +3049,18 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (or x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
+
+    // fold (or x, -1) -> -1, vector edition
+    if (ISD::isBuildVectorAllOnes(N0.getNode()))
+      return N0;
+    if (ISD::isBuildVectorAllOnes(N1.getNode()))
+      return N1;
   }

   // fold (or x, undef) -> -1
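The vector editions of the scalar identities added above (add x,0 -> x, and x,0 -> 0, or x,-1 -> -1, and friends) fire when ISD::isBuildVectorAllZeros or ISD::isBuildVectorAllOnes recognizes a constant BUILD_VECTOR operand. A minimal standalone sketch of the lane-wise reasoning these folds rely on, in plain C++ rather than the SelectionDAG APIs, with illustrative lane values:

  #include <cassert>
  #include <cstdint>

  int main() {
    // Stand-in for a 4-lane integer vector; the splat operand is what
    // isBuildVectorAllZeros / isBuildVectorAllOnes detect.
    uint32_t X[4] = {1, 2, 3, 4};
    for (int i = 0; i != 4; ++i) {
      assert((X[i] + 0u) == X[i]);   // (add x, 0)  -> x
      assert((X[i] & 0u) == 0u);     // (and x, 0)  -> 0
      assert((X[i] & ~0u) == X[i]);  // (and x, -1) -> x
      assert((X[i] | ~0u) == ~0u);   // (or  x, -1) -> -1
      assert((X[i] ^ 0u) == X[i]);   // (xor x, 0)  -> x
    }
    return 0;
  }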
@@ -3095,7 +3143,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
       bool isInteger = LL.getValueType().isInteger();
       ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
       if (Result != ISD::SETCC_INVALID &&
-          (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+          (!LegalOperations ||
+           TLI.isCondCodeLegal(Result, LL.getSimpleValueType())))
         return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
                             LL, LR, Result);
     }
@@ -3212,11 +3261,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
     if ((LShVal + RShVal) != OpSizeInBits)
       return 0;

-    SDValue Rot;
-    if (HasROTL)
-      Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt);
-    else
-      Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt);
+    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

     // If there is an AND of either shifted operand, apply it to the result.
     if (LHSMask.getNode() || RHSMask.getNode()) {
@@ -3249,12 +3295,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
     if (ConstantSDNode *SUBC =
           dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
       if (SUBC->getAPIntValue() == OpSizeInBits) {
-        if (HasROTL)
-          return DAG.getNode(ISD::ROTL, DL, VT,
-                             LHSShiftArg, LHSShiftAmt).getNode();
-        else
-          return DAG.getNode(ISD::ROTR, DL, VT,
-                             LHSShiftArg, RHSShiftAmt).getNode();
+        return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+                           HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
       }
     }
   }
@@ -3266,25 +3308,21 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
     if (ConstantSDNode *SUBC =
           dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
       if (SUBC->getAPIntValue() == OpSizeInBits) {
-        if (HasROTR)
-          return DAG.getNode(ISD::ROTR, DL, VT,
-                             LHSShiftArg, RHSShiftAmt).getNode();
-        else
-          return DAG.getNode(ISD::ROTL, DL, VT,
-                             LHSShiftArg, LHSShiftAmt).getNode();
+        return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
+                           HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
       }
     }
   }

   // Look for sign/zext/any-extended or truncate cases:
-  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
-       || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
-       || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
-       || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
-      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
-       || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
-       || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
-       || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
     SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
     SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
     if (RExtOp0.getOpcode() == ISD::SUB &&
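MatchRotate above, now refactored to pick the rotate opcode with a ternary instead of duplicated calls, rewrites (x << c) | (x >>u (w - c)) into a single rotate when the target has ROTL or ROTR. A standalone check of the identity it matches, assuming a 32-bit type and 0 < c < 32 so neither shift amount is out of range:

  #include <cassert>
  #include <cstdint>

  // Rotate left/right by C; callers guarantee 0 < C < 32.
  static uint32_t rotl32(uint32_t X, unsigned C) { return (X << C) | (X >> (32 - C)); }
  static uint32_t rotr32(uint32_t X, unsigned C) { return (X >> C) | (X << (32 - C)); }

  int main() {
    uint32_t X = 0xDEADBEEF;  // illustrative value
    for (unsigned C = 1; C != 32; ++C) {
      // The OR-of-opposing-shifts form the combiner looks for...
      uint32_t OrOfShifts = (X << C) | (X >> (32 - C));
      // ...is a rotate left by C, which is also a rotate right by 32 - C;
      // this is why HasROTL/HasROTR are interchangeable with the adjusted
      // shift amount in the code above.
      assert(OrOfShifts == rotl32(X, C));
      assert(rotl32(X, C) == rotr32(X, 32 - C));
    }
    return 0;
  }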
@@ -3333,6 +3371,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (xor x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
   }

   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
@@ -3363,7 +3407,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                                isInt);

-    if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) {
+    if (!LegalOperations ||
+        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
       switch (N0.getOpcode()) {
       default:
         llvm_unreachable("Unhandled SetCC Equivalent!");
@@ -4056,7 +4101,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
   if (VT.isInteger() &&
       (VT0 == MVT::i1 ||
        (VT0.isInteger() &&
-        TLI.getBooleanContents(false) == TargetLowering::ZeroOrOneBooleanContent)) &&
+        TLI.getBooleanContents(false) ==
+        TargetLowering::ZeroOrOneBooleanContent)) &&
       N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
     SDValue XORNode;
     if (VT == VT0)
@@ -4422,20 +4468,18 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
     // If the desired elements are smaller or larger than the source
     // elements we can use a matching integer vector type and then
     // truncate/sign extend
-    else {
-      EVT MatchingElementType =
-        EVT::getIntegerVT(*DAG.getContext(),
-                          N0VT.getScalarType().getSizeInBits());
-      EVT MatchingVectorType =
-        EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
-                         N0VT.getVectorNumElements());
+    EVT MatchingElementType =
+      EVT::getIntegerVT(*DAG.getContext(),
+                        N0VT.getScalarType().getSizeInBits());
+    EVT MatchingVectorType =
+      EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+                       N0VT.getVectorNumElements());

-      if (SVT == MatchingVectorType) {
-        SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
-                               N0.getOperand(0), N0.getOperand(1),
-                               cast<CondCodeSDNode>(N0.getOperand(2))->get());
-        return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
-      }
+    if (SVT == MatchingVectorType) {
+      SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
+                             N0.getOperand(0), N0.getOperand(1),
+                             cast<CondCodeSDNode>(N0.getOperand(2))->get());
+      return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
     }
   }
@@ -5029,11 +5073,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
     // At this point, we must have a load or else we can't do the transform.
     if (!isa<LoadSDNode>(N0)) return SDValue();

+    // Because a SRL must be assumed to *need* to zero-extend the high bits
+    // (as opposed to anyext the high bits), we can't combine the zextload
+    // lowering of SRL and an sextload.
+    if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
+      return SDValue();
+
     // If the shift amount is larger than the input type then we're not
     // accessing any of the loaded bytes. If the load was a zextload/extload
     // then the result of the shift+trunc is zero/undef (handled elsewhere).
-    // If the load was a sextload then the result is a splat of the sign bit
-    // of the extended byte. This is not worth optimizing for.
     if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
       return SDValue();
   }
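The new early-out in ReduceLoadWidth above exists because a logical shift right must zero the high bits, while a sign-extending load replicates the sign bit; narrowing (srl (sextload x), c) as if the load were zero-extending would change the value. A standalone illustration with a byte loaded into a 32-bit register (illustrative values):

  #include <cassert>
  #include <cstdint>

  int main() {
    int8_t Mem = -1;                         // the byte in memory: 0xFF
    uint32_t SExt = (uint32_t)(int32_t)Mem;  // sextload: 0xFFFFFFFF
    uint32_t ZExt = (uint32_t)(uint8_t)Mem;  // zextload: 0x000000FF
    // A logical shift right of each widened value gives different results,
    // so the srl-narrowing combine cannot treat a sextload like a zextload.
    assert((SExt >> 4) == 0x0FFFFFFFu);
    assert((ZExt >> 4) == 0x0000000Fu);
    return 0;
  }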
@@ -5191,6 +5239,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
                                      LN0->getAlignment());
     CombineTo(N, ExtLoad);
     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+    AddToWorkList(ExtLoad.getNode());
     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   }

   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
@@ -5245,13 +5294,12 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
       // if the source is smaller than the dest, we still need an extend
       return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
                          N0.getOperand(0));
-    else if (N0.getOperand(0).getValueType().bitsGT(VT))
+    if (N0.getOperand(0).getValueType().bitsGT(VT))
       // if the source is larger than the dest, than we just need the truncate
       return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
-    else
-      // if the source and dest are the same type, we can drop both the extend
-      // and the truncate.
-      return N0.getOperand(0);
+    // if the source and dest are the same type, we can drop both the extend
+    // and the truncate.
+    return N0.getOperand(0);
   }

   // Fold extract-and-trunc into a narrow extract. For example:
@@ -5311,6 +5359,48 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
       if (Reduced.getNode())
         return Reduced;
   }

+  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
+  // where ... are all 'undef'.
+  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
+    SmallVector<EVT, 8> VTs;
+    SDValue V;
+    unsigned Idx = 0;
+    unsigned NumDefs = 0;
+
+    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
+      SDValue X = N0.getOperand(i);
+      if (X.getOpcode() != ISD::UNDEF) {
+        V = X;
+        Idx = i;
+        NumDefs++;
+      }
+      // Stop if more than one members are non-undef.
+      if (NumDefs > 1)
+        break;
+      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
+                                     VT.getVectorElementType(),
+                                     X.getValueType().getVectorNumElements()));
+    }
+
+    if (NumDefs == 0)
+      return DAG.getUNDEF(VT);
+
+    if (NumDefs == 1) {
+      assert(V.getNode() && "The single defined operand is empty!");
+      SmallVector<SDValue, 8> Opnds;
+      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+        if (i != Idx) {
+          Opnds.push_back(DAG.getUNDEF(VTs[i]));
+          continue;
+        }
+        SDValue NV = DAG.getNode(ISD::TRUNCATE, V.getDebugLoc(), VTs[i], V);
+        AddToWorkList(NV.getNode());
+        Opnds.push_back(NV);
+      }
+      return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+                         &Opnds[0], Opnds.size());
+    }
+  }

   // Simplify the operands using demanded-bits information.
   if (!VT.isVector() &&
@@ -5348,7 +5438,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
       !LD2->isVolatile() &&
       DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
     unsigned Align = LD1->getAlignment();
-    unsigned NewAlign = TLI.getTargetData()->
+    unsigned NewAlign = TLI.getDataLayout()->
       getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));

     if (NewAlign <= Align &&
@@ -5417,7 +5507,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
       !cast<LoadSDNode>(N0)->isVolatile() &&
       (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    unsigned Align = TLI.getTargetData()->
+    unsigned Align = TLI.getDataLayout()->
      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
     unsigned OrigAlign = LN0->getAlignment();

@@ -5440,7 +5530,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
   // This often reduces constant pool loads.
   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) ||
        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) &&
-      N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
+      N0.getNode()->hasOneUse() && VT.isInteger() &&
+      !VT.isVector() && !N0.getValueType().isVector()) {
     SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
                                   N0.getOperand(0));
     AddToWorkList(NewConv.getNode());
@@ -5663,7 +5754,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
   }

   // fold (fadd c1, c2) -> c1 + c2
-  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+  if (N0CFP && N1CFP)
     return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
   // canonicalize constant to RHS
   if (N0CFP && !N1CFP)
@@ -5674,12 +5765,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
     return N0;
   // fold (fadd A, (fneg B)) -> (fsub A, B)
   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
-      isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+    isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
     return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
                        GetNegatedExpression(N1, DAG, LegalOperations));
   // fold (fadd (fneg A), B) -> (fsub B, A)
   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
-      isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+    isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
     return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
                        GetNegatedExpression(N0, DAG, LegalOperations));

@@ -5691,6 +5782,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
                        DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
                                    N0.getOperand(1), N1));

+  // If allow, fold (fadd (fneg x), x) -> 0.0
+  if (DAG.getTarget().Options.UnsafeFPMath &&
+      N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) {
+    return DAG.getConstantFP(0.0, VT);
+  }
+
+  // If allow, fold (fadd x, (fneg x)) -> 0.0
+  if (DAG.getTarget().Options.UnsafeFPMath &&
+      N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) {
+    return DAG.getConstantFP(0.0, VT);
+  }
+
   // In unsafe math mode, we can fold chains of FADD's of the same value
   // into multiplications. This transform is not safe in general because
   // we are reducing the number of rounding steps.
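The two folds just added, (fadd (fneg x), x) -> 0.0 and its mirror, are guarded by UnsafeFPMath because the identity does not hold for IEEE special values: -x + x is NaN when x is a NaN or an infinity. A standalone demonstration:

  #include <cassert>
  #include <cmath>
  #include <limits>

  int main() {
    double Inf = std::numeric_limits<double>::infinity();
    assert(std::isnan(-Inf + Inf));  // infinity: the sum is NaN, not 0.0
    double NaN = std::nan("");
    assert(std::isnan(-NaN + NaN));  // NaN propagates, not 0.0
    double X = 1.25;                 // illustrative finite value
    assert(-X + X == 0.0);           // only finite x behaves as folded
    return 0;
  }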
@@ -5850,7 +5953,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
   }

   // fold (fsub c1, c2) -> c1-c2
-  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+  if (N0CFP && N1CFP)
     return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
   // fold (fsub A, 0) -> A
   if (DAG.getTarget().Options.UnsafeFPMath &&
@@ -5942,7 +6045,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
   }

   // fold (fmul c1, c2) -> c1*c2
-  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+  if (N0CFP && N1CFP)
     return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
   // canonicalize constant to RHS
   if (N0CFP && !N1CFP)
@@ -6000,6 +6103,12 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
   EVT VT = N->getValueType(0);
   DebugLoc dl = N->getDebugLoc();

+  if (DAG.getTarget().Options.UnsafeFPMath) {
+    if (N0CFP && N0CFP->isZero())
+      return N2;
+    if (N1CFP && N1CFP->isZero())
+      return N2;
+  }
   if (N0CFP && N0CFP->isExactlyValue(1.0))
     return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2);
   if (N1CFP && N1CFP->isExactlyValue(1.0))
@@ -6079,11 +6188,11 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
   }

   // fold (fdiv c1, c2) -> c1/c2
-  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+  if (N0CFP && N1CFP)
     return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);

   // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
-  if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) {
+  if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) {
     // Compute the reciprocal 1.0 / c2.
     APFloat N1APF = N1CFP->getValueAPF();
     APFloat Recip(N1APF.getSemantics(), 1); // 1.0
@@ -6126,7 +6235,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
   EVT VT = N->getValueType(0);

   // fold (frem c1, c2) -> fmod(c1,c2)
-  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+  if (N0CFP && N1CFP)
     return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);

   return SDValue();
@@ -6139,7 +6248,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   EVT VT = N->getValueType(0);

-  if (N0CFP && N1CFP && VT != MVT::ppcf128)  // Constant fold
+  if (N0CFP && N1CFP)  // Constant fold
     return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);

   if (N1CFP) {
@@ -6189,7 +6298,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
   EVT OpVT = N0.getValueType();

   // fold (sint_to_fp c1) -> c1fp
-  if (N0C && OpVT != MVT::ppcf128 &&
+  if (N0C &&
       // ...but only if the target supports immediate floating-point values
       (!LegalOperations ||
        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@@ -6246,7 +6355,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
   EVT OpVT = N0.getValueType();

   // fold (uint_to_fp c1) -> c1fp
-  if (N0C && OpVT != MVT::ppcf128 &&
+  if (N0C &&
       // ...but only if the target supports immediate floating-point values
       (!LegalOperations ||
        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@@ -6301,7 +6410,7 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
   EVT VT = N->getValueType(0);

   // fold (fp_to_uint c1fp) -> c1
-  if (N0CFP && VT != MVT::ppcf128)
+  if (N0CFP)
     return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);

   return SDValue();
@@ -6314,7 +6423,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
   EVT VT = N->getValueType(0);

   // fold (fp_round c1fp) -> c1fp
-  if (N0CFP && N0.getValueType() != MVT::ppcf128)
+  if (N0CFP)
     return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);

   // fold (fp_round (fp_extend x)) -> x
@@ -6368,7 +6477,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
     return SDValue();

   // fold (fp_extend c1fp) -> c1fp
-  if (N0CFP && VT != MVT::ppcf128)
+  if (N0CFP)
     return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0);

   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
@@ -6409,28 +6518,9 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);

-  if (VT.isVector() && !LegalOperations) {
-    // If operand is a BUILD_VECTOR node, see if we can constant fold it.
-    if (N0.getOpcode() == ISD::BUILD_VECTOR) {
-      SmallVector<SDValue, 8> Ops;
-      for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
-        SDValue Op = N0.getOperand(i);
-        if (Op.getOpcode() != ISD::UNDEF &&
-            Op.getOpcode() != ISD::ConstantFP)
-          break;
-        EVT EltVT = Op.getValueType();
-        SDValue FoldOp = DAG.getNode(ISD::FNEG, N0.getDebugLoc(), EltVT, Op);
-        if (FoldOp.getOpcode() != ISD::UNDEF &&
-            FoldOp.getOpcode() != ISD::ConstantFP)
-          break;
-        Ops.push_back(FoldOp);
-        AddToWorkList(FoldOp.getNode());
-      }
-
-      if (Ops.size() == N0.getNumOperands())
-        return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
-                           VT, &Ops[0], Ops.size());
-    }
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVUnaryOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
   }

   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
@@ -6474,7 +6564,7 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) {
   EVT VT = N->getValueType(0);

   // fold (fceil c1) -> fceil(c1)
-  if (N0CFP && VT != MVT::ppcf128)
+  if (N0CFP)
     return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0);

   return SDValue();
@@ -6486,7 +6576,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
   EVT VT = N->getValueType(0);

   // fold (ftrunc c1) -> ftrunc(c1)
-  if (N0CFP && VT != MVT::ppcf128)
+  if (N0CFP)
     return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0);

   return SDValue();
@@ -6498,7 +6588,7 @@ SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
   EVT VT = N->getValueType(0);

   // fold (ffloor c1) -> ffloor(c1)
-  if (N0CFP && VT != MVT::ppcf128)
+  if (N0CFP)
     return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0);

   return SDValue();
@@ -6509,8 +6599,13 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   EVT VT = N->getValueType(0);

+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVUnaryOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
   // fold (fabs c1) -> fabs(c1)
-  if (N0CFP && VT != MVT::ppcf128)
+  if (N0CFP)
     return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
   // fold (fabs (fabs x)) -> (fabs x)
   if (N0.getOpcode() == ISD::FABS)
@@ -7344,7 +7439,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
       // start at the previous one.
       if (ShAmt % NewBW)
         ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
-      APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW);
+      APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
+                                     std::min(BitWidth, ShAmt + NewBW));
       if ((Imm & Mask) == Imm) {
         APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
         if (Opc == ISD::AND)
@@ -7357,7 +7453,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());

-      if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy))
+      if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
         return SDValue();

       SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
@@ -7419,7 +7515,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
     unsigned LDAlign = LD->getAlignment();
     unsigned STAlign = ST->getAlignment();
     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
-    unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy);
+    unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy);
     if (LDAlign < ABIAlign || STAlign < ABIAlign)
       return SDValue();

@@ -7444,6 +7540,477 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
   return SDValue();
 }

+/// Returns the base pointer and an integer offset from that object.
+static std::pair<SDValue, int64_t> GetPointerBaseAndOffset(SDValue Ptr) {
+  if (Ptr->getOpcode() == ISD::ADD && isa<ConstantSDNode>(Ptr->getOperand(1))) {
+    int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
+    SDValue Base = Ptr->getOperand(0);
+    return std::make_pair(Base, Offset);
+  }
+
+  return std::make_pair(Ptr, 0);
+}
+
+/// Holds a pointer to an LSBaseSDNode as well as information on where it
+/// is located in a sequence of memory operations connected by a chain.
+struct MemOpLink {
+  MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
+    MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
+  // Ptr to the mem node.
+  LSBaseSDNode *MemNode;
+  // Offset from the base ptr.
+  int64_t OffsetFromBase;
+  // What is the sequence number of this mem node.
+  // Lowest mem operand in the DAG starts at zero.
+  unsigned SequenceNum;
+};
+
+/// Sorts store nodes in a link according to their offset from a shared
+// base ptr.
+struct ConsecutiveMemoryChainSorter {
+  bool operator()(MemOpLink LHS, MemOpLink RHS) {
+    return LHS.OffsetFromBase < RHS.OffsetFromBase;
+  }
+};
+
+bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
+  EVT MemVT = St->getMemoryVT();
+  int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
+
+  // Don't merge vectors into wider inputs.
+  if (MemVT.isVector() || !MemVT.isSimple())
+    return false;
+
+  // Perform an early exit check. Do not bother looking at stored values that
+  // are not constants or loads.
+  SDValue StoredVal = St->getValue();
+  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
+  if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
+      !IsLoadSrc)
+    return false;
+
+  // Only look at ends of store sequences.
+  SDValue Chain = SDValue(St, 1);
+  if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
+    return false;
+
+  // This holds the base pointer and the offset in bytes from the base pointer.
+  std::pair<SDValue, int64_t> BasePtr =
+      GetPointerBaseAndOffset(St->getBasePtr());
+
+  // We must have a base and an offset.
+  if (!BasePtr.first.getNode())
+    return false;
+
+  // Do not handle stores to undef base pointers.
+  if (BasePtr.first.getOpcode() == ISD::UNDEF)
+    return false;
+
+  // Save the LoadSDNodes that we find in the chain.
+  // We need to make sure that these nodes do not interfere with
+  // any of the store nodes.
+  SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
+
+  // Save the StoreSDNodes that we find in the chain.
+  SmallVector<MemOpLink, 8> StoreNodes;
+
+  // Walk up the chain and look for nodes with offsets from the same
+  // base pointer. Stop when reaching an instruction with a different kind
+  // or instruction which has a different base pointer.
+  unsigned Seq = 0;
+  StoreSDNode *Index = St;
+  while (Index) {
+    // If the chain has more than one use, then we can't reorder the mem ops.
+    if (Index != St && !SDValue(Index, 1)->hasOneUse())
+      break;
+
+    // Find the base pointer and offset for this memory node.
+    std::pair<SDValue, int64_t> Ptr =
+      GetPointerBaseAndOffset(Index->getBasePtr());
+
+    // Check that the base pointer is the same as the original one.
+    if (Ptr.first.getNode() != BasePtr.first.getNode())
+      break;
+
+    // Check that the alignment is the same.
+    if (Index->getAlignment() != St->getAlignment())
+      break;
+
+    // The memory operands must not be volatile.
+    if (Index->isVolatile() || Index->isIndexed())
+      break;
+
+    // No truncation.
+    if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index))
+      if (St->isTruncatingStore())
+        break;
+
+    // The stored memory type must be the same.
+    if (Index->getMemoryVT() != MemVT)
+      break;
+
+    // We do not allow unaligned stores because we want to prevent overriding
+    // stores.
+    if (Index->getAlignment()*8 != MemVT.getSizeInBits())
+      break;
+
+    // We found a potential memory operand to merge.
+    StoreNodes.push_back(MemOpLink(Index, Ptr.second, Seq++));
+
+    // Find the next memory operand in the chain. If the next operand in the
+    // chain is a store then move up and continue the scan with the next
+    // memory operand. If the next operand is a load save it and use alias
+    // information to check if it interferes with anything.
+    SDNode *NextInChain = Index->getChain().getNode();
+    while (1) {
+      if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
+        // We found a store node. Use it for the next iteration.
+        Index = STn;
+        break;
+      } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
+        // Save the load node for later. Continue the scan.
+        AliasLoadNodes.push_back(Ldn);
+        NextInChain = Ldn->getChain().getNode();
+        continue;
+      } else {
+        Index = NULL;
+        break;
+      }
+    }
+  }
+
+  // Check if there is anything to merge.
+  if (StoreNodes.size() < 2)
+    return false;
+
+  // Sort the memory operands according to their distance from the base pointer.
+  std::sort(StoreNodes.begin(), StoreNodes.end(),
+            ConsecutiveMemoryChainSorter());
+
+  // Scan the memory operations on the chain and find the first non-consecutive
+  // store memory address.
+  unsigned LastConsecutiveStore = 0;
+  int64_t StartAddress = StoreNodes[0].OffsetFromBase;
+  for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
+
+    // Check that the addresses are consecutive starting from the second
+    // element in the list of stores.
+    if (i > 0) {
+      int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
+      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+        break;
+    }
+
+    bool Alias = false;
+    // Check if this store interferes with any of the loads that we found.
+    for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld)
+      if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) {
+        Alias = true;
+        break;
+      }
+
+    // We found a load that alias with this store. Stop the sequence.
+    if (Alias)
+      break;
+
+    // Mark this node as useful.
+    LastConsecutiveStore = i;
+  }
+
+  // The node with the lowest store address.
+  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+
+  // Store the constants into memory as one consecutive store.
+  if (!IsLoadSrc) {
+    unsigned LastLegalType = 0;
+    unsigned LastLegalVectorType = 0;
+    bool NonZero = false;
+    for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
+      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+      SDValue StoredVal = St->getValue();
+
+      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
+        NonZero |= !C->isNullValue();
+      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
+        NonZero |= !C->getConstantFPValue()->isNullValue();
+      } else {
+        // Non constant.
+        break;
+      }
+
+      // Find a legal type for the constant store.
+      unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
+      EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+      if (TLI.isTypeLegal(StoreTy))
+        LastLegalType = i+1;
+
+      // Find a legal type for the vector store.
+      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+      if (TLI.isTypeLegal(Ty))
+        LastLegalVectorType = i + 1;
+    }
+
+    // We only use vectors if the constant is known to be zero and the
+    // function is not marked with the noimplicitfloat attribute.
+    if (NonZero || (DAG.getMachineFunction().getFunction()->getAttributes().
+                    hasAttribute(AttributeSet::FunctionIndex,
+                                 Attribute::NoImplicitFloat)))
+      LastLegalVectorType = 0;
+
+    // Check if we found a legal integer type to store.
+    if (LastLegalType == 0 && LastLegalVectorType == 0)
+      return false;
+
+    bool UseVector = LastLegalVectorType > LastLegalType;
+    unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
+
+    // Make sure we have something to merge.
+    if (NumElem < 2)
+      return false;
+
+    unsigned EarliestNodeUsed = 0;
+    for (unsigned i=0; i < NumElem; ++i) {
+      // Find a chain for the new wide-store operand. Notice that some
+      // of the store nodes that we found may not be selected for inclusion
+      // in the wide store. The chain we use needs to be the chain of the
+      // earliest store node which is *used* and replaced by the wide store.
+      if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
+        EarliestNodeUsed = i;
+    }
+
+    // The earliest Node in the DAG.
+    LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+    DebugLoc DL = StoreNodes[0].MemNode->getDebugLoc();
+
+    SDValue StoredVal;
+    if (UseVector) {
+      // Find a legal type for the vector store.
+      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+      assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+      StoredVal = DAG.getConstant(0, Ty);
+    } else {
+      unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+      APInt StoreInt(StoreBW, 0);
+
+      // Construct a single integer constant which is made of the smaller
+      // constant inputs.
+      bool IsLE = TLI.isLittleEndian();
+      for (unsigned i = 0; i < NumElem ; ++i) {
+        unsigned Idx = IsLE ?(NumElem - 1 - i) : i;
+        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+        SDValue Val = St->getValue();
+        StoreInt<<=ElementSizeBytes*8;
+        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
+          StoreInt|=C->getAPIntValue().zext(StoreBW);
+        } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
+          StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
+        } else {
+          assert(false && "Invalid constant element type");
+        }
+      }
+
+      // Create the new Load and Store operations.
+      EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+      StoredVal = DAG.getConstant(StoreInt, StoreTy);
+    }
+
+    SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
+                                    FirstInChain->getBasePtr(),
+                                    FirstInChain->getPointerInfo(),
+                                    false, false,
+                                    FirstInChain->getAlignment());
+
+    // Replace the first store with the new store
+    CombineTo(EarliestOp, NewStore);
+    // Erase all other stores.
+    for (unsigned i = 0; i < NumElem ; ++i) {
+      if (StoreNodes[i].MemNode == EarliestOp)
+        continue;
+      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+      // ReplaceAllUsesWith will replace all uses that existed when it was
+      // called, but graph optimizations may cause new ones to appear. For
+      // example, the case in pr14333 looks like
+      //
+      //  St's chain -> St -> another store -> X
+      //
+      // And the only difference from St to the other store is the chain.
+      // When we change it's chain to be St's chain they become identical,
+      // get CSEed and the net result is that X is now a use of St.
+      // Since we know that St is redundant, just iterate.
+      while (!St->use_empty())
+        DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
+      removeFromWorkList(St);
+      DAG.DeleteNode(St);
+    }
+
+    return true;
+  }
+
+  // Below we handle the case of multiple consecutive stores that
+  // come from multiple consecutive loads. We merge them into a single
+  // wide load and a single wide store.
+
+  // Look for load nodes which are used by the stored values.
+  SmallVector<MemOpLink, 8> LoadNodes;
+
+  // Find acceptable loads. Loads need to have the same chain (token factor),
+  // must not be zext, volatile, indexed, and they must be consecutive.
+  SDValue LdBasePtr;
+  for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
+    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+    LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
+    if (!Ld) break;
+
+    // Loads must only have one use.
+    if (!Ld->hasNUsesOfValue(1, 0))
+      break;
+
+    // Check that the alignment is the same as the stores.
+    if (Ld->getAlignment() != St->getAlignment())
+      break;
+
+    // The memory operands must not be volatile.
+    if (Ld->isVolatile() || Ld->isIndexed())
+      break;
+
+    // We do not accept ext loads.
+    if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
+      break;
+
+    // The stored memory type must be the same.
+    if (Ld->getMemoryVT() != MemVT)
+      break;
+
+    std::pair<SDValue, int64_t> LdPtr =
+      GetPointerBaseAndOffset(Ld->getBasePtr());
+
+    // If this is not the first ptr that we check.
+    if (LdBasePtr.getNode()) {
+      // The base ptr must be the same.
+      if (LdPtr.first != LdBasePtr)
+        break;
+    } else {
+      // Check that all other base pointers are the same as this one.
+      LdBasePtr = LdPtr.first;
+    }
+
+    // We found a potential memory operand to merge.
+    LoadNodes.push_back(MemOpLink(Ld, LdPtr.second, 0));
+  }
+
+  if (LoadNodes.size() < 2)
+    return false;
+
+  // Scan the memory operations on the chain and find the first non-consecutive
+  // load memory address. These variables hold the index in the store node
+  // array.
+  unsigned LastConsecutiveLoad = 0;
+  // This variable refers to the size and not index in the array.
+  unsigned LastLegalVectorType = 0;
+  unsigned LastLegalIntegerType = 0;
+  StartAddress = LoadNodes[0].OffsetFromBase;
+  SDValue FirstChain = LoadNodes[0].MemNode->getChain();
+  for (unsigned i = 1; i < LoadNodes.size(); ++i) {
+    // All loads much share the same chain.
+    if (LoadNodes[i].MemNode->getChain() != FirstChain)
+      break;
+
+    int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
+    if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+      break;
+    LastConsecutiveLoad = i;
+
+    // Find a legal type for the vector store.
+    EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+    if (TLI.isTypeLegal(StoreTy))
+      LastLegalVectorType = i + 1;
+
+    // Find a legal type for the integer store.
+    unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
+    StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+    if (TLI.isTypeLegal(StoreTy))
+      LastLegalIntegerType = i + 1;
+  }
+
+  // Only use vector types if the vector type is larger than the integer type.
+  // If they are the same, use integers.
+  bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType;
+  unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
+
+  // We add +1 here because the LastXXX variables refer to location while
+  // the NumElem refers to array/index size.
+  unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
+  NumElem = std::min(LastLegalType, NumElem);
+
+  if (NumElem < 2)
+    return false;
+
+  // The earliest Node in the DAG.
+  unsigned EarliestNodeUsed = 0;
+  LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+  for (unsigned i=1; i<NumElem; ++i) {
+    // Find a chain for the new wide-store operand. Notice that some
+    // of the store nodes that we found may not be selected for inclusion
+    // in the wide store. The chain we use needs to be the chain of the
+    // earliest store node which is *used* and replaced by the wide store.
+    if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
+      EarliestNodeUsed = i;
+  }
+
+  // Find if it is better to use vectors or integers to load and store
+  // to memory.
+  EVT JointMemOpVT;
+  if (UseVectorTy) {
+    JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+  } else {
+    unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+    JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+  }
+
+  DebugLoc LoadDL = LoadNodes[0].MemNode->getDebugLoc();
+  DebugLoc StoreDL = StoreNodes[0].MemNode->getDebugLoc();
+
+  LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
+  SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
+                                FirstLoad->getChain(),
+                                FirstLoad->getBasePtr(),
+                                FirstLoad->getPointerInfo(),
+                                false, false, false,
+                                FirstLoad->getAlignment());
+
+  SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
+                                  FirstInChain->getBasePtr(),
+                                  FirstInChain->getPointerInfo(), false, false,
+                                  FirstInChain->getAlignment());
+
+  // Replace one of the loads with the new load.
+  LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
+  DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+                                SDValue(NewLoad.getNode(), 1));
+
+  // Remove the rest of the load chains.
+  for (unsigned i = 1; i < NumElem ; ++i) {
+    // Replace all chain users of the old load nodes with the chain of the new
+    // load node.
+    LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
+  }
+
+  // Replace the first store with the new store.
+  CombineTo(EarliestOp, NewStore);
+  // Erase all other stores.
+  for (unsigned i = 0; i < NumElem ; ++i) {
+    // Remove all Store nodes.
+    if (StoreNodes[i].MemNode == EarliestOp)
+      continue;
+    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
+    removeFromWorkList(St);
+    DAG.DeleteNode(St);
+  }
+
+  return true;
+}
+
 SDValue DAGCombiner::visitSTORE(SDNode *N) {
   StoreSDNode *ST  = cast<StoreSDNode>(N);
   SDValue Chain = ST->getChain();
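A standalone sketch (plain C++, not the SelectionDAG APIs) of what the constant path of MergeConsecutiveStores above achieves: four adjacent one-byte stores of constants become a single 32-bit store, with the wide constant assembled in the same reverse-index order the patch uses for little-endian targets (Idx = NumElem - 1 - i). Illustrative byte values; a little-endian host is assumed:

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  int main() {
    // Four consecutive one-byte stores of constants...
    unsigned char A[4];
    A[0] = 0x11; A[1] = 0x22; A[2] = 0x33; A[3] = 0x44;

    // ...merged into one 32-bit store. On a little-endian target the
    // lowest-addressed element must land in the low bits, which is why
    // the combiner shifts elements in from the highest index first.
    uint32_t Wide = 0;
    for (int i = 3; i >= 0; --i) {
      Wide <<= 8;
      Wide |= (uint32_t)A[i];
    }
    unsigned char B[4];
    std::memcpy(B, &Wide, 4);            // the single wide store
    assert(std::memcmp(A, B, 4) == 0);   // same memory image as 4 stores
    return 0;
  }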
@@ -7456,7 +8023,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
       ST->isUnindexed()) {
     unsigned OrigAlign = ST->getAlignment();
     EVT SVT = Value.getOperand(0).getValueType();
-    unsigned Align = TLI.getTargetData()->
+    unsigned Align = TLI.getDataLayout()->
       getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
     if (Align <= OrigAlign &&
         ((!LegalOperations && !ST->isVolatile()) ||
@@ -7645,6 +8212,24 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
                              ST->getAlignment());
   }

+  // Only perform this optimization before the types are legal, because we
+  // don't want to perform this optimization on every DAGCombine invocation.
+  if (!LegalTypes) {
+    bool EverChanged = false;
+
+    do {
+      // There can be multiple store sequences on the same chain.
+      // Keep trying to merge store sequences until we are unable to do so
+      // or until we merge the last store on the chain.
+      bool Changed = MergeConsecutiveStores(ST);
+      EverChanged |= Changed;
+      if (!Changed) break;
+    } while (ST->getOpcode() != ISD::DELETED_NODE);
+
+    if (EverChanged)
+      return SDValue(N, 0);
+  }
+
   return ReduceLoadOpStoreWidth(N);
 }
@@ -7723,9 +8308,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {

   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
   // We only perform this optimization before the op legalization phase because
-  // we may introduce new vector instructions which are not backed by TD patterns.
-  // For example on AVX, extracting elements from a wide vector without using
-  // extract_subvector.
+  // we may introduce new vector instructions which are not backed by TD
+  // patterns. For example on AVX, extracting elements from a wide vector
+  // without using extract_subvector.
   if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE && ConstEltNo &&
       !LegalOperations) {
     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
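The rewrapped comment above describes folding EXTRACT_VECTOR_ELT(VECTOR_SHUFFLE) into an extract from one of the shuffle's inputs. A standalone sketch of the index arithmetic, using the convention that mask entries 0..N-1 select from the first input and N..2N-1 from the second (illustrative values):

  #include <cassert>

  int main() {
    int V0[4] = {10, 11, 12, 13};
    int V1[4] = {20, 21, 22, 23};
    int Mask[4] = {6, 0, 5, 3};

    for (int i = 0; i != 4; ++i) {
      int M = Mask[i];
      // Lane i of the shuffled vector...
      int ShuffledLane = (M < 4) ? V0[M] : V1[M - 4];
      // ...is just an extract from the selected input at M % 4, so the
      // combiner never needs to materialize the shuffled vector.
      int *Src = (M < 4) ? V0 : V1;
      assert(ShuffledLane == Src[M % 4]);
    }
    return 0;
  }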
@@ -7844,7 +8429,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
       // Check the resultant load doesn't need a higher alignment than the
       // original load.
       unsigned NewAlign =
-        TLI.getTargetData()
+        TLI.getDataLayout()
             ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));

       if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
@@ -7909,15 +8494,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
   return SDValue();
 }

-SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+// Simplify (build_vec (ext )) to (bitcast (build_vec ))
+SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
+  // We perform this optimization post type-legalization because
+  // the type-legalizer often scalarizes integer-promoted vectors.
+  // Performing this optimization before may create bit-casts which
+  // will be type-legalized to complex code sequences.
+  // We perform this optimization only before the operation legalizer because we
+  // may introduce illegal operations.
+  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
+    return SDValue();
+
   unsigned NumInScalars = N->getNumOperands();
   DebugLoc dl = N->getDebugLoc();
   EVT VT = N->getValueType(0);

-  // A vector built entirely of undefs is undef.
-  if (ISD::allOperandsUndef(N))
-    return DAG.getUNDEF(VT);
-
   // Check to see if this is a BUILD_VECTOR of a bunch of values
   // which come from any_extend or zero_extend nodes. If so, we can create
   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
@@ -7960,64 +8551,142 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
   // In order to have valid types, all of the inputs must be extended from the
   // same source type and all of the inputs must be any or zero extend.
   // Scalar sizes must be a power of two.
-  EVT OutScalarTy = N->getValueType(0).getScalarType();
+  EVT OutScalarTy = VT.getScalarType();
   bool ValidTypes = SourceType != MVT::Other &&
                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                  isPowerOf2_32(SourceType.getSizeInBits());

-  // We perform this optimization post type-legalization because
-  // the type-legalizer often scalarizes integer-promoted vectors.
-  // Performing this optimization before may create bit-casts which
-  // will be type-legalized to complex code sequences.
-  // We perform this optimization only before the operation legalizer because we
-  // may introduce illegal operations.
   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
   // turn into a single shuffle instruction.
-  if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) &&
-      ValidTypes) {
-    bool isLE = TLI.isLittleEndian();
-    unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
-    assert(ElemRatio > 1 && "Invalid element size ratio");
-    SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
-                                 DAG.getConstant(0, SourceType);
-
-    unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements();
-    SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
-
-    // Populate the new build_vector
-    for (unsigned i=0; i < N->getNumOperands(); ++i) {
-      SDValue Cast = N->getOperand(i);
-      assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
-              Cast.getOpcode() == ISD::ZERO_EXTEND ||
-              Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
-      SDValue In;
-      if (Cast.getOpcode() == ISD::UNDEF)
-        In = DAG.getUNDEF(SourceType);
-      else
-        In = Cast->getOperand(0);
-      unsigned Index = isLE ? (i * ElemRatio) :
-                              (i * ElemRatio + (ElemRatio - 1));
-
-      assert(Index < Ops.size() && "Invalid index");
-      Ops[Index] = In;
-    }
-
-    // The type of the new BUILD_VECTOR node.
-    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
-    assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() &&
-           "Invalid vector size");
-    // Check if the new vector type is legal.
-    if (!isTypeLegal(VecVT)) return SDValue();
-
-    // Make the new BUILD_VECTOR.
-    SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
-                             VecVT, &Ops[0], Ops.size());
-
-    // The new BUILD_VECTOR node has the potential to be further optimized.
-    AddToWorkList(BV.getNode());
-    // Bitcast to the desired type.
-    return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV);
-  }
+  if (!ValidTypes)
+    return SDValue();
+
+  bool isLE = TLI.isLittleEndian();
+  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
+  assert(ElemRatio > 1 && "Invalid element size ratio");
+  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
+                               DAG.getConstant(0, SourceType);
+
+  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
+  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
+
+  // Populate the new build_vector
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    SDValue Cast = N->getOperand(i);
+    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
+            Cast.getOpcode() == ISD::ZERO_EXTEND ||
+            Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
+    SDValue In;
+    if (Cast.getOpcode() == ISD::UNDEF)
+      In = DAG.getUNDEF(SourceType);
+    else
+      In = Cast->getOperand(0);
+    unsigned Index = isLE ? (i * ElemRatio) :
+                            (i * ElemRatio + (ElemRatio - 1));
+
+    assert(Index < Ops.size() && "Invalid index");
+    Ops[Index] = In;
+  }
+
+  // The type of the new BUILD_VECTOR node.
+  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
+  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
+         "Invalid vector size");
+  // Check if the new vector type is legal.
+  if (!isTypeLegal(VecVT)) return SDValue();
+
+  // Make the new BUILD_VECTOR.
+  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size());
+
+  // The new BUILD_VECTOR node has the potential to be further optimized.
+  AddToWorkList(BV.getNode());
+  // Bitcast to the desired type.
+  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
+}
+
+SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
+  EVT VT = N->getValueType(0);
+
+  unsigned NumInScalars = N->getNumOperands();
+  DebugLoc dl = N->getDebugLoc();
+
+  EVT SrcVT = MVT::Other;
+  unsigned Opcode = ISD::DELETED_NODE;
+  unsigned NumDefs = 0;
+
+  for (unsigned i = 0; i != NumInScalars; ++i) {
+    SDValue In = N->getOperand(i);
+    unsigned Opc = In.getOpcode();
+
+    if (Opc == ISD::UNDEF)
+      continue;
+
+    // If all scalar values are floats and converted from integers.
+    if (Opcode == ISD::DELETED_NODE &&
+        (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
+      Opcode = Opc;
+    }
+
+    if (Opc != Opcode)
+      return SDValue();
+
+    EVT InVT = In.getOperand(0).getValueType();
+
+    // If all scalar values are typed differently, bail out. It's chosen to
+    // simplify BUILD_VECTOR of integer types.
+    if (SrcVT == MVT::Other)
+      SrcVT = InVT;
+    if (SrcVT != InVT)
+      return SDValue();
+    NumDefs++;
+  }
+
+  // If the vector has just one element defined, it's not worth to fold it into
+  // a vectorized one.
+  if (NumDefs < 2)
+    return SDValue();
+
+  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
+         && "Should only handle conversion from integer to float.");
+  assert(SrcVT != MVT::Other && "Cannot determine source type!");
+
+  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
+
+  if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
+    return SDValue();
+
+  SmallVector<SDValue, 8> Opnds;
+  for (unsigned i = 0; i != NumInScalars; ++i) {
+    SDValue In = N->getOperand(i);
+
+    if (In.getOpcode() == ISD::UNDEF)
+      Opnds.push_back(DAG.getUNDEF(SrcVT));
+    else
+      Opnds.push_back(In.getOperand(0));
+  }
+  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
+                           &Opnds[0], Opnds.size());
+  AddToWorkList(BV.getNode());
+
+  return DAG.getNode(Opcode, dl, VT, BV);
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+  unsigned NumInScalars = N->getNumOperands();
+  DebugLoc dl = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+
+  // A vector built entirely of undefs is undef.
+  if (ISD::allOperandsUndef(N))
+    return DAG.getUNDEF(VT);
+
+  SDValue V = reduceBuildVecExtToExtBuildVec(N);
+  if (V.getNode())
+    return V;
+
+  V = reduceBuildVecConvertToConvertBuildVec(N);
+  if (V.getNode())
+    return V;

   // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
   // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
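A standalone sketch of the layout fact reduceBuildVecExtToExtBuildVec above relies on: a BUILD_VECTOR of zero-extended narrow scalars is bit-identical to a wider BUILD_VECTOR of the narrow source type with zero filler, where each source element lands at index i * ElemRatio on little-endian targets. Shown here with a v2i16 built from zext'd i8 values viewed as v4i8; a little-endian host is assumed and the element values are illustrative:

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  int main() {
    // build_vector (zext i8 0xAB to i16), (zext i8 0xCD to i16)
    uint16_t Wide[2] = {0x00AB, 0x00CD};

    // The equivalent v4i8 build_vector: source elements at i * ElemRatio
    // (ElemRatio = 16/8 = 2), zero filler everywhere else.
    uint8_t Narrow[4] = {0xAB, 0x00, 0xCD, 0x00};

    // The bitcast the combiner emits is a no-op reinterpretation.
    assert(std::memcmp(Wide, Narrow, 4) == 0);
    return 0;
  }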
@@ -8102,14 +8771,15 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
       return SDValue();

     // Widen the input vector by adding undef values.
-    VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+    VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
                          VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
   }

   // If VecIn2 is unused then change it to undef.
   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);

-  // Check that we were able to transform all incoming values to the same type.
+  // Check that we were able to transform all incoming values to the same
+  // type.
   if (VecIn2.getValueType() != VecIn1.getValueType() ||
       VecIn1.getValueType() != VT)
     return SDValue();
@@ -8122,7 +8792,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
     SDValue Ops[2];
     Ops[0] = VecIn1;
     Ops[1] = VecIn2;
-    return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
+    return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
   }

   return SDValue();
@@ -8158,8 +8828,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
     return SDValue();

   // Only handle cases where both indexes are constants with the same type.
-  ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
-  ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
+  ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));

   if (InsIdx && ExtIdx &&
       InsIdx->getValueType(0).getSizeInBits() <= 64 &&
@@ -8176,6 +8846,21 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
     }
   }

+  if (V->getOpcode() == ISD::CONCAT_VECTORS) {
+    // Combine:
+    //    (extract_subvec (concat V1, V2, ...), i)
+    // Into:
+    //    Vi if possible
+    // Only operand 0 is checked as 'concat' assumes all inputs of the same type.
+    if (V->getOperand(0).getValueType() != NVT)
+      return SDValue();
+    unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+    unsigned NumElems = NVT.getVectorNumElements();
+    assert((Idx % NumElems) == 0 &&
+           "IDX in concat is not a multiple of the result vector length.");
+    return V->getOperand(Idx / NumElems);
+  }
+
   return SDValue();
 }
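The new CONCAT_VECTORS case above forwards (extract_subvec (concat V1, V2, ...), i) directly to operand i / NumElems. A standalone model of why the index must be a multiple of the result vector length (illustrative values):

  #include <cassert>
  #include <vector>

  int main() {
    // concat_vectors(V0, V1) with 4-element inputs.
    std::vector<int> V0 = {0, 1, 2, 3}, V1 = {4, 5, 6, 7};
    std::vector<int> Concat;
    Concat.insert(Concat.end(), V0.begin(), V0.end());
    Concat.insert(Concat.end(), V1.begin(), V1.end());

    // Extracting a 4-wide subvector at Idx = 4 is exactly operand
    // Idx / NumElems of the concat; any non-multiple Idx would straddle
    // two operands, which the fold cannot handle.
    unsigned NumElems = 4, Idx = 4;
    assert(Idx % NumElems == 0);
    const std::vector<int> &Picked = (Idx / NumElems == 0) ? V0 : V1;
    for (unsigned i = 0; i != NumElems; ++i)
      assert(Picked[i] == Concat[Idx + i]);
    return 0;
  }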
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); @@ -8808,34 +9535,38 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, return SDValue(); // Get a SetCC of the condition - // FIXME: Should probably make sure that setcc is legal if we ever have a - // target where it isn't. - SDValue Temp, SCC; - // cast from setcc result type to select result type - if (LegalTypes) { - SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), - N0, N1, CC); - if (N2.getValueType().bitsLT(SCC.getValueType())) - Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType()); - else + // NOTE: Don't create a SETCC if it's not legal on this target. + if (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, + LegalTypes ? TLI.getSetCCResultType(N0.getValueType()) : MVT::i1)) { + SDValue Temp, SCC; + // cast from setcc result type to select result type + if (LegalTypes) { + SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), + N0, N1, CC); + if (N2.getValueType().bitsLT(SCC.getValueType())) + Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), + N2.getValueType()); + else + Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), + N2.getValueType(), SCC); + } else { + SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC); Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), N2.getValueType(), SCC); - } else { - SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC); - Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), - N2.getValueType(), SCC); - } + } - AddToWorkList(SCC.getNode()); - AddToWorkList(Temp.getNode()); + AddToWorkList(SCC.getNode()); + AddToWorkList(Temp.getNode()); - if (N2C->getAPIntValue() == 1) - return Temp; + if (N2C->getAPIntValue() == 1) + return Temp; - // shl setcc result by log2 n2c - return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), - getShiftAmountTy(Temp.getValueType()))); + // shl setcc result by log2 n2c + return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, + DAG.getConstant(N2C->getAPIntValue().logBase2(), + getShiftAmountTy(Temp.getValueType()))); + } } // Check to see if this is the equivalent of setcc @@ -8918,7 +9649,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, DebugLoc DL, bool foldBooleans) { TargetLowering::DAGCombinerInfo - DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); + DagCombineInfo(DAG, Level, false, this); return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); } @@ -9061,6 +9792,23 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, return true; } +bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) { + SDValue Ptr0, Ptr1; + int64_t Size0, Size1; + const Value *SrcValue0, *SrcValue1; + int SrcValueOffset0, SrcValueOffset1; + unsigned SrcValueAlign0, SrcValueAlign1; + const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1; + FindAliasInfo(Op0, Ptr0, Size0, SrcValue0, SrcValueOffset0, + SrcValueAlign0, SrcTBAAInfo0); + FindAliasInfo(Op1, Ptr1, Size1, SrcValue1, SrcValueOffset1, + SrcValueAlign1, SrcTBAAInfo1); + return isAlias(Ptr0, Size0, SrcValue0, SrcValueOffset0, + SrcValueAlign0, SrcTBAAInfo0, + Ptr1, Size1, SrcValue1, SrcValueOffset1, + SrcValueAlign1, SrcTBAAInfo1); +} + /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. 
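The block made conditional above still ends in the existing fold of select_cc(cond, 2^k, 0) into a zero-extended setcc shifted left by k; the change is only that no SETCC is created when it is illegal for the target. The underlying identity, in scalar form:

#include <cassert>

// select(cond, 8, 0) == zext(cond) << log2(8), for any power of two.
int select_form(bool cond)  { return cond ? 8 : 0; }
int shifted_form(bool cond) { return static_cast<int>(cond) << 3; }

int main() {
  assert(select_form(true)  == shifted_form(true));  // both 8
  assert(select_form(false) == shifted_form(false)); // both 0
}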
bool DAGCombiner::FindAliasInfo(SDNode *N, diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 683fac6..0d90a07 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -40,27 +40,27 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "isel" -#include "llvm/DebugInfo.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Operator.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Analysis/Loads.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Debug.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " @@ -737,15 +737,15 @@ bool FastISel::SelectBitCast(const User *I) { } // Bitcasts of other values become reg-reg copies or BITCAST operators. - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - EVT DstVT = TLI.getValueType(I->getType()); - - if (SrcVT == MVT::Other || !SrcVT.isSimple() || - DstVT == MVT::Other || !DstVT.isSimple() || - !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT)) + EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstEVT = TLI.getValueType(I->getType()); + if (SrcEVT == MVT::Other || DstEVT == MVT::Other || + !TLI.isTypeLegal(SrcEVT) || !TLI.isTypeLegal(DstEVT)) // Unhandled type. Halt "fast" selection and bail. return false; + MVT SrcVT = SrcEVT.getSimpleVT(); + MVT DstVT = DstEVT.getSimpleVT(); unsigned Op0 = getRegForValue(I->getOperand(0)); if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail. @@ -755,7 +755,7 @@ bool FastISel::SelectBitCast(const User *I) { // First, try to perform the bitcast by inserting a reg-reg copy. unsigned ResultReg = 0; - if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { + if (SrcVT == DstVT) { const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); // Don't attempt a cross-class copy. It will likely fail. @@ -768,8 +768,7 @@ bool FastISel::SelectBitCast(const User *I) { // If the reg-reg copy failed, select a BITCAST opcode. 
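SelectBitCast tries the reg-reg copy first because ISD::BITCAST leaves every bit unchanged and only reinterprets them under a new type, so for same-sized types a plain copy is all the instruction has to do. The portable C++ spelling of the same reinterpretation:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  float f = 1.0f;
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof bits); // bitcast f32 -> i32: the same 32 bits
  assert(bits == 0x3f800000u);         // IEEE-754 single-precision 1.0
}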
if (!ResultReg) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), - ISD::BITCAST, Op0, Op0IsKill); + ResultReg = FastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill); if (!ResultReg) return false; @@ -1059,7 +1058,7 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo, MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), TM(FuncInfo.MF->getTarget()), - TD(*TM.getTargetData()), + TD(*TM.getDataLayout()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), TRI(*TM.getRegisterInfo()), diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index b2a2a5c..b46edad 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -13,29 +13,29 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "function-lowering-info" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> using namespace llvm; @@ -66,8 +66,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(Fn->getReturnType(), - Fn->getAttributes().getRetAttributes(), Outs, TLI); + GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, TLI); CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF, Fn->isVarArg(), Outs, Fn->getContext()); @@ -80,9 +79,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { Type *Ty = AI->getAllocatedType(); - uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), + std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), AI->getAlignment()); TySize *= CUI->getZExtValue(); // Get total allocated size. 
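The static-alloca path above sizes the frame object as the element allocation size scaled by the constant array length, aligned to the larger of the type's preferred alignment and any explicit alignment on the alloca. A sketch with assumed example values (the numbers are made up):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  uint64_t TySize    = 8;  // assumed getTypeAllocSize(Ty) per element
  unsigned PrefAlign = 8;  // assumed getPrefTypeAlignment(Ty)
  unsigned DeclAlign = 16; // assumed explicit "align 16" on the alloca
  uint64_t Count     = 4;  // the ConstantInt array size

  unsigned Align = std::max(PrefAlign, DeclAlign);
  TySize *= Count;         // total allocated size, as in the hunk above
  assert(TySize == 32 && Align == 16);
}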
@@ -208,7 +207,7 @@ void FunctionLoweringInfo::clear() { } /// CreateReg - Allocate a single virtual register for the given type. -unsigned FunctionLoweringInfo::CreateReg(EVT VT) { +unsigned FunctionLoweringInfo::CreateReg(MVT VT) { return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); } @@ -226,7 +225,7 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { unsigned FirstReg = 0; for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; - EVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT); + MVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT); unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT); for (unsigned i = 0; i != NumRegs; ++i) { diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 6d2cdea..3b1abd7 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -16,18 +16,18 @@ #define DEBUG_TYPE "instr-emitter" #include "InstrEmitter.h" #include "SDNodeDbgValue.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; /// MinRCSize - Smallest register class we allow when constraining virtual @@ -99,7 +99,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, // the CopyToReg'd destination register instead of creating a new vreg. bool MatchReg = true; const TargetRegisterClass *UseRC = NULL; - EVT VT = Node->getValueType(ResNo); + MVT VT = Node->getSimpleValueType(ResNo); // Stick to the preferred register classes for legal types. 
if (TLI->isTypeLegal(VT)) @@ -124,7 +124,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, SDValue Op = User->getOperand(i); if (Op.getNode() != Node || Op.getResNo() != ResNo) continue; - EVT VT = Node->getValueType(Op.getResNo()); + MVT VT = Node->getSimpleValueType(Op.getResNo()); if (VT == MVT::Other || VT == MVT::Glue) continue; Match = false; @@ -203,7 +203,8 @@ unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node, return 0; } -void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, +void InstrEmitter::CreateVirtualRegisters(SDNode *Node, + MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap) { @@ -222,7 +223,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, unsigned NumResults = CountResults(Node); VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg(); assert(TargetRegisterInfo::isPhysicalRegister(VRBase)); - MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + MIB.addReg(VRBase, RegState::Define); } if (!VRBase && !IsClone && !IsCloned) @@ -237,7 +238,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); if (RegRC == RC) { VRBase = Reg; - MI->addOperand(MachineOperand::CreateReg(Reg, true)); + MIB.addReg(VRBase, RegState::Define); break; } } @@ -249,7 +250,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, if (VRBase == 0) { assert(RC && "Isn't a register operand!"); VRBase = MRI->createVirtualRegister(RC); - MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + MIB.addReg(VRBase, RegState::Define); } SDValue Op(Node, i); @@ -272,7 +273,8 @@ unsigned InstrEmitter::getVR(SDValue Op, // IMPLICIT_DEF can produce any type of result so its MCInstrDesc // does not include operand register class info. if (!VReg) { - const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType()); + const TargetRegisterClass *RC = + TLI->getRegClassFor(Op.getSimpleValueType()); VReg = MRI->createVirtualRegister(RC); } BuildMI(*MBB, InsertPos, Op.getDebugLoc(), @@ -290,7 +292,8 @@ unsigned InstrEmitter::getVR(SDValue Op, /// specified machine instr. Insert register copies if the register is /// not in the required register class. 
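Most of the churn in this file is one mechanical change: raw MachineInstr* manipulation through MachineOperand::Create* becomes chained MachineInstrBuilder calls. A toy builder showing the idiom being adopted (these types are stand-ins, not LLVM's):

#include <cassert>
#include <cstddef>
#include <vector>

struct Operand { int reg; bool isDef; };

class InstrBuilder {
  std::vector<Operand> ops;
public:
  // One fluent call replaces MI->addOperand(MachineOperand::CreateReg(...)).
  InstrBuilder &addReg(int reg, bool isDef = false) {
    ops.push_back({reg, isDef});
    return *this; // returning *this is what makes the calls chainable
  }
  std::size_t size() const { return ops.size(); }
};

int main() {
  InstrBuilder MIB;
  MIB.addReg(1, /*isDef=*/true).addReg(2).addReg(3);
  assert(MIB.size() == 3);
}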
void -InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, +InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, + SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, @@ -302,7 +305,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, unsigned VReg = getVR(Op, VRBaseMap); assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); - const MCInstrDesc &MCID = MI->getDesc(); + const MCInstrDesc &MCID = MIB->getDesc(); bool isOptDef = IIOpNum < MCID.getNumOperands() && MCID.OpInfo[IIOpNum].isOptionalDef(); @@ -314,8 +317,6 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, const TargetRegisterClass *DstRC = 0; if (IIOpNum < II->getNumOperands()) DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF)); - assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) && - "Don't have operand info for this instruction!"); if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), @@ -336,66 +337,63 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, !IsDebug && !(IsClone || IsCloned); if (isKill) { - unsigned Idx = MI->getNumOperands(); + unsigned Idx = MIB->getNumOperands(); while (Idx > 0 && - MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit()) + MIB->getOperand(Idx-1).isReg() && + MIB->getOperand(Idx-1).isImplicit()) --Idx; - bool isTied = MI->getDesc().getOperandConstraint(Idx, MCOI::TIED_TO) != -1; + bool isTied = MCID.getOperandConstraint(Idx, MCOI::TIED_TO) != -1; if (isTied) isKill = false; } - MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef, - false/*isImp*/, isKill, - false/*isDead*/, false/*isUndef*/, - false/*isEarlyClobber*/, - 0/*SubReg*/, IsDebug)); + MIB.addReg(VReg, getDefRegState(isOptDef) | getKillRegState(isKill) | + getDebugRegState(IsDebug)); } /// AddOperand - Add the specified operand to the specified machine instr. II /// specifies the instruction information for the node, and IIOpNum is the /// operand number (in the II) that we are adding. -void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, +void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, + SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { if (Op.isMachineOpcode()) { - AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, + AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, IsDebug, IsClone, IsCloned); } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { - MI->addOperand(MachineOperand::CreateImm(C->getSExtValue())); + MIB.addImm(C->getSExtValue()); } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) { - const ConstantFP *CFP = F->getConstantFPValue(); - MI->addOperand(MachineOperand::CreateFPImm(CFP)); + MIB.addFPImm(F->getConstantFPValue()); } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { // Turn additional physreg operands into implicit uses on non-variadic // instructions. This is used by call and return instructions passing // arguments in registers. 
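The rewritten addReg calls above compose register state from helpers like getDefRegState and getKillRegState, each contributing its flag bit only when the predicate holds, so properties combine with a single '|'. A sketch of that pattern (the bit values are invented, not LLVM's RegState layout):

#include <cassert>
#include <cstdint>

enum : uint32_t { Define = 1u << 0, Kill = 1u << 1, Debug = 1u << 2 };

uint32_t getDefRegState(bool b)   { return b ? Define : 0; }
uint32_t getKillRegState(bool b)  { return b ? Kill   : 0; }
uint32_t getDebugRegState(bool b) { return b ? Debug  : 0; }

int main() {
  uint32_t flags =
      getDefRegState(false) | getKillRegState(true) | getDebugRegState(false);
  assert(!(flags & Define) && (flags & Kill) && !(flags & Debug));
}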
bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic()); - MI->addOperand(MachineOperand::CreateReg(R->getReg(), false, Imp)); + MIB.addReg(R->getReg(), getImplRegState(Imp)); } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) { - MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask())); + MIB.addRegMask(RM->getRegMask()); } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { - MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(), - TGA->getTargetFlags())); + MIB.addGlobalAddress(TGA->getGlobal(), TGA->getOffset(), + TGA->getTargetFlags()); } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) { - MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock())); + MIB.addMBB(BBNode->getBasicBlock()); } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) { - MI->addOperand(MachineOperand::CreateFI(FI->getIndex())); + MIB.addFrameIndex(FI->getIndex()); } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) { - MI->addOperand(MachineOperand::CreateJTI(JT->getIndex(), - JT->getTargetFlags())); + MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags()); } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) { int Offset = CP->getOffset(); unsigned Align = CP->getAlignment(); Type *Type = CP->getType(); // MachineConstantPool wants an explicit alignment. if (Align == 0) { - Align = TM->getTargetData()->getPrefTypeAlignment(Type); + Align = TM->getDataLayout()->getPrefTypeAlignment(Type); if (Align == 0) { // Alignment of vector types. FIXME! - Align = TM->getTargetData()->getTypeAllocSize(Type); + Align = TM->getDataLayout()->getTypeAllocSize(Type); } } @@ -405,29 +403,26 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align); else Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align); - MI->addOperand(MachineOperand::CreateCPI(Idx, Offset, - CP->getTargetFlags())); + MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags()); } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) { - MI->addOperand(MachineOperand::CreateES(ES->getSymbol(), - ES->getTargetFlags())); + MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags()); } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) { - MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(), - BA->getTargetFlags())); + MIB.addBlockAddress(BA->getBlockAddress(), + BA->getOffset(), + BA->getTargetFlags()); } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) { - MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(), - TI->getOffset(), - TI->getTargetFlags())); + MIB.addTargetIndex(TI->getIndex(), TI->getOffset(), TI->getTargetFlags()); } else { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && "Chain and glue operands should occur at end of operand list!"); - AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, + AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, IsDebug, IsClone, IsCloned); } } unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, - EVT VT, DebugLoc DL) { + MVT VT, DebugLoc DL) { const TargetRegisterClass *VRC = MRI->getRegClass(VReg); const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx); @@ -478,7 +473,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // constraints on the %dst register, COPY can target all legal register // classes. 
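The constant-pool case above resolves a missing alignment with a two-step fallback: an explicit alignment of 0 means unspecified, so the preferred type alignment is tried next, and the type's allocation size is the last resort (the FIXME marks that as a stopgap for vector types). The same logic in isolation:

#include <cassert>

unsigned cpAlignment(unsigned Explicit, unsigned Pref, unsigned AllocSize) {
  unsigned Align = Explicit;
  if (Align == 0) Align = Pref;      // fall back to preferred alignment
  if (Align == 0) Align = AllocSize; // last resort, per the FIXME above
  return Align;
}

int main() {
  assert(cpAlignment(16, 8, 4) == 16); // an explicit alignment wins
  assert(cpAlignment(0, 8, 4)  == 8);  // otherwise preferred
  assert(cpAlignment(0, 0, 4)  == 4);  // otherwise allocation size
}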
unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); - const TargetRegisterClass *TRC = TLI->getRegClassFor(Node->getValueType(0)); + const TargetRegisterClass *TRC = + TLI->getRegClassFor(Node->getSimpleValueType(0)); unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); MachineInstr *DefMI = MRI->getVRegDef(VReg); @@ -501,7 +497,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // constrain its register class or issue a COPY to a compatible register // class. VReg = ConstrainForSubReg(VReg, SubIdx, - Node->getOperand(0).getValueType(), + Node->getOperand(0).getSimpleValueType(), Node->getDebugLoc()); // Create the destreg if it is missing. @@ -533,7 +529,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // // There is no constraint on the %src register class. // - const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getValueType(0)); + const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0)); SRC = TRI->getSubClassWithSubReg(SRC, SubIdx); assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG"); @@ -541,22 +537,22 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, VRBase = MRI->createVirtualRegister(SRC); // Create the insert_subreg or subreg_to_reg machine instruction. - MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc)); - MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + MachineInstrBuilder MIB = + BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc), VRBase); // If creating a subreg_to_reg, then the first input operand // is an implicit value immediate, otherwise it's a register if (Opc == TargetOpcode::SUBREG_TO_REG) { const ConstantSDNode *SD = cast<ConstantSDNode>(N0); - MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue())); + MIB.addImm(SD->getZExtValue()); } else - AddOperand(MI, N0, 0, 0, VRBaseMap, /*IsDebug=*/false, + AddOperand(MIB, N0, 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Add the subregster being inserted - AddOperand(MI, N1, 0, 0, VRBaseMap, /*IsDebug=*/false, + AddOperand(MIB, N1, 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); - MI->addOperand(MachineOperand::CreateImm(SubIdx)); - MBB->insert(InsertPos, MI); + MIB.addImm(SubIdx); + MBB->insert(InsertPos, MIB); } else llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg"); @@ -597,12 +593,11 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); - MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), - TII->get(TargetOpcode::REG_SEQUENCE), NewVReg); + const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); + MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg); unsigned NumOps = Node->getNumOperands(); assert((NumOps & 1) == 1 && "REG_SEQUENCE must have an odd number of operands!"); - const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); for (unsigned i = 1; i != NumOps; ++i) { SDValue Op = Node->getOperand(i); if ((i & 1) == 0) { @@ -621,11 +616,11 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, } } } - AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false, + AddOperand(MIB, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); } - MBB->insert(InsertPos, MI); + MBB->insert(InsertPos, MIB); SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; (void)isNew; // Silence 
compiler warning. @@ -662,7 +657,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, if (I==VRBaseMap.end()) MIB.addReg(0U); // undef else - AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap, + AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap, /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false); } else if (SD->getKind() == SDDbgValue::CONST) { const Value *V = SD->getConst(); @@ -738,12 +733,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, #endif // Create the new machine instruction. - MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II); + MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II); // Add result register values for things that are defined by this // instruction. if (NumResults) - CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap); + CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap); // Emit all of the actual operands of this instruction, adding them to the // instruction as appropriate. @@ -752,17 +747,17 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, "Unable to cope with optional defs and phys regs defs!"); unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0; for (unsigned i = NumSkip; i != NodeOperands; ++i) - AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, + AddOperand(MIB, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Transfer all of the memory reference descriptions of this instruction. - MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(), + MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(), cast<MachineSDNode>(Node)->memoperands_end()); // Insert the instruction into position in the block. This needs to // happen before any custom inserter hook is called so that the // hook knows where in the block to insert the replacement code. - MBB->insert(InsertPos, MI); + MBB->insert(InsertPos, MIB); // The MachineInstr may also define physregs instead of virtregs. These // physreg values can reach other instructions in different ways: @@ -820,13 +815,13 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Finally mark unused registers as dead. if (!UsedRegs.empty() || II.getImplicitDefs()) - MI->setPhysRegsDeadExcept(UsedRegs, *TRI); + MIB->setPhysRegsDeadExcept(UsedRegs, *TRI); // Run post-isel target hook to adjust this instruction if needed. #ifdef NDEBUG if (II.hasPostISelHook()) #endif - TLI->AdjustInstrPostInstrSelection(MI, Node); + TLI->AdjustInstrPostInstrSelection(MIB, Node); } /// EmitSpecialNode - Generate machine code for a target-independent node and @@ -890,19 +885,20 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, --NumOps; // Ignore the glue operand. // Create the inline asm machine instruction. - MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), - TII->get(TargetOpcode::INLINEASM)); + MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetOpcode::INLINEASM)); // Add the asm string as an external symbol operand. SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString); const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol(); - MI->addOperand(MachineOperand::CreateES(AsmStr)); + MIB.addExternalSymbol(AsmStr); - // Add the HasSideEffect and isAlignStack bits. + // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore + // bits. 
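The comment above now lists everything packed into the ExtraInfo immediate that the next lines emit; consumers later test it bit-wise. An illustrative packing (the bit positions are invented for this sketch, not InlineAsm's real encoding):

#include <cassert>
#include <cstdint>

enum : uint64_t {           // invented layout, for illustration only
  HasSideEffects = 1u << 0,
  IsAlignStack   = 1u << 1,
  AsmDialectBit  = 1u << 2, // e.g. 0 = AT&T, 1 = Intel
  MayLoad        = 1u << 3,
  MayStore       = 1u << 4,
};

int main() {
  uint64_t ExtraInfo = HasSideEffects | MayLoad | MayStore;
  assert(ExtraInfo & MayStore);        // readers test bits the same way
  assert(!(ExtraInfo & IsAlignStack));
}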
int64_t ExtraInfo = cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))-> getZExtValue(); - MI->addOperand(MachineOperand::CreateImm(ExtraInfo)); + MIB.addImm(ExtraInfo); // Remember to operand index of the group flags. SmallVector<unsigned, 8> GroupIdx; @@ -913,8 +909,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); - GroupIdx.push_back(MI->getNumOperands()); - MI->addOperand(MachineOperand::CreateImm(Flags)); + GroupIdx.push_back(MIB->getNumOperands()); + MIB.addImm(Flags); ++i; // Skip the ID value. switch (InlineAsm::getKind(Flags)) { @@ -925,20 +921,16 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // FIXME: Add dead flags for physical and virtual registers defined. // For now, mark physical register defs as implicit to help fast // regalloc. This makes inline asm look a lot like calls. - MI->addOperand(MachineOperand::CreateReg(Reg, true, - /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg))); + MIB.addReg(Reg, RegState::Define | + getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); } break; case InlineAsm::Kind_RegDefEarlyClobber: case InlineAsm::Kind_Clobber: for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true, - /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg), - /*isKill=*/ false, - /*isDead=*/ false, - /*isUndef=*/false, - /*isEarlyClobber=*/ true)); + MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber | + getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); } break; case InlineAsm::Kind_RegUse: // Use of register. @@ -947,7 +939,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // The addressing mode has been selected, just add all of the // operands to the machine instruction. for (unsigned j = 0; j != NumVals; ++j, ++i) - AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap, + AddOperand(MIB, Node->getOperand(i), 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Manually set isTied bits. 
@@ -957,7 +949,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, unsigned DefIdx = GroupIdx[DefGroup] + 1; unsigned UseIdx = GroupIdx.back() + 1; for (unsigned j = 0; j != NumVals; ++j) - MI->tieOperands(DefIdx + j, UseIdx + j); + MIB->tieOperands(DefIdx + j, UseIdx + j); } } break; @@ -968,9 +960,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode); const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD(); if (MD) - MI->addOperand(MachineOperand::CreateMetadata(MD)); + MIB.addMetadata(MD); - MBB->insert(InsertPos, MI); + MBB->insert(InsertPos, MIB); break; } } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index 9eddee9..a9c2203 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -16,12 +16,13 @@ #ifndef INSTREMITTER_H #define INSTREMITTER_H -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/SelectionDAG.h" namespace llvm { +class MachineInstrBuilder; class MCInstrDesc; class SDDbgValue; @@ -48,7 +49,8 @@ class InstrEmitter { unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, unsigned ResNo) const; - void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, + void CreateVirtualRegisters(SDNode *Node, + MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap); @@ -61,7 +63,8 @@ class InstrEmitter { /// AddRegisterOperand - Add the specified register as an operand to the /// specified machine instr. Insert register copies if the register is /// not in the required register class. - void AddRegisterOperand(MachineInstr *MI, SDValue Op, + void AddRegisterOperand(MachineInstrBuilder &MIB, + SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, @@ -71,7 +74,8 @@ class InstrEmitter { /// specifies the instruction information for the node, and IIOpNum is the /// operand number (in the II) that we are adding. IIOpNum and II are used for /// assertions only. - void AddOperand(MachineInstr *MI, SDValue Op, + void AddOperand(MachineInstrBuilder &MIB, + SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, @@ -81,7 +85,7 @@ class InstrEmitter { /// supports SubIdx sub-registers. Emit a copy if that isn't possible. /// Return the virtual register to use. unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, - EVT VT, DebugLoc DL); + MVT VT, DebugLoc DL); /// EmitSubregNode - Generate machine code for subreg nodes. 
/// diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 7b34170..5eaf67e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -11,26 +11,27 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/LLVMContext.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -101,7 +102,7 @@ private: SDNode *Node, bool isSigned); SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, - RTLIB::Libcall Call_PPCF128); + RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128); SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, RTLIB::Libcall Call_I16, @@ -321,7 +322,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. EVT StoredVT = ST->getMemoryVT(); - EVT RegVT = + MVT RegVT = TLI.getRegisterType(*DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), StoredVT.getSizeInBits())); @@ -447,7 +448,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Copy the value to a (aligned) stack slot using (unaligned) integer // loads and stores, then do a (aligned) load from the stack slot. - EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); + MVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; unsigned RegBytes = RegVT.getSizeInBits() / 8; unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; @@ -710,7 +711,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { { SDValue Value = ST->getValue(); - EVT VT = Value.getValueType(); + MVT VT = Value.getSimpleValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: @@ -718,7 +719,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // expand it. 
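The stack-slot strategy described in ExpandUnalignedStore above (store to an aligned slot, then copy the bytes to the final destination with accesses the target does allow) looks like this in portable C++, with memcpy standing in for the emitted integer load/store sequence:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  alignas(8) unsigned char buf[16] = {};
  uint32_t value = 0xdeadbeef;

  uint32_t slot = value;                    // the aligned "stack slot"
  std::memcpy(buf + 1, &slot, sizeof slot); // piecewise copy to an odd address

  uint32_t readback;
  std::memcpy(&readback, buf + 1, sizeof readback);
  assert(readback == value);                // same bits despite misalignment
}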
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); @@ -731,9 +732,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { return; } case TargetLowering::Promote: { - assert(VT.isVector() && "Unknown legal promote case!"); - Value = DAG.getNode(ISD::BITCAST, dl, - TLI.getTypeToPromoteTo(ISD::STORE, VT), Value); + MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT); + assert(NVT.getSizeInBits() == VT.getSizeInBits() && + "Can only promote stores to same size type"); + Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value); SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), isVolatile, @@ -817,14 +819,15 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); ReplaceNode(SDValue(Node, 0), Result); } else { - switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { + switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(), + StVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); } @@ -862,38 +865,36 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { - EVT VT = Node->getValueType(0); + MVT VT = Node->getSimpleValueType(0); SDValue RVal = SDValue(Node, 0); SDValue RChain = SDValue(Node, 1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: - // If this is an unaligned load and the target doesn't support it, - // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getTargetData()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast<LoadSDNode>(Node), - DAG, TLI, RVal, RChain); - } - } - break; + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getDataLayout()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain); + } + } + break; case TargetLowering::Custom: { - SDValue Res = TLI.LowerOperation(RVal, DAG); - if (Res.getNode()) { - RVal = Res; - RChain = Res.getValue(1); - } - break; + SDValue Res = TLI.LowerOperation(RVal, DAG); + if (Res.getNode()) { + RVal = Res; + RChain = Res.getValue(1); + } + break; } case TargetLowering::Promote: { - // Only promote a load of vector type to another. 
- assert(VT.isVector() && "Cannot promote this load!"); - // Change base type to a different vector type. - EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + assert(NVT.getSizeInBits() == VT.getSizeInBits() && + "Can only promote loads to same size type"); SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), @@ -1038,7 +1039,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = Ch; } else { bool isCustom = false; - switch (TLI.getLoadExtAction(ExtType, SrcVT)) { + switch (TLI.getLoadExtAction(ExtType, SrcVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; @@ -1060,7 +1061,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = - TLI.getTargetData()->getABITypeAlignment(Ty); + TLI.getDataLayout()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain); @@ -1185,7 +1186,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : Node->getOpcode() == ISD::SETCC ? 2 : 1; unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; - EVT OpVT = Node->getOperand(CompareOperand).getValueType(); + MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get(); Action = TLI.getCondCodeAction(CCCode, OpVT); @@ -1241,6 +1242,19 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; + case ISD::DEBUGTRAP: + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Expand) { + // replace ISD::DEBUGTRAP with ISD::TRAP + SDValue NewVal; + NewVal = DAG.getNode(ISD::TRAP, Node->getDebugLoc(), Node->getVTList(), + Node->getOperand(0)); + ReplaceNode(Node, NewVal.getNode()); + LegalizeOp(NewVal.getNode()); + return; + } + break; + default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; @@ -1579,7 +1593,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, DebugLoc dl) { - EVT OpVT = LHS.getValueType(); + MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); @@ -1588,26 +1602,71 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, break; case TargetLowering::Expand: { ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; + ISD::CondCode InvCC = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { default: llvm_unreachable("Don't know how to expand this condition!"); - case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break; - case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break; - case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break; - case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO; Opc = ISD::AND; break; - case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO; Opc = ISD::AND; break; - case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO; Opc = ISD::AND; break; - case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR; break; - 
case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR; break; - case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR; break; - case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR; break; - case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR; break; - case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR; break; - // FIXME: Implement more expansions. - } - - SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1); - SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2); + case ISD::SETO: + assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT) + == TargetLowering::Legal + && "If SETO is expanded, SETOEQ must be legal!"); + CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break; + case ISD::SETUO: + assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT) + == TargetLowering::Legal + && "If SETUO is expanded, SETUNE must be legal!"); + CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break; + case ISD::SETOEQ: + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETONE: + case ISD::SETUEQ: + case ISD::SETUNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: + case ISD::SETULE: + // If we are floating point, assign and break, otherwise fall through. + if (!OpVT.isInteger()) { + // We can use the 4th bit to tell if we are the unordered + // or ordered version of the opcode. + CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO; + Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND; + CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10); + break; + } + // Fallthrough if we are unsigned integer. + case ISD::SETLE: + case ISD::SETGT: + case ISD::SETGE: + case ISD::SETLT: + case ISD::SETNE: + case ISD::SETEQ: + InvCC = ISD::getSetCCSwappedOperands(CCCode); + if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) { + // We only support using the inverted operation and not a + // different manner of supporting expanding these cases. + llvm_unreachable("Don't know how to expand this condition!"); + } + LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC); + RHS = SDValue(); + CC = SDValue(); + return; + } + + SDValue SetCC1, SetCC2; + if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { + // If we aren't the ordered or unorder operation, + // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS). + SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1); + SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2); + } else { + // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) + SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1); + SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2); + } LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); RHS = SDValue(); CC = SDValue(); @@ -1626,7 +1685,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, DebugLoc dl) { // Create the stack frame object. unsigned SrcAlign = - TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType(). + TLI.getDataLayout()->getPrefTypeAlignment(SrcOp.getValueType(). getTypeForEVT(*DAG.getContext())); SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign); @@ -1638,7 +1697,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, unsigned SlotSize = SlotVT.getSizeInBits(); unsigned DestSize = DestVT.getSizeInBits(); Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); - unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType); + unsigned DestAlign = TLI.getDataLayout()->getPrefTypeAlignment(DestType); // Emit a store to the stack slot. Use a truncstore if the input value is // later than DestVT. 
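The rewritten expansion above replaces the per-opcode table with a computed rule: for floating-point compares other than SETO/SETUO, bit 3 of the condition code picks the unordered family (OR with SETUO) versus the ordered family (AND with SETO), and the low bits give CC1. Concretely, SETOLT decomposes as below; C++'s < is already an ordered compare, so the AND only mirrors the shape of the DAG expansion:

#include <cassert>
#include <cmath>

// SETOLT(a, b) expands to SETLT(a, b) AND SETO(a, b).
bool setolt_expanded(double a, double b) {
  bool cc1 = a < b;                            // the SETLT half (CC1)
  bool ord = !std::isnan(a) && !std::isnan(b); // the SETO half (CC2)
  return cc1 && ord;                           // Opc == ISD::AND
}

int main() {
  assert(setolt_expanded(1.0, 2.0));
  assert(!setolt_expanded(2.0, 1.0));
  assert(!setolt_expanded(NAN, 2.0)); // unordered operands fail SETOLT
}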
@@ -1782,6 +1841,26 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { return ExpandVectorBuildThroughStack(Node); } +static bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, + SDValue &Chain, const TargetLowering &TLI) { + const Function *F = DAG.getMachineFunction().getFunction(); + + // Conservatively require the attributes of the call to match those of + // the return. Ignore noalias because it doesn't affect the call sequence. + Attribute CallerRetAttr = F->getAttributes().getRetAttributes(); + if (AttrBuilder(CallerRetAttr) + .removeAttribute(Attribute::NoAlias).hasAttributes()) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + if (CallerRetAttr.hasAttribute(Attribute::ZExt) || + CallerRetAttr.hasAttribute(Attribute::SExt)) + return false; + + // Check if the only use is a function return node. + return TLI.isUsedByReturnOnly(Node, Chain); +} + // ExpandLibCall - Expand a node into a call to a libcall. If the result value // does not fit into a register, return the lo part and set the hi part to the // by-reg argument. If it does fit into a single register, return the result @@ -1899,6 +1978,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { @@ -1906,6 +1986,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; case MVT::f80: LC = Call_F80; break; + case MVT::f128: LC = Call_F128; break; case MVT::ppcf128: LC = Call_PPCF128; break; } return ExpandLibCall(LC, Node, false); @@ -2787,7 +2868,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, - DAG.getConstant(TLI.getTargetData()-> + DAG.getConstant(TLI.getDataLayout()-> getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer @@ -2975,77 +3056,95 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } case ISD::FSQRT: Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, - RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128)); + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128)); break; case ISD::FSIN: Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_PPCF128)); + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128)); break; case ISD::FCOS: Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_PPCF128)); + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128)); break; case ISD::FLOG: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_PPCF128)); + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128)); break; case ISD::FLOG2: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128)); + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128)); break; case ISD::FLOG10: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, - RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128)); + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128)); break; case ISD::FEXP: Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, - 
RTLIB::EXP_F80, RTLIB::EXP_PPCF128)); + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128)); break; case ISD::FEXP2: Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128)); + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128)); break; case ISD::FTRUNC: Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128)); + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128)); break; case ISD::FFLOOR: Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128)); + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128)); break; case ISD::FCEIL: Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, - RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128)); + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128)); break; case ISD::FRINT: Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, - RTLIB::RINT_F80, RTLIB::RINT_PPCF128)); + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128)); break; case ISD::FNEARBYINT: Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128)); break; case ISD::FPOWI: Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_PPCF128)); + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128)); break; case ISD::FPOW: Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_PPCF128)); + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128)); break; case ISD::FDIV: Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, - RTLIB::DIV_F80, RTLIB::DIV_PPCF128)); + RTLIB::DIV_F80, RTLIB::DIV_F128, + RTLIB::DIV_PPCF128)); break; case ISD::FREM: Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, - RTLIB::REM_F80, RTLIB::REM_PPCF128)); + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128)); break; case ISD::FMA: Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, - RTLIB::FMA_F80, RTLIB::FMA_PPCF128)); + RTLIB::FMA_F80, RTLIB::FMA_F128, + RTLIB::FMA_PPCF128)); break; case ISD::FP16_TO_FP32: Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); @@ -3109,6 +3208,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp3 = Node->getOperand(1); if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || (isDivRemLibcallAvailable(Node, isSigned, TLI) && + // If div is legal, it's better to do the normal expansion + !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) && useDivRem(Node, isSigned, false))) { Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { @@ -3366,7 +3467,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT PTy = TLI.getPointerTy(); - const TargetData &TD = *TLI.getTargetData(); + const DataLayout &TD = *TLI.getDataLayout(); unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); @@ -3516,13 +3617,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { void SelectionDAGLegalize::PromoteNode(SDNode *Node) { SmallVector<SDValue, 8> Results; - EVT OVT = Node->getValueType(0); + MVT OVT = Node->getSimpleValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || Node->getOpcode() == ISD::SINT_TO_FP || Node->getOpcode() == ISD::SETCC) { - OVT = 
Node->getOperand(0).getValueType(); + OVT = Node->getOperand(0).getSimpleValueType(); } - EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); DebugLoc dl = Node->getDebugLoc(); SDValue Tmp1, Tmp2, Tmp3; switch (Node->getOpcode()) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index e393896..92dc5a9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1245,32 +1245,30 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); - if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType()) - == TargetLowering::Custom) - Res = TLI.LowerOperation(SDValue(N, 0), DAG); - - if (Res.getNode() == 0) { - switch (N->getOpcode()) { - default: - #ifndef NDEBUG - dbgs() << "ExpandFloatOperand Op #" << OpNo << ": "; - N->dump(&DAG); dbgs() << "\n"; - #endif - llvm_unreachable("Do not know how to expand this operator's operand!"); - - case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; - case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; - case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; - - case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; - case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; - case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; - case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; - case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break; - case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break; - case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N), - OpNo); break; - } + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ExpandFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to expand this operator's operand!"); + + case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; + case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; + case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + + case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; + case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; + case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; + case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; + case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break; + case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break; + case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N), + OpNo); break; } // If the result is null, the sub-method took care of registering results etc. 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index e8e968a..5e33ef1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -19,7 +19,7 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -644,8 +644,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { EVT SmallVT = LHS.getValueType(); // To determine if the result overflowed in a larger type, we extend the - // input to the larger type, do the multiply, then check the high bits of - // the result to see if the overflow happened. + // input to the larger type, do the multiply (checking if it overflows), + // then also check the high bits of the result to see if overflow happened + // there. if (N->getOpcode() == ISD::SMULO) { LHS = SExtPromotedInteger(LHS); RHS = SExtPromotedInteger(RHS); @@ -653,24 +654,31 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { LHS = ZExtPromotedInteger(LHS); RHS = ZExtPromotedInteger(RHS); } - SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS); + SDVTList VTs = DAG.getVTList(LHS.getValueType(), N->getValueType(1)); + SDValue Mul = DAG.getNode(N->getOpcode(), DL, VTs, LHS, RHS); - // Overflow occurred iff the high part of the result does not - // zero/sign-extend the low part. + // Overflow occurred if it occurred in the larger type, or if the high part + // of the result does not zero/sign-extend the low part. Check this second + // possibility first. SDValue Overflow; if (N->getOpcode() == ISD::UMULO) { - // Unsigned overflow occurred iff the high part is non-zero. + // Unsigned overflow occurred if the high part is non-zero. SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, DAG.getIntPtrConstant(SmallVT.getSizeInBits())); Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, DAG.getConstant(0, Hi.getValueType()), ISD::SETNE); } else { - // Signed overflow occurred iff the high part does not sign extend the low. + // Signed overflow occurred if the high part does not sign extend the low. SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(), Mul, DAG.getValueType(SmallVT)); Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE); } + // The only other way for overflow to occur is if the multiplication in the + // larger type itself overflowed. + Overflow = DAG.getNode(ISD::OR, DL, N->getValueType(1), Overflow, + SDValue(Mul.getNode(), 1)); + // Use the calculated overflow everywhere. ReplaceValueWith(SDValue(N, 1), Overflow); return Mul; @@ -695,7 +703,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); - EVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT); + MVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT); unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT); // The argument is passed as NumRegs registers of type RegVT. 
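The PromoteIntRes_XMULO hunk above turns the promoted multiply into another [US]MULO node and then ORs that node's own overflow bit into the high-part check. A hedged model of the unsigned case in plain C++ (umul16Overflows is an illustrative helper, not SelectionDAG code): widen both operands, multiply, keep the low half, and flag overflow when the high half is non-zero.

#include <cstdint>

// 16-bit unsigned multiply "promoted" to 32 bits, mirroring the DAG logic:
// the low half is the result, a non-zero high half means overflow. The DAG
// version additionally ORs in the overflow flag of the widened multiply
// itself, since after promotion that node is still an XMULO.
bool umul16Overflows(uint16_t A, uint16_t B, uint16_t &Lo) {
  uint32_t Wide = uint32_t(A) * uint32_t(B); // zero-extended operands
  Lo = uint16_t(Wide);                       // truncated result
  return (Wide >> 16) != 0;                  // high part non-zero => overflow
}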
@@ -2253,32 +2261,35 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); - EVT PtrVT = TLI.getPointerTy(); - Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); DebugLoc dl = N->getDebugLoc(); // A divide for UMULO should be faster than a function call. if (N->getOpcode() == ISD::UMULO) { SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); - DebugLoc DL = N->getDebugLoc(); - SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS); + SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS); SplitInteger(MUL, Lo, Hi); // A divide for UMULO will be faster than a function call. Select to // make sure we aren't using 0. SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), - RHS, DAG.getConstant(0, VT), ISD::SETNE); + RHS, DAG.getConstant(0, VT), ISD::SETEQ); SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero, DAG.getConstant(1, VT), RHS); - SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero); - SDValue Overflow; - Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE); + SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero); + SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS, + ISD::SETNE); + Overflow = DAG.getNode(ISD::SELECT, dl, N->getValueType(1), isZero, + DAG.getConstant(0, N->getValueType(1)), + Overflow); ReplaceValueWith(SDValue(N, 1), Overflow); return; } + Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); + EVT PtrVT = TLI.getPointerTy(); + Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); + // Replace this with a libcall that will check overflow. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i32) @@ -2538,7 +2549,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, // NOTE: on targets without efficient SELECT of bools, we can always use // this identity: (B1 ? 
B2 : B3) --> (B1 & B2)|(!B1&B3) - TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, true, NULL); + TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, NULL); SDValue Tmp1, Tmp2; Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()), LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 39337ff..6aea2d8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -14,9 +14,9 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "llvm/CallingConv.h" -#include "llvm/Target/TargetData.h" #include "llvm/ADT/SetVector.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 37f0e60..8c53ba3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -17,12 +17,12 @@ #define SELECTIONDAG_LEGALIZETYPES_H #define DEBUG_TYPE "legalize-types" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" namespace llvm { @@ -578,6 +578,7 @@ private: // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. bool SplitVectorOperand(SDNode *N, unsigned OpNo); + SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo); SDValue SplitVecOp_UnaryOp(SDNode *N); SDValue SplitVecOp_BITCAST(SDNode *N); @@ -634,7 +635,7 @@ private: SDValue WidenVecRes_InregOp(SDNode *N); // Widen Vector Operand. - bool WidenVectorOperand(SDNode *N, unsigned ResNo); + bool WidenVectorOperand(SDNode *N, unsigned OpNo); SDValue WidenVecOp_BITCAST(SDNode *N); SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 06f6bd6..222d1c0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -20,7 +20,7 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "llvm/Target/TargetData.h" +#include "llvm/IR/DataLayout.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -94,14 +94,48 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { if (InVT.isVector() && OutVT.isInteger()) { // Handle cases like i64 = BITCAST v1i64 on x86, where the operand // is legal but the result is not. - EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2); + unsigned NumElems = 2; + EVT ElemVT = NOutVT; + EVT NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems); + + // If <ElemVT * N> is not a legal type, try <ElemVT/2 * (N*2)>. + while (!isTypeLegal(NVT)) { + unsigned NewSizeInBits = ElemVT.getSizeInBits() / 2; + // If the element size is smaller than a byte, bail.
+ if (NewSizeInBits < 8) + break; + NumElems *= 2; + ElemVT = EVT::getIntegerVT(*DAG.getContext(), NewSizeInBits); + NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems); + } if (isTypeLegal(NVT)) { SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp); - Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, - DAG.getIntPtrConstant(1)); + + SmallVector<SDValue, 8> Vals; + for (unsigned i = 0; i < NumElems; ++i) + Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, + CastInOp, DAG.getIntPtrConstant(i))); + + // Build Lo, Hi pair by pairing extracted elements if needed. + unsigned Slot = 0; + for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) { + // Each iteration will BUILD_PAIR two nodes and append the result until + // there are only two nodes left, i.e. Lo and Hi. + SDValue LHS = Vals[Slot]; + SDValue RHS = Vals[Slot + 1]; + + if (TLI.isBigEndian()) + std::swap(LHS, RHS); + + Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, + EVT::getIntegerVT( + *DAG.getContext(), + LHS.getValueType().getSizeInBits() << 1), + LHS, RHS)); + } + Lo = Vals[Slot++]; + Hi = Vals[Slot++]; if (TLI.isBigEndian()) std::swap(Lo, Hi); @@ -116,7 +150,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Create the stack frame object. Make sure it is aligned for both // the source and expanded destination types. unsigned Alignment = - TLI.getTargetData()->getPrefTypeAlignment(NOutVT. + TLI.getDataLayout()->getPrefTypeAlignment(NOutVT. getTypeForEVT(*DAG.getContext())); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 22f8d51..de6bbe3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -142,9 +142,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { } else if (Op.getOpcode() == ISD::STORE) { StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); EVT StVT = ST->getMemoryVT(); - EVT ValVT = ST->getValue().getValueType(); + MVT ValVT = ST->getValue().getSimpleValueType(); if (StVT.isVector() && ST->isTruncatingStore()) - switch (TLI.getTruncStoreAction(ValVT, StVT)) { + switch (TLI.getTruncStoreAction(ValVT, StVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: return TranslateLegalizeResults(Op, Result); @@ -221,6 +221,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FFLOOR: + case ISD::FP_ROUND: + case ISD::FP_EXTEND: case ISD::FMA: case ISD::SIGN_EXTEND_INREG: QueryType = Node->getValueType(0); @@ -291,10 +293,10 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { // Vector "promotion" is basically just bitcasting and doing the operation // in a different type. For example, x86 promotes ISD::AND on v2i32 to // v1i64. 
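// (Illustrative aside, hedged, not part of the patch: per the comment above,
// promotion of a vector op keeps the bit width and only changes the element
// shape, e.g. on x86
//     v2i32 = AND A, B
// becomes
//     v2i32 = BITCAST (AND v1i64 (BITCAST A), (BITCAST B))
// which is why bitcasting each operand below is all that is needed.)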
- EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); assert(Op.getNode()->getNumValues() == 1 && "Can't promote a vector with multiple results!"); - EVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); DebugLoc dl = Op.getDebugLoc(); SmallVector<SDValue, 4> Operands(Op.getNumOperands()); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 4095728..09a50d9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -21,7 +21,7 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "llvm/Target/TargetData.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -749,7 +749,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = - TLI.getTargetData()->getPrefTypeAlignment(VecType); + TLI.getDataLayout()->getPrefTypeAlignment(VecType); Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT, false, false, 0); @@ -1030,7 +1030,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; - + case ISD::VSELECT: + Res = SplitVecOp_VSELECT(N, OpNo); + break; case ISD::CTTZ: case ISD::CTLZ: case ISD::CTPOP: @@ -1064,6 +1066,58 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { return false; } +SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { + // The only possibility for an illegal operand is the mask, since result type + // legalization would have handled this node already otherwise. 
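// (Hedged aside on the split built below: conceptually
//     VSELECT <8 x i1> M, <8 x T> A, <8 x T> B
// becomes
//     CONCAT_VECTORS (VSELECT M.lo, A.lo, B.lo),
//                    (VSELECT M.hi, A.hi, B.hi)
// with each .lo/.hi half taken by EXTRACT_SUBVECTOR at offsets 0 and
// LoNumElts; the 8-element shape is only an example.)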
+ assert(OpNo == 0 && "Illegal operand must be mask"); + + SDValue Mask = N->getOperand(0); + SDValue Src0 = N->getOperand(1); + SDValue Src1 = N->getOperand(2); + DebugLoc DL = N->getDebugLoc(); + EVT MaskVT = Mask.getValueType(); + assert(MaskVT.isVector() && "VSELECT without a vector mask?"); + + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + assert(Lo.getValueType() == Hi.getValueType() && + "Lo and Hi have differing types"); + + unsigned LoNumElts = Lo.getValueType().getVectorNumElements(); + unsigned HiNumElts = Hi.getValueType().getVectorNumElements(); + assert(LoNumElts == HiNumElts && "Asymmetric vector split?"); + + LLVMContext &Ctx = *DAG.getContext(); + SDValue Zero = DAG.getIntPtrConstant(0); + SDValue LoElts = DAG.getIntPtrConstant(LoNumElts); + EVT Src0VT = Src0.getValueType(); + EVT Src0EltTy = Src0VT.getVectorElementType(); + EVT MaskEltTy = MaskVT.getVectorElementType(); + + EVT LoOpVT = EVT::getVectorVT(Ctx, Src0EltTy, LoNumElts); + EVT LoMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, LoNumElts); + EVT HiOpVT = EVT::getVectorVT(Ctx, Src0EltTy, HiNumElts); + EVT HiMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, HiNumElts); + + SDValue LoOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src0, Zero); + SDValue LoOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src1, Zero); + + SDValue HiOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src0, LoElts); + SDValue HiOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src1, LoElts); + + SDValue LoMask = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoMaskVT, Mask, Zero); + SDValue HiMask = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiMaskVT, Mask, LoElts); + + SDValue LoSelect = + DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1); + SDValue HiSelect = + DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect); +} + SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { // The result has a legal vector type, but the input needs splitting. EVT ResVT = N->getValueType(0); @@ -2082,16 +2136,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { //===----------------------------------------------------------------------===// // Widen Vector Operand //===----------------------------------------------------------------------===// -bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Widen node operand " << ResNo << ": "; +bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); + // See if the target wants to custom widen this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + switch (N->getOpcode()) { default: #ifndef NDEBUG - dbgs() << "WidenVectorOperand op #" << ResNo << ": "; + dbgs() << "WidenVectorOperand op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; #endif diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index c3794d5..473e138 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -21,13 +21,13 @@ #define DEBUG_TYPE "scheduler" #include "llvm/CodeGen/ResourcePriorityQueue.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -94,9 +94,9 @@ ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { continue; for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { - EVT VT = ScegN->getValueType(i); + MVT VT = ScegN->getSimpleValueType(i); if (TLI->isTypeLegal(VT) - && (TLI->getRegClassFor(VT)->getID() == RCId)) { + && (TLI->getRegClassFor(VT)->getID() == RCId)) { NumberDeps++; break; } @@ -132,9 +132,9 @@ unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { const SDValue &Op = ScegN->getOperand(i); - EVT VT = Op.getNode()->getValueType(Op.getResNo()); + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); if (TLI->isTypeLegal(VT) - && (TLI->getRegClassFor(VT)->getID() == RCId)) { + && (TLI->getRegClassFor(VT)->getID() == RCId)) { NumberDeps++; break; } @@ -332,7 +332,7 @@ signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { // Gen estimate. for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) { - EVT VT = SU->getNode()->getValueType(i); + MVT VT = SU->getNode()->getSimpleValueType(i); if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT) && TLI->getRegClassFor(VT)->getID() == RCId) @@ -341,7 +341,7 @@ signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { // Kill estimate. for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) { const SDValue &Op = SU->getNode()->getOperand(i); - EVT VT = Op.getNode()->getValueType(Op.getResNo()); + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); if (isa<ConstantSDNode>(Op.getNode())) continue; @@ -485,7 +485,7 @@ void ResourcePriorityQueue::scheduledNode(SUnit *SU) { if (ScegN->isMachineOpcode()) { // Estimate generated regs. for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { - EVT VT = ScegN->getValueType(i); + MVT VT = ScegN->getSimpleValueType(i); if (TLI->isTypeLegal(VT)) { const TargetRegisterClass *RC = TLI->getRegClassFor(VT); @@ -496,7 +496,7 @@ void ResourcePriorityQueue::scheduledNode(SUnit *SU) { // Estimate killed regs. 
for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { const SDValue &Op = ScegN->getOperand(i); - EVT VT = Op.getNode()->getValueType(Op.getResNo()); + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); if (TLI->isTypeLegal(VT)) { const TargetRegisterClass *RC = TLI->getRegClassFor(VT); @@ -604,10 +604,8 @@ SUnit *ResourcePriorityQueue::pop() { std::vector<SUnit *>::iterator Best = Queue.begin(); if (!DisableDFASched) { signed BestCost = SUSchedulingCost(*Best); - for (std::vector<SUnit *>::iterator I = Queue.begin(), + for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), E = Queue.end(); I != E; ++I) { - if (*I == *Best) - continue; if (SUSchedulingCost(*I) > BestCost) { BestCost = SUSchedulingCost(*I); diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index 2dcb229..4af7172 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -15,8 +15,8 @@ #define LLVM_CODEGEN_SDNODEDBGVALUE_H #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/DebugLoc.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/DebugLoc.h" namespace llvm { diff --git a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h index f88b26d..d2269f8 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h +++ b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h @@ -28,8 +28,8 @@ class SDNode; class SDNodeOrdering { DenseMap<const SDNode*, unsigned> OrderMap; - void operator=(const SDNodeOrdering&); // Do not implement. - SDNodeOrdering(const SDNodeOrdering&); // Do not implement. + void operator=(const SDNodeOrdering&) LLVM_DELETED_FUNCTION; + SDNodeOrdering(const SDNodeOrdering&) LLVM_DELETED_FUNCTION; public: SDNodeOrdering() {} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index b7ce48a..d1f36cb 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -12,19 +12,20 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "pre-RA-sched" -#include "ScheduleDAGSDNodes.h" -#include "llvm/InlineAsm.h" #include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/Debug.h" +#include "InstrEmitter.h" +#include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; STATISTIC(NumUnfolds, "Number of nodes unfolded"); @@ -34,6 +35,10 @@ STATISTIC(NumPRCopies, "Number of physical copies"); static RegisterScheduler fastDAGScheduler("fast", "Fast suboptimal list scheduling", createFastDAGScheduler); +static RegisterScheduler + linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling", + createDAGLinearizer); + namespace { /// FastPriorityQueue - A degenerate priority queue that considers @@ -331,7 +336,9 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { } } if (isNewLoad) { - AddPred(NewSU, SDep(LoadSU, 
SDep::Order, LoadSU->Latency)); + SDep D(LoadSU, SDep::Barrier); + D.setLatency(LoadSU->Latency); + AddPred(NewSU, D); } ++NumUnfolds; @@ -407,9 +414,12 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { RemovePred(DelDeps[i].first, DelDeps[i].second); } - - AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); - AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); + SDep FromDep(SU, SDep::Data, Reg); + FromDep.setLatency(SU->Latency); + AddPred(CopyFromSU, FromDep); + SDep ToDep(CopyFromSU, SDep::Data, 0); + ToDep.setLatency(CopyFromSU->Latency); + AddPred(CopyToSU, ToDep); Copies.push_back(CopyFromSU); Copies.push_back(CopyToSU); @@ -586,18 +596,14 @@ void ScheduleDAGFast::ListScheduleBottomUp() { InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum << " to SU #" << Copies.front()->NodeNum << "\n"); - AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, /*isArtificial=*/true)); + AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); NewDef = Copies.back(); } DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; - AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, /*isArtificial=*/true)); + AddPred(NewDef, SDep(TrySU, SDep::Artificial)); TrySU->isAvailable = false; CurSU = NewDef; } @@ -629,6 +635,155 @@ void ScheduleDAGFast::ListScheduleBottomUp() { #endif } + +namespace { +//===----------------------------------------------------------------------===// +// ScheduleDAGLinearize - No scheduling scheduler, it simply linearizes the +// DAG in topological order. +// IMPORTANT: this may not work for targets with physreg dependency. +// +class ScheduleDAGLinearize : public ScheduleDAGSDNodes { +public: + ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {} + + void Schedule(); + + MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); + +private: + std::vector<SDNode*> Sequence; + DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user + + void ScheduleNode(SDNode *N); +}; +} // end anonymous namespace + +void ScheduleDAGLinearize::ScheduleNode(SDNode *N) { + if (N->getNodeId() != 0) + llvm_unreachable(0); + + if (!N->isMachineOpcode() && + (N->getOpcode() == ISD::EntryToken || isPassiveNode(N))) + // These nodes do not need to be translated into MIs. + return; + + DEBUG(dbgs() << "\n*** Scheduling: "); + DEBUG(N->dump(DAG)); + Sequence.push_back(N); + + unsigned NumOps = N->getNumOperands(); + if (unsigned NumLeft = NumOps) { + SDNode *GluedOpN = 0; + do { + const SDValue &Op = N->getOperand(NumLeft-1); + SDNode *OpN = Op.getNode(); + + if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) { + // Schedule glue operand right above N. + GluedOpN = OpN; + assert(OpN->getNodeId() != 0 && "Glue operand not ready?"); + OpN->setNodeId(0); + ScheduleNode(OpN); + continue; + } + + if (OpN == GluedOpN) + // Glue operand is already scheduled. + continue; + + DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN); + if (DI != GluedMap.end() && DI->second != N) + // Users of glues are counted against the glued users.
+ OpN = DI->second; + + unsigned Degree = OpN->getNodeId(); + assert(Degree > 0 && "Predecessor over-released!"); + OpN->setNodeId(--Degree); + if (Degree == 0) + ScheduleNode(OpN); + } while (--NumLeft); + } +} + +/// findGluedUser - Find the representative use of a glue value by walking +/// the use chain. +static SDNode *findGluedUser(SDNode *N) { + while (SDNode *Glued = N->getGluedUser()) + N = Glued; + return N; +} + +void ScheduleDAGLinearize::Schedule() { + DEBUG(dbgs() << "********** DAG Linearization **********\n"); + + SmallVector<SDNode*, 8> Glues; + unsigned DAGSize = 0; + for (SelectionDAG::allnodes_iterator I = DAG->allnodes_begin(), + E = DAG->allnodes_end(); I != E; ++I) { + SDNode *N = I; + + // Use node id to record degree. + unsigned Degree = N->use_size(); + N->setNodeId(Degree); + unsigned NumVals = N->getNumValues(); + if (NumVals && N->getValueType(NumVals-1) == MVT::Glue && + N->hasAnyUseOfValue(NumVals-1)) { + SDNode *User = findGluedUser(N); + if (User) { + Glues.push_back(N); + GluedMap.insert(std::make_pair(N, User)); + } + } + + if (N->isMachineOpcode() || + (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N))) + ++DAGSize; + } + + for (unsigned i = 0, e = Glues.size(); i != e; ++i) { + SDNode *Glue = Glues[i]; + SDNode *GUser = GluedMap[Glue]; + unsigned Degree = Glue->getNodeId(); + unsigned UDegree = GUser->getNodeId(); + + // Glue user must be scheduled together with the glue operand. So other + // users of the glue operand must be treated as its users. + SDNode *ImmGUser = Glue->getGluedUser(); + for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end(); + ui != ue; ++ui) + if (*ui == ImmGUser) + --Degree; + GUser->setNodeId(UDegree + Degree); + Glue->setNodeId(1); + } + + Sequence.reserve(DAGSize); + ScheduleNode(DAG->getRoot().getNode()); +} + +MachineBasicBlock* +ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { + InstrEmitter Emitter(BB, InsertPos); + DenseMap<SDValue, unsigned> VRBaseMap; + + DEBUG({ + dbgs() << "\n*** Final schedule ***\n"; + }); + + // FIXME: Handle dbg_values. 
+ unsigned NumNodes = Sequence.size(); + for (unsigned i = 0; i != NumNodes; ++i) { + SDNode *N = Sequence[NumNodes-i-1]; + DEBUG(N->dump(DAG)); + Emitter.EmitNode(N, false, false, VRBaseMap); + } + + DEBUG(dbgs() << '\n'); + + InsertPos = Emitter.getInsertPos(); + return Emitter.getBlock(); +} + //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// @@ -637,3 +792,8 @@ llvm::ScheduleDAGSDNodes * llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { return new ScheduleDAGFast(*IS->MF); } + +llvm::ScheduleDAGSDNodes * +llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGLinearize(*IS->MF); +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 2b86e36..31b9bf3 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -16,22 +16,22 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "pre-RA-sched" -#include "ScheduleDAGSDNodes.h" -#include "llvm/InlineAsm.h" #include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" +#include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include <climits> using namespace llvm; @@ -156,7 +156,7 @@ public: CodeGenOpt::Level OptLevel) : ScheduleDAGSDNodes(mf), NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), - Topo(SUnits) { + Topo(SUnits, NULL) { const TargetMachine &tm = mf.getTarget(); if (DisableSchedCycles || !NeedLatency) @@ -268,7 +268,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, const TargetRegisterInfo *TRI, unsigned &RegClass, unsigned &Cost, const MachineFunction &MF) { - EVT VT = RegDefPos.GetValue(); + MVT VT = RegDefPos.GetValue(); // Special handling for untyped values. These values can only come from // the expansion of custom DAG-to-DAG patterns. @@ -1058,7 +1058,9 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { // Add a data dependency to reflect that NewSU reads the value defined // by LoadSU. - AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency)); + SDep D(LoadSU, SDep::Data, 0); + D.setLatency(LoadSU->Latency); + AddPred(NewSU, D); if (isNewLoad) AvailableQueue->addNode(LoadSU); @@ -1140,17 +1142,18 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, // Avoid scheduling the def-side copy before other successors. Otherwise // we could introduce another physreg interference on the copy and // continue inserting copies indefinitely. 
- SDep D(CopyFromSU, SDep::Order, /*Latency=*/0, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, /*isArtificial=*/true); - AddPred(SuccSU, D); + AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial)); } } for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) RemovePred(DelDeps[i].first, DelDeps[i].second); - AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); - AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); + SDep FromDep(SU, SDep::Data, Reg); + FromDep.setLatency(SU->Latency); + AddPred(CopyFromSU, FromDep); + SDep ToDep(CopyFromSU, SDep::Data, 0); + ToDep.setLatency(CopyFromSU->Latency); + AddPred(CopyToSU, ToDep); AvailableQueue->updateNode(SU); AvailableQueue->addNode(CopyFromSU); @@ -1359,9 +1362,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { if (!BtSU->isPending) AvailableQueue->remove(BtSU); } - AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, /*isArtificial=*/true)); + AddPred(TrySU, SDep(BtSU, SDep::Artificial)); // If one or more successors have been unscheduled, then the current // node is no longer available. Schedule a successor that's now @@ -1413,20 +1414,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum << " to SU #" << Copies.front()->NodeNum << "\n"); - AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); + AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); NewDef = Copies.back(); } DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; - AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); + AddPred(NewDef, SDep(TrySU, SDep::Artificial)); TrySU->isAvailable = false; CurSU = NewDef; } @@ -1758,7 +1753,7 @@ public: return V; } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void dump(ScheduleDAG *DAG) const { // Emulate pop() without clobbering NodeQueueIds.
std::vector<SUnit*> DumpQueue = Queue; @@ -1897,7 +1892,7 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { //===----------------------------------------------------------------------===// void RegReductionPQBase::dumpRegPressure() const { -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), E = TRI->regclass_end(); I != E; ++I) { const TargetRegisterClass *RC = *I; @@ -1944,7 +1939,7 @@ bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = N->getValueType(i); + MVT VT = N->getSimpleValueType(i); if (!N->hasAnyUseOfValue(i)) continue; unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); @@ -1978,7 +1973,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { } for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); RegDefPos.IsValid(); RegDefPos.Advance()) { - EVT VT = RegDefPos.GetValue(); + MVT VT = RegDefPos.GetValue(); unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); if (RegPressure[RCId] >= RegLimit[RCId]) ++PDiff; @@ -1991,7 +1986,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = N->getValueType(i); + MVT VT = N->getSimpleValueType(i); if (!N->hasAnyUseOfValue(i)) continue; unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); @@ -2102,7 +2097,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) { const SDNode *PN = PredSU->getNode(); if (!PN->isMachineOpcode()) { if (PN->getOpcode() == ISD::CopyFromReg) { - EVT VT = PN->getValueType(0); + MVT VT = PN->getSimpleValueType(0); unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); } @@ -2114,14 +2109,14 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) { if (POpc == TargetOpcode::EXTRACT_SUBREG || POpc == TargetOpcode::INSERT_SUBREG || POpc == TargetOpcode::SUBREG_TO_REG) { - EVT VT = PN->getValueType(0); + MVT VT = PN->getSimpleValueType(0); unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); continue; } unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = PN->getValueType(i); + MVT VT = PN->getSimpleValueType(i); if (!PN->hasAnyUseOfValue(i)) continue; unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); @@ -2138,7 +2133,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) { if (SU->NumSuccs && N->isMachineOpcode()) { unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { - EVT VT = N->getValueType(i); + MVT VT = N->getSimpleValueType(i); if (VT == MVT::Glue || VT == MVT::Other) continue; if (!N->hasAnyUseOfValue(i)) @@ -2936,10 +2931,7 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { !scheduleDAG->IsReachable(SuccSU, SU)) { DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); - scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); + scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial)); } } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp 
b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 222dc55..b22440d 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -13,26 +13,26 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "pre-RA-sched" -#include "SDNodeDbgValue.h" #include "ScheduleDAGSDNodes.h" #include "InstrEmitter.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/MC/MCInstrItineraries.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "SDNodeDbgValue.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; STATISTIC(LoadsClustered, "Number of loads clustered together"); @@ -485,14 +485,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if(isChain && OpN->getOpcode() == ISD::TokenFactor) OpLatency = 0; - const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, - OpLatency, PhysReg); + SDep Dep = isChain ? SDep(OpSU, SDep::Barrier) + : SDep(OpSU, SDep::Data, PhysReg); + Dep.setLatency(OpLatency); if (!isChain && !UnitLatencies) { - computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); - ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); + computeOperandLatency(OpN, N, i, Dep); + ST.adjustSchedDependency(OpSU, SU, Dep); } - if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { + if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { // Multiple register uses are combined in the same SUnit. For example, // we could have a set of glued nodes with all their defs consumed by // another set of glued nodes. Register pressure tracking sees this as @@ -561,7 +562,7 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() { for (;DefIdx < NodeNumDefs; ++DefIdx) { if (!Node->hasAnyUseOfValue(DefIdx)) continue; - ValueType = Node->getValueType(DefIdx); + ValueType = Node->getSimpleValueType(DefIdx); ++DefIdx; return; // Found a normal regdef. 
} @@ -643,7 +644,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, } void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) if (!SU->getNode()) { dbgs() << "PHYS REG COPY\n"; return; @@ -663,7 +664,7 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { #endif } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ScheduleDAGSDNodes::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { if (SUnit *SU = Sequence[i]) @@ -831,8 +832,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { } SmallVector<SDNode *, 4> GluedNodes; - for (SDNode *N = SU->getNode()->getGluedNode(); N; - N = N->getGluedNode()) + for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode()) GluedNodes.push_back(N); while (!GluedNodes.empty()) { SDNode *N = GluedNodes.back(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 84e41fc..76067a1 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -114,7 +114,8 @@ namespace llvm { /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock /// according to the order specified in Sequence. /// - MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); + virtual MachineBasicBlock* + EmitSchedule(MachineBasicBlock::iterator &InsertPos); virtual void dumpNode(const SUnit *SU) const; @@ -134,13 +135,13 @@ namespace llvm { const SDNode *Node; unsigned DefIdx; unsigned NodeNumDefs; - EVT ValueType; + MVT ValueType; public: RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD); bool IsValid() const { return Node != NULL; } - EVT GetValue() const { + MVT GetValue() const { assert(IsValid() && "bad iterator"); return ValueType; } @@ -158,6 +159,12 @@ namespace llvm { void InitNodeNumDefs(); }; + protected: + /// ForceUnitLatencies - Return true if all scheduling edges should be given + /// a latency value of one. The default is to return false; schedulers may + /// override this as needed. + virtual bool forceUnitLatencies() const { return false; } + private: /// ClusterNeighboringLoads - Cluster loads from "near" addresses into /// combined SUnits. 
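Tying the scheduler changes together: the new "linearize" scheduler registered in ScheduleDAGFast.cpp above does no reordering at all; it reuses each node's id field as a countdown of unscheduled users and emits a node the moment that count reaches zero, recursing from the root. A hedged sketch of that core walk (hypothetical Node type; the glue special-casing is omitted):

#include <vector>

struct Node {
  int NumUnscheduledUsers;         // stands in for the node id used above
  std::vector<Node*> Operands;
};

// Emit N, then release its operands; an operand is scheduled the moment its
// last user has been emitted, which yields a topological order.
void scheduleNode(Node *N, std::vector<Node*> &Sequence) {
  Sequence.push_back(N);
  for (Node *Op : N->Operands)
    if (--Op->NumUnscheduledUsers == 0)
      scheduleNode(Op, Sequence);
}

Because a node is always emitted before its operands, the resulting sequence is users-first, which is why the linearizer's EmitSchedule above walks Sequence back to front when emitting machine instructions.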
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index c851291..58aa1fe 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -19,19 +19,19 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "pre-RA-sched" +#include "llvm/CodeGen/SchedulerRegistry.h" #include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/ResourcePriorityQueue.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/ResourcePriorityQueue.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" #include <climits> using namespace llvm; @@ -123,6 +123,8 @@ void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) { llvm_unreachable(0); } #endif + assert(!D.isWeak() && "unexpected artificial DAG edge"); + --SuccSU->NumPredsLeft; SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index d85d41b..6c29c67 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12,42 +12,43 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAG.h" -#include "SDNodeOrdering.h" #include "SDNodeDbgValue.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalAlias.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Intrinsics.h" +#include "SDNodeOrdering.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSelectionDAGInfo.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetIntrinsicInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Mutex.h" -#include "llvm/ADT/SetVector.h" 
-#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include <algorithm> #include <cmath> using namespace llvm; @@ -91,11 +92,6 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, const APFloat& Val) { assert(VT.isFloatingPoint() && "Can only convert between FP types"); - // PPC long double cannot be converted to any other type. - if (VT == MVT::ppcf128 || - &Val.getSemantics() == &APFloat::PPCDoubleDouble) - return false; - // convert modifies in place, so make a copy. APFloat Val2 = APFloat(Val); bool losesInfo; @@ -136,13 +132,11 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { // constants are. SDValue NotZero = N->getOperand(i); unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); - if (isa<ConstantSDNode>(NotZero)) { - if (cast<ConstantSDNode>(NotZero)->getAPIntValue().countTrailingOnes() < - EltSize) + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) { + if (CN->getAPIntValue().countTrailingOnes() < EltSize) return false; - } else if (isa<ConstantFPSDNode>(NotZero)) { - if (cast<ConstantFPSDNode>(NotZero)->getValueAPF() - .bitcastToAPInt().countTrailingOnes() < EltSize) + } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) { + if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize) return false; } else return false; @@ -179,11 +173,11 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { // Do not accept build_vectors that aren't all constants or which have non-0 // elements. SDValue Zero = N->getOperand(i); - if (isa<ConstantSDNode>(Zero)) { - if (!cast<ConstantSDNode>(Zero)->isNullValue()) + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) { + if (!CN->isNullValue()) return false; - } else if (isa<ConstantFPSDNode>(Zero)) { - if (!cast<ConstantFPSDNode>(Zero)->getValueAPF().isPosZero()) + } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) { + if (!CFPN->getValueAPF().isPosZero()) return false; } else return false; @@ -494,8 +488,10 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { } case ISD::TargetBlockAddress: case ISD::BlockAddress: { - ID.AddPointer(cast<BlockAddressSDNode>(N)->getBlockAddress()); - ID.AddInteger(cast<BlockAddressSDNode>(N)->getTargetFlags()); + const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N); + ID.AddPointer(BA->getBlockAddress()); + ID.AddInteger(BA->getOffset()); + ID.AddInteger(BA->getTargetFlags()); break; } } // end switch (N->getOpcode()) @@ -883,21 +879,23 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { PointerType::get(Type::getInt8Ty(*getContext()), 0) : VT.getTypeForEVT(*getContext()); - return TLI.getTargetData()->getABITypeAlignment(Ty); + return TLI.getDataLayout()->getABITypeAlignment(Ty); } // EntryNode could meaningfully have debug info if we can find it... 
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), - OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), + TTI(0), OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), + getVTList(MVT::Other)), Root(getEntryNode()), Ordering(0), UpdateListeners(0) { AllNodes.push_back(&EntryNode); Ordering = new SDNodeOrdering(); DbgInfo = new SDDbgInfo(); } -void SelectionDAG::init(MachineFunction &mf) { +void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti) { MF = &mf; + TTI = tti; Context = &mf.getFunction()->getContext(); } @@ -1079,7 +1077,8 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { return getConstantFP(APFloat((float)Val), VT, isTarget); else if (EltVT==MVT::f64) return getConstantFP(APFloat(Val), VT, isTarget); - else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::f16) { + else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::ppcf128 || + EltVT==MVT::f16) { bool ignored; APFloat apf = APFloat(Val); apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, @@ -1173,7 +1172,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType()); + Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1200,7 +1199,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType()); + Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1470,6 +1469,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, + int64_t Offset, bool isTarget, unsigned char TargetFlags) { unsigned Opc = isTarget ? 
ISD::TargetBlockAddress : ISD::BlockAddress; @@ -1477,12 +1477,14 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); ID.AddPointer(BA); + ID.AddInteger(Offset); ID.AddInteger(TargetFlags); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, TargetFlags); + SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset, + TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1541,7 +1543,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { unsigned ByteSize = VT.getStoreSize(); Type *Ty = VT.getTypeForEVT(*getContext()); unsigned StackAlign = - std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign); + std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), minAlign); int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); return getFrameIndex(FrameIdx, TLI.getPointerTy()); @@ -1554,7 +1556,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { VT2.getStoreSizeInBits())/8; Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); - const TargetData *TD = TLI.getTargetData(); + const DataLayout *TD = TLI.getDataLayout(); unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), TD->getPrefTypeAlignment(Ty2)); @@ -1609,10 +1611,6 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, } if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) { if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) { - // No compile time operations on this type yet. - if (N1C->getValueType(0) == MVT::ppcf128) - return SDValue(); - APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF()); switch (Cond) { default: break; @@ -2444,8 +2442,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { - // No compile time operations on ppcf128. - if (VT == MVT::ppcf128) break; APFloat apf(APInt::getNullValue(VT.getSizeInBits())); (void)apf.convertFromAPInt(Val, Opcode==ISD::SINT_TO_FP, @@ -2454,9 +2450,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, } case ISD::BITCAST: if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) - return getConstantFP(Val.bitsToFloat(), VT); + return getConstantFP(APFloat(Val), VT); else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) - return getConstantFP(Val.bitsToDouble(), VT); + return getConstantFP(APFloat(Val), VT); break; case ISD::BSWAP: return getConstant(Val.byteSwap(), VT); @@ -2474,61 +2470,59 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, // Constant fold unary operations with a floating point constant operand. 
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) { APFloat V = C->getValueAPF(); // make copy - if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) { - switch (Opcode) { - case ISD::FNEG: - V.changeSign(); + switch (Opcode) { + case ISD::FNEG: + V.changeSign(); + return getConstantFP(V, VT); + case ISD::FABS: + V.clearSign(); + return getConstantFP(V, VT); + case ISD::FCEIL: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); + if (fs == APFloat::opOK || fs == APFloat::opInexact) return getConstantFP(V, VT); - case ISD::FABS: - V.clearSign(); + break; + } + case ISD::FTRUNC: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); + if (fs == APFloat::opOK || fs == APFloat::opInexact) return getConstantFP(V, VT); - case ISD::FCEIL: { - APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); - if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); - break; - } - case ISD::FTRUNC: { - APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); - if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); - break; - } - case ISD::FFLOOR: { - APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); - if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); - break; - } - case ISD::FP_EXTEND: { - bool ignored; - // This can return overflow, underflow, or inexact; we don't care. - // FIXME need to be more flexible about rounding mode. - (void)V.convert(*EVTToAPFloatSemantics(VT), - APFloat::rmNearestTiesToEven, &ignored); + break; + } + case ISD::FFLOOR: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); + if (fs == APFloat::opOK || fs == APFloat::opInexact) return getConstantFP(V, VT); - } - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: { - integerPart x[2]; - bool ignored; - assert(integerPartWidth >= 64); - // FIXME need to be more flexible about rounding mode. - APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(), - Opcode==ISD::FP_TO_SINT, - APFloat::rmTowardZero, &ignored); - if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual - break; - APInt api(VT.getSizeInBits(), x); - return getConstant(api, VT); - } - case ISD::BITCAST: - if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) - return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); - else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) - return getConstant(V.bitcastToAPInt().getZExtValue(), VT); + break; + } + case ISD::FP_EXTEND: { + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(*EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, VT); + } + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: { + integerPart x[2]; + bool ignored; + assert(integerPartWidth >= 64); + // FIXME need to be more flexible about rounding mode. 
+ APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(), + Opcode==ISD::FP_TO_SINT, + APFloat::rmTowardZero, &ignored); + if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual break; - } + APInt api(VT.getSizeInBits(), x); + return getConstant(api, VT); + } + case ISD::BITCAST: + if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) + return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); + else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) + return getConstant(V.bitcastToAPInt().getZExtValue(), VT); + break; } } @@ -3049,7 +3043,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // Cannonicalize constant to RHS if commutative std::swap(N1CFP, N2CFP); std::swap(N1, N2); - } else if (N2CFP && VT != MVT::ppcf128) { + } else if (N2CFP) { APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF(); APFloat::opStatus s; switch (Opcode) { @@ -3383,7 +3377,7 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, unsigned NumVTBytes = VT.getSizeInBits() / 8; unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size())); - uint64_t Val = 0; + APInt Val(NumBytes*8, 0); if (TLI.isLittleEndian()) { for (unsigned i = 0; i != NumBytes; ++i) Val |= (uint64_t)(unsigned char)Str[i] << i*8; @@ -3392,7 +3386,12 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8; } - return DAG.getConstant(Val, VT); + // If the "cost" of materializing the integer immediate is 1 or free, then + // it is cost effective to turn the load into the immediate. + const TargetTransformInfo *TTI = DAG.getTargetTransformInfo(); + if (TTI->getIntImmCost(Val, VT.getTypeForEVT(*DAG.getContext())) < 2) + return DAG.getConstant(Val, VT); + return SDValue(0, 0); } /// getMemBasePlusOffset - Returns base and offset node for the @@ -3430,8 +3429,10 @@ static bool isMemSrcFromString(SDValue Src, StringRef &Str) { static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, + bool IsMemset, + bool ZeroMemset, bool MemcpyStrSrc, + bool AllowOverlap, SelectionDAG &DAG, const TargetLowering &TLI) { assert((SrcAlign == 0 || SrcAlign >= DstAlign) && @@ -3444,11 +3445,11 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does // not need to be loaded. EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, - IsZeroVal, MemcpyStrSrc, + IsMemset, ZeroMemset, MemcpyStrSrc, DAG.getMachineFunction()); if (VT == MVT::Other) { - if (DstAlign >= TLI.getTargetData()->getPointerPrefAlignment() || + if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() || TLI.allowsUnalignedMemoryAccesses(VT)) { VT = TLI.getPointerTy(); } else { @@ -3474,21 +3475,51 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned VTSize = VT.getSizeInBits() / 8; while (VTSize > Size) { // For now, only use non-vector load / store's for the left-over pieces. + EVT NewVT = VT; + unsigned NewVTSize; + + bool Found = false; if (VT.isVector() || VT.isFloatingPoint()) { - VT = MVT::i64; - while (!TLI.isTypeLegal(VT)) - VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); - VTSize = VT.getSizeInBits() / 8; - } else { - // This can result in a type that is not legal on the target, e.g. - // 1 or 2 bytes on PPC. 
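The restructured switch above folds the FP unary ops entirely through APFloat, which is what allowed the old ppcf128 guards to be dropped: APFloat handles every supported float semantics uniformly. A minimal standalone sketch of the FCEIL case, assuming only the APFloat API of this era (the visitor does the same copy-then-round dance):

    #include "llvm/ADT/APFloat.h"
    #include <cstdio>
    using namespace llvm;

    int main() {
      APFloat V(2.25f); // make a copy, as the constant folder does
      APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
      // Folding is only legal when rounding succeeded exactly or inexactly.
      if (fs == APFloat::opOK || fs == APFloat::opInexact)
        std::printf("fceil(2.25f) folds to %f\n", V.convertToFloat()); // 3.0
      return 0;
    }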
- VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); - VTSize >>= 1; + NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32; + if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) && + TLI.isSafeMemOpType(NewVT.getSimpleVT())) + Found = true; + else if (NewVT == MVT::i64 && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) && + TLI.isSafeMemOpType(MVT::f64)) { + // i64 is usually not legal on 32-bit targets, but f64 may be. + NewVT = MVT::f64; + Found = true; + } + } + + if (!Found) { + do { + NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1); + if (NewVT == MVT::i8) + break; + } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT())); + } + NewVTSize = NewVT.getSizeInBits() / 8; + + // If the new VT cannot cover all of the remaining bits, then consider + // issuing a (or a pair of) unaligned and overlapping load / store. + // FIXME: Only does this for 64-bit or more since we don't have proper + // cost model for unaligned load / store. + bool Fast; + if (NumMemOps && AllowOverlap && + VTSize >= 8 && NewVTSize < Size && + TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast) + VTSize = Size; + else { + VT = NewVT; + VTSize = NewVTSize; } } if (++NumMemOps > Limit) return false; + MemOps.push_back(VT); Size -= VTSize; } @@ -3516,7 +3547,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + bool OptSize = + MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3531,12 +3564,12 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), (isZeroStr ? 0 : SrcAlign), - true, CopyFromStr, DAG, TLI)) + false, false, CopyFromStr, true, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) @@ -3553,6 +3586,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value, Store; + if (VTSize > Size) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + assert(i == NumMemOps-1 && i != 0); + SrcOff -= VTSize - Size; + DstOff -= VTSize - Size; + } + if (CopyFromStr && (isZeroStr || (VT.isInteger() && !VT.isVector()))) { // It's unlikely a store of a vector immediate can be done in a single @@ -3561,11 +3602,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // FIXME: Handle other cases where store of vector immediate is done in // a single instruction. 
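The AllowOverlap path above trades one overlapping access for a shorter sequence: with fast unaligned 8-byte ops, a 15-byte copy becomes two i64 ops at offsets 0 and 7 instead of an 8+4+2+1 ladder. A standalone model of that offset arithmetic (planOffsets is a hypothetical helper, not the LLVM function):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Plan VTSize-byte ops covering Size bytes, letting the last op
    // overlap its predecessor instead of splitting the tail.
    static std::vector<uint64_t> planOffsets(uint64_t Size, uint64_t VTSize) {
      std::vector<uint64_t> Offs;
      uint64_t Off = 0;
      while (Size > 0) {
        if (VTSize > Size && !Offs.empty())
          Off -= VTSize - Size; // back up so the op ends exactly at the tail
        Offs.push_back(Off);
        uint64_t Covered = VTSize > Size ? Size : VTSize;
        Off += VTSize;
        Size -= Covered;
      }
      return Offs;
    }

    int main() {
      for (uint64_t O : planOffsets(15, 8))
        std::printf("op at offset %llu\n", (unsigned long long)O); // 0, 7
    }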
Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff)); - Store = DAG.getStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, DAG), - DstPtrInfo.getWithOffset(DstOff), isVol, - false, Align); - } else { + if (Value.getNode()) + Store = DAG.getStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstPtrInfo.getWithOffset(DstOff), isVol, + false, Align); + } + + if (!Store.getNode()) { // The type might not be legal for the target. This should only happen // if the type is smaller than a legal type, as on PPC, so the right // thing to do is generate a LoadExt/StoreTrunc pair. These simplify @@ -3585,6 +3629,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, OutChains.push_back(Store); SrcOff += VTSize; DstOff += VTSize; + Size -= VTSize; } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, @@ -3609,7 +3654,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3619,13 +3665,13 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, - (DstAlignCanChange ? 0 : Align), - SrcAlign, true, false, DAG, TLI)) + (DstAlignCanChange ? 0 : Align), SrcAlign, + false, false, false, false, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) @@ -3687,7 +3733,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3695,12 +3742,12 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, (DstAlignCanChange ? 0 : Align), 0, - IsZeroVal, false, DAG, TLI)) + true, IsZeroVal, false, true, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. 
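All three memop expanders now query OptimizeForSize through the AttributeSet interface that replaced Function::hasFnAttr; the flag feeds the getMaxStoresPerMem* limits. The pattern as a fragment, assuming a MachineFunction MF and TargetLowering TLI in scope:

    // OptSize caps how many discrete stores the expansion may emit.
    bool OptSize = MF.getFunction()->getAttributes().
        hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
    unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);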
if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) @@ -3722,6 +3769,13 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + if (VTSize > Size) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + assert(i == NumMemOps-1 && i != 0); + DstOff -= VTSize - Size; + } // If this store is smaller than the largest store see whether we can get // the smaller value for free with a truncate. @@ -3740,6 +3794,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, isVol, false, Align); OutChains.push_back(Store); DstOff += VT.getSizeInBits() / 8; + Size -= VTSize; } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, @@ -3794,7 +3849,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); + Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); @@ -3849,7 +3904,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); + Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); @@ -3898,7 +3953,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, return Result; // Emit a library call. - Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext()); + Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; @@ -6094,7 +6149,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { unsigned PtrWidth = TLI.getPointerTy().getSizeInBits(); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, - TLI.getTargetData()); + TLI.getDataLayout()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 
1 << std::min(31U, AlignBits) : 0; if (Align) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 483b051..8c22db3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -12,51 +12,51 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "isel" -#include "SDNodeDbgValue.h" #include "SelectionDAGBuilder.h" +#include "SDNodeDbgValue.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Constants.h" -#include "llvm/CallingConv.h" -#include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/InlineAsm.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/IntegersSubsetMapping.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/IntegersSubsetMapping.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include <algorithm> using namespace llvm; @@ -89,7 +89,7 @@ static const unsigned MaxParallelChains = 64; static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, - EVT PartVT, EVT ValueVT); + MVT PartVT, EVT ValueVT, const Value *V); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type @@ -98,10 +98,12 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, /// (ISD::AssertSext). 
static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, - unsigned NumParts, EVT PartVT, EVT ValueVT, + unsigned NumParts, MVT PartVT, EVT ValueVT, + const Value *V, ISD::NodeType AssertOp = ISD::DELETED_NODE) { if (ValueVT.isVector()) - return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT); + return getCopyFromPartsVector(DAG, DL, Parts, NumParts, + PartVT, ValueVT, V); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -125,9 +127,9 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, if (RoundParts > 2) { Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, - PartVT, HalfVT); + PartVT, HalfVT, V); Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, - RoundParts / 2, PartVT, HalfVT); + RoundParts / 2, PartVT, HalfVT, V); } else { Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); @@ -143,7 +145,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, unsigned OddParts = NumParts - RoundParts; EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); Hi = getCopyFromParts(DAG, DL, - Parts + RoundParts, OddParts, PartVT, OddVT); + Parts + RoundParts, OddParts, PartVT, OddVT, V); // Combine the round and odd parts. Lo = Val; @@ -159,7 +161,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, } } else if (PartVT.isFloatingPoint()) { // FP split into multiple FP parts (for ppcf128) - assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) && + assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 && "Unexpected split"); SDValue Lo, Hi; Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); @@ -172,30 +174,30 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && !PartVT.isVector() && "Unexpected split"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); - Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT); + Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V); } } // There is now one part, held in Val. Correct it to match ValueVT. - PartVT = Val.getValueType(); + EVT PartEVT = Val.getValueType(); - if (PartVT == ValueVT) + if (PartEVT == ValueVT) return Val; - if (PartVT.isInteger() && ValueVT.isInteger()) { - if (ValueVT.bitsLT(PartVT)) { + if (PartEVT.isInteger() && ValueVT.isInteger()) { + if (ValueVT.bitsLT(PartEVT)) { // For a truncate, see if we have any information to // indicate whether the truncated bits will always be // zero or sign-extension. if (AssertOp != ISD::DELETED_NODE) - Val = DAG.getNode(AssertOp, DL, PartVT, Val, + Val = DAG.getNode(AssertOp, DL, PartEVT, Val, DAG.getValueType(ValueVT)); return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); } return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val); } - if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { // FP_ROUND's are always exact here. 
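getCopyFromParts assembles the largest power-of-two prefix of the parts pairwise, then ORs the odd tail in at the right bit offset; a 96-bit value held in three 32-bit parts, for instance, splits into RoundParts = 2 and OddParts = 1. A standalone sketch of the split:

    #include <cstdio>

    int main() {
      unsigned NumParts = 3, PartBits = 32;
      unsigned RoundParts = 1; // largest power of two <= NumParts
      while (RoundParts * 2 <= NumParts)
        RoundParts *= 2;
      unsigned OddParts = NumParts - RoundParts;
      unsigned RoundBits = RoundParts * PartBits;
      std::printf("round: %u parts (%u bits); odd: %u part shifted by %u\n",
                  RoundParts, RoundBits, OddParts, RoundBits); // 2, 64, 1, 64
    }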
if (ValueVT.bitsLT(Val.getValueType())) return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, @@ -204,20 +206,20 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } - if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) + if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits()) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); llvm_unreachable("Unknown mismatch!"); } -/// getCopyFromParts - Create a value that contains the specified legal parts -/// combined into the value they represent. If the parts combine to a type -/// larger then ValueVT then AssertOp can be used to specify whether the extra -/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT -/// (ISD::AssertSext). +/// getCopyFromPartsVector - Create a value that contains the specified legal +/// parts combined into the value they represent. If the parts combine to a +/// type larger then ValueVT then AssertOp can be used to specify whether the +/// extra bits are known to be zero (ISD::AssertZext) or sign extended from +/// ValueVT (ISD::AssertSext). static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, - EVT PartVT, EVT ValueVT) { + MVT PartVT, EVT ValueVT, const Value *V) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -225,7 +227,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, // Handle a multi-element vector. if (NumParts > 1) { - EVT IntermediateVT, RegisterVT; + EVT IntermediateVT; + MVT RegisterVT; unsigned NumIntermediates; unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, @@ -233,7 +236,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); - assert(RegisterVT == Parts[0].getValueType() && + assert(RegisterVT == Parts[0].getSimpleValueType() && "Part type doesn't match part!"); // Assemble the parts into intermediate operands. @@ -243,7 +246,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, - PartVT, IntermediateVT); + PartVT, IntermediateVT, V); } else if (NumParts > 0) { // If the intermediate type was expanded, build the intermediate // operands from the parts. @@ -252,7 +255,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, - PartVT, IntermediateVT); + PartVT, IntermediateVT, V); } // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the @@ -263,31 +266,31 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, } // There is now one part, held in Val. Correct it to match ValueVT. - PartVT = Val.getValueType(); + EVT PartEVT = Val.getValueType(); - if (PartVT == ValueVT) + if (PartEVT == ValueVT) return Val; - if (PartVT.isVector()) { + if (PartEVT.isVector()) { // If the element type of the source/dest vectors are the same, but the // parts vector has more elements than the value vector, then we have a // vector widening case (e.g. 
<2 x float> -> <4 x float>). Extract the // elements we want. - if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) { - assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() && + if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { + assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, DAG.getIntPtrConstant(0)); } // Vector/Vector bitcast. - if (ValueVT.getSizeInBits() == PartVT.getSizeInBits()) + if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() && + assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && "Cannot handle this kind of promotion"); // Promoted vector extract - bool Smaller = ValueVT.bitsLE(PartVT); + bool Smaller = ValueVT.bitsLE(PartEVT); return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), DL, ValueVT, Val); @@ -295,17 +298,28 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, // Trivial bitcast if the types are the same size and the destination // vector type is legal. - if (PartVT.getSizeInBits() == ValueVT.getSizeInBits() && + if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() && TLI.isTypeLegal(ValueVT)) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); // Handle cases such as i8 -> <1 x i1> - assert(ValueVT.getVectorNumElements() == 1 && - "Only trivial scalar-to-vector conversions should get here!"); + if (ValueVT.getVectorNumElements() != 1) { + LLVMContext &Ctx = *DAG.getContext(); + Twine ErrMsg("non-trivial scalar-to-vector conversion"); + if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (isa<InlineAsm>(CI->getCalledValue())) + ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; + Ctx.emitError(I, ErrMsg); + } else { + Ctx.emitError(ErrMsg); + } + report_fatal_error("Cannot handle scalar-to-vector conversion!"); + } if (ValueVT.getVectorNumElements() == 1 && - ValueVT.getVectorElementType() != PartVT) { - bool Smaller = ValueVT.bitsLE(PartVT); + ValueVT.getVectorElementType() != PartEVT) { + bool Smaller = ValueVT.bitsLE(PartEVT); Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), DL, ValueVT.getScalarType(), Val); } @@ -313,25 +327,22 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } - - - static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, SDValue Val, SDValue *Parts, unsigned NumParts, - EVT PartVT); + MVT PartVT, const Value *V); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, - EVT PartVT, + MVT PartVT, const Value *V, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { EVT ValueVT = Val.getValueType(); // Handle the vector case separately. 
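The new failure path above reports the scalar-to-vector mismatch through LLVMContext::emitError instead of asserting, attaching the offending instruction (usually a call with a bad inline-asm vector constraint) when one is available. The shape of the pattern, as a fragment with V and DAG taken from the surrounding function:

    LLVMContext &Ctx = *DAG.getContext();
    Twine ErrMsg("non-trivial scalar-to-vector conversion");
    if (const Instruction *I = dyn_cast_or_null<Instruction>(V))
      Ctx.emitError(I, ErrMsg); // carries I's source location into the diag
    else
      Ctx.emitError(ErrMsg);    // no context: module-level diagnostic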
if (ValueVT.isVector()) - return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT); + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned PartBits = PartVT.getSizeInBits(); @@ -342,7 +353,8 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, return; assert(!ValueVT.isVector() && "Vector case handled elsewhere"); - if (PartVT == ValueVT) { + EVT PartEVT = PartVT; + if (PartEVT == ValueVT) { assert(NumParts == 1 && "No-op copy with multiple parts!"); Parts[0] = Val; return; @@ -364,7 +376,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, } } else if (PartBits == ValueVT.getSizeInBits()) { // Different types of the same size. - assert(NumParts == 1 && PartVT != ValueVT); + assert(NumParts == 1 && PartEVT != ValueVT); Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { // If the parts cover less bits than value has, truncate the value. @@ -383,7 +395,19 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, "Failed to tile the value with PartVT!"); if (NumParts == 1) { - assert(PartVT == ValueVT && "Type conversion failed!"); + if (PartEVT != ValueVT) { + LLVMContext &Ctx = *DAG.getContext(); + Twine ErrMsg("scalar-to-vector conversion failed"); + if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (isa<InlineAsm>(CI->getCalledValue())) + ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; + Ctx.emitError(I, ErrMsg); + } else { + Ctx.emitError(ErrMsg); + } + } + Parts[0] = Val; return; } @@ -398,7 +422,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, DAG.getIntPtrConstant(RoundBits)); - getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT); + getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); if (TLI.isBigEndian()) // The odd parts were reversed by getCopyToParts - unreverse them. @@ -444,20 +468,21 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, /// value split into legal parts. static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, - EVT PartVT) { + MVT PartVT, const Value *V) { EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (NumParts == 1) { - if (PartVT == ValueVT) { + EVT PartEVT = PartVT; + if (PartEVT == ValueVT) { // Nothing to do. } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { // Bitconvert vector->vector case. Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (PartVT.isVector() && - PartVT.getVectorElementType() == ValueVT.getVectorElementType() && - PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { + PartEVT.getVectorElementType() == ValueVT.getVectorElementType() && + PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { EVT ElementVT = PartVT.getVectorElementType(); // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in // undef elements. 
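On the store side, getCopyToParts peels the odd parts off the top with a logical shift right by RoundBits and recurses on each half. A standalone model with a 48-bit payload split into three 16-bit parts, little-endian part order assumed:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Val = 0xABCD12345678ULL; // 48-bit payload
      unsigned PartBits = 16, RoundParts = 2;
      unsigned RoundBits = RoundParts * PartBits; // 32
      uint64_t OddVal = Val >> RoundBits;         // the ISD::SRL in the patch
      uint16_t Parts[3] = {
        (uint16_t)Val,               // 0x5678
        (uint16_t)(Val >> PartBits), // 0x1234
        (uint16_t)OddVal,            // 0xABCD, the odd top part
      };
      for (unsigned i = 0; i != 3; ++i)
        std::printf("part[%u] = 0x%04X\n", i, Parts[i]);
    }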
@@ -477,12 +502,12 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, //SDValue UndefElts = DAG.getUNDEF(VectorTy); //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); } else if (PartVT.isVector() && - PartVT.getVectorElementType().bitsGE( + PartEVT.getVectorElementType().bitsGE( ValueVT.getVectorElementType()) && - PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { + PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { // Promoted vector extract - bool Smaller = PartVT.bitsLE(ValueVT); + bool Smaller = PartEVT.bitsLE(ValueVT); Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), DL, PartVT, Val); } else{ @@ -502,7 +527,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, } // Handle a multi-element vector. - EVT IntermediateVT, RegisterVT; + EVT IntermediateVT; + MVT RegisterVT; unsigned NumIntermediates; unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, @@ -530,7 +556,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, // If the register was not expanded, promote or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) - getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT); + getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V); } else if (NumParts > 0) { // If the intermediate type was expanded, split each the value into // legal parts. @@ -538,13 +564,10 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) - getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT); + getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V); } } - - - namespace { /// RegsForValue - This struct represents the registers (physical or virtual) /// that a particular set of values is assigned, and the type information @@ -570,7 +593,7 @@ namespace { /// getRegisterType member function, however when with physical registers /// it is necessary to have a separate record of the types. /// - SmallVector<EVT, 4> RegVTs; + SmallVector<MVT, 4> RegVTs; /// Regs - This list holds the registers assigned to the values. /// Each legal or promoted value requires one register, and each @@ -581,7 +604,7 @@ namespace { RegsForValue() {} RegsForValue(const SmallVector<unsigned, 4> ®s, - EVT regvt, EVT valuevt) + MVT regvt, EVT valuevt) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} RegsForValue(LLVMContext &Context, const TargetLowering &tli, @@ -591,7 +614,7 @@ namespace { for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); - EVT RegisterVT = tli.getRegisterType(Context, ValueVT); + MVT RegisterVT = tli.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); RegVTs.push_back(RegisterVT); @@ -602,7 +625,7 @@ namespace { /// areValueTypesLegal - Return true if types of all the values are legal. bool areValueTypesLegal(const TargetLowering &TLI) { for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT RegisterVT = RegVTs[Value]; + MVT RegisterVT = RegVTs[Value]; if (!TLI.isTypeLegal(RegisterVT)) return false; } @@ -622,14 +645,15 @@ namespace { /// If the Flag pointer is NULL, no flag is used. 
SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const; + SDValue &Chain, SDValue *Flag, + const Value *V = 0) const; /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const; + SDValue &Chain, SDValue *Flag, const Value *V) const; /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker, matching input operand index @@ -648,7 +672,8 @@ namespace { SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { + SDValue &Chain, SDValue *Flag, + const Value *V) const { // A Value with type {} or [0 x %t] needs no registers. if (ValueVTs.empty()) return SDValue(); @@ -662,7 +687,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // Copy the legal parts from the registers. EVT ValueVT = ValueVTs[Value]; unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); - EVT RegisterVT = RegVTs[Value]; + MVT RegisterVT = RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { @@ -722,7 +747,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, } Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), - NumRegs, RegisterVT, ValueVT); + NumRegs, RegisterVT, ValueVT, V); Part += NumRegs; Parts.clear(); } @@ -737,7 +762,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { + SDValue &Chain, SDValue *Flag, + const Value *V) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Get the list of the values's legal parts. @@ -746,10 +772,12 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); - EVT RegisterVT = RegVTs[Value]; + MVT RegisterVT = RegVTs[Value]; + ISD::NodeType ExtendKind = + TLI.isZExtFree(Val, RegisterVT)? 
ISD::ZERO_EXTEND: ISD::ANY_EXTEND; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), - &Parts[Part], NumParts, RegisterVT); + &Parts[Part], NumParts, RegisterVT, V, ExtendKind); Part += NumParts; } @@ -812,7 +840,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); - EVT RegisterVT = RegVTs[Value]; + MVT RegisterVT = RegVTs[Value]; for (unsigned i = 0; i != NumRegs; ++i) { assert(Reg < Regs.size() && "Mismatch in # registers expected"); Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); @@ -825,7 +853,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - TD = DAG.getTarget().getTargetData(); + TD = DAG.getTarget().getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -945,7 +973,7 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { // Build the switch statement using the Instruction.def file. #define HANDLE_INST(NUM, OPCODE, CLASS) \ case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break; -#include "llvm/Instruction.def" +#include "llvm/IR/Instruction.def" } // Assign the ordering to the freshly created DAG nodes. @@ -994,7 +1022,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { unsigned InReg = It->second; RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); + N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); resolveDanglingDebugInfo(V, N); return N; } @@ -1149,7 +1177,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); SDValue Chain = DAG.getEntryNode(); - return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); } llvm_unreachable("Can't get register for value!"); @@ -1205,24 +1233,27 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ISD::NodeType ExtendKind = ISD::ANY_EXTEND; const Function *F = I.getParent()->getParent(); - if (F->paramHasAttr(0, Attribute::SExt)) + if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::SExt)) ExtendKind = ISD::SIGN_EXTEND; - else if (F->paramHasAttr(0, Attribute::ZExt)) + else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::ZExt)) ExtendKind = ISD::ZERO_EXTEND; if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) - VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind); + VT = TLI.getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind); unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); - EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); + MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); SmallVector<SDValue, 4> Parts(NumParts); getCopyToParts(DAG, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), - &Parts[0], NumParts, PartVT, ExtendKind); + &Parts[0], NumParts, PartVT, &I, ExtendKind); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (F->paramHasAttr(0, Attribute::InReg)) + if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::InReg)) Flags.setInReg(); // Propagate extension type if any @@ 
-1233,7 +1264,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { for (unsigned i = 0; i < NumParts; ++i) { Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), - /*isfixed=*/true)); + /*isfixed=*/true, 0, 0)); OutVals.push_back(Parts[i]); } } @@ -1736,8 +1767,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT); } - B.RegVT = VT; - B.Reg = FuncInfo.CreateReg(VT); + B.RegVT = VT.getSimpleVT(); + B.Reg = FuncInfo.CreateReg(B.RegVT); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), B.Reg, Sub); @@ -1771,7 +1802,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { - EVT VT = BB.RegVT; + MVT VT = BB.RegVT; SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, VT); SDValue Cmp; @@ -2093,7 +2124,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) TSize += I->size(); - if (!areJTsAllowed(TLI) || TSize.ult(4)) + if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) return false; APInt Range = ComputeRange(First, Last); @@ -2565,9 +2596,10 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB)) continue; - // If the switch has more than 5 blocks, and at least 40% dense, and the + // If the switch has more than N blocks, and is at least 40% dense, and the // target supports indirect branches, then emit a jump table rather than // lowering the switch to a binary tree of conditional branches. + // N defaults to 4 and is controlled via TLI.getMinimumJumpTableEntries(). if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) continue; @@ -2581,14 +2613,14 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; // Update machine-CFG edges with unique successors.
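The jump-table gate above replaces the hard-coded TSize.ult(4) with TLI.getMinimumJumpTableEntries() (still 4 by default), and the existing density requirement stands: the cases must fill at least 40% of their value range. A standalone sketch of the two-part test (worthJumpTable is hypothetical; the real code works on APInts):

    #include <cstdint>
    #include <cstdio>

    static bool worthJumpTable(uint64_t NumCases, uint64_t First,
                               uint64_t Last, uint64_t MinEntries = 4) {
      if (NumCases < MinEntries) // TLI.getMinimumJumpTableEntries()
        return false;
      uint64_t Range = Last - First + 1;
      return NumCases * 10 >= Range * 4; // at least 40% dense
    }

    int main() {
      std::printf("%d\n", worthJumpTable(5, 0, 9));  // 1: 5 cases in [0,9]
      std::printf("%d\n", worthJumpTable(5, 0, 99)); // 0: too sparse
    }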
- SmallVector<BasicBlock*, 32> succs; - succs.reserve(I.getNumSuccessors()); - for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) - succs.push_back(I.getSuccessor(i)); - array_pod_sort(succs.begin(), succs.end()); - succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); - for (unsigned i = 0, e = succs.size(); i != e; ++i) { - MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]]; + SmallSet<BasicBlock*, 32> Done; + for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) { + BasicBlock *BB = I.getSuccessor(i); + bool Inserted = Done.insert(BB); + if (!Inserted) + continue; + + MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; addSuccessorWithWeight(IndirectBrMBB, Succ); } @@ -3114,12 +3146,12 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { OI != E; ++OI) { const Value *Idx = *OI; if (StructType *StTy = dyn_cast<StructType>(Ty)) { - unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); + unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); if (Field) { // N = N + Offset uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, - DAG.getIntPtrConstant(Offset)); + DAG.getConstant(Offset, N.getValueType())); } Ty = StTy->getElementType(Field); @@ -3164,7 +3196,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { N.getValueType(), IdxN, DAG.getConstant(Amt, IdxN.getValueType())); } else { - SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy()); + SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType()); IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), N.getValueType(), IdxN, Scale); } @@ -3185,9 +3217,9 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { return; // getValue will auto-populate this. Type *Ty = I.getAllocatedType(); - uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), + std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), I.getAlignment()); SDValue AllocSize = getValue(I.getArraySize()); @@ -3664,16 +3696,12 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) { return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32); } -/// visitExp - Lower an exp intrinsic. Handles the special sequences for +/// expandExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. 
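visitIndirectBr now deduplicates successor blocks with a single SmallSet pass, keeping first-seen order instead of sorting; note that the SmallSet of this vintage returns a plain bool from insert. A standalone model:

    #include "llvm/ADT/SmallSet.h"
    #include <cstdio>

    int main() {
      int Succs[] = {1, 2, 1, 3, 2};
      llvm::SmallSet<int, 32> Done;
      for (int S : Succs) {
        bool Inserted = Done.insert(S); // false when S was already seen
        if (!Inserted)
          continue;
        std::printf("unique successor %d\n", S); // 1, 2, 3, first-seen order
      }
    }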
-void -SelectionDAGBuilder::visitExp(const CallInst &I) { - SDValue result; - DebugLoc dl = getCurDebugLoc(); - - if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && +static SDValue expandExp(DebugLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getArgOperand(0)); // Put the exponent in the right bit position for later addition to the // final result: @@ -3692,6 +3720,7 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, DAG.getConstant(23, TLI.getPointerTy())); + SDValue TwoToFracPartOfX; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // @@ -3705,16 +3734,9 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f3c50c8)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5); - - // Add the exponent into the result in integer domain. - SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32, - TwoToFracPartOfX, IntegerPartOfX); - - result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6); - } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // TwoToFractionalPartOfX = @@ -3731,16 +3753,9 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f324b07)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7); - - // Add the exponent into the result in integer domain. - SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32, - TwoToFracPartOfX, IntegerPartOfX); - - result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8); - } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // TwoToFractionalPartOfX = @@ -3769,37 +3784,27 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, getF32Constant(DAG, 0x3f317234)); SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl, - MVT::i32, t13); - - // Add the exponent into the result in integer domain. - SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32, - TwoToFracPartOfX, IntegerPartOfX); - - result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14); + TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); } - } else { - // No special expansion. - result = DAG.getNode(ISD::FEXP, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0))); + + // Add the exponent into the result in integer domain. 
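All of the exp-family expansions share one trick: split x into integer and fractional parts, approximate 2^frac on [0,1) with a short minimax polynomial, then splice the integer part directly into the IEEE-754 exponent by shifting it left 23 bits and adding in the integer domain. A standalone demonstration of the exponent splice for single precision, with the polynomial's result stubbed in:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      float TwoToFrac = 1.189207f; // ~2^0.25, what the polynomial computes
      int32_t IntPart = 5;         // integer part of x = 5.25
      uint32_t Bits;
      std::memcpy(&Bits, &TwoToFrac, 4); // the ISD::BITCAST to i32
      Bits += (uint32_t)IntPart << 23;   // add into the biased exponent field
      float Result;
      std::memcpy(&Result, &Bits, 4);    // bitcast back to f32
      std::printf("2^5.25 ~= %f (libm says 38.054627)\n", Result);
    }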
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::ADD, dl, MVT::i32, + t13, IntegerPartOfX)); } - setValue(&I, result); + // No special expansion. + return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op); } -/// visitLog - Lower a log intrinsic. Handles the special sequences for +/// expandLog - Lower a log intrinsic. Handles the special sequences for /// limited-precision mode. -void -SelectionDAGBuilder::visitLog(const CallInst &I) { - SDValue result; - DebugLoc dl = getCurDebugLoc(); - - if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && +static SDValue expandLog(DebugLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getArgOperand(0)); SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Scale the exponent by log(2) [0.69314718f]. @@ -3811,6 +3816,7 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { // exponent of 1. SDValue X = GetSignificand(DAG, Op1, dl); + SDValue LogOfMantissa; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // @@ -3824,12 +3830,9 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3fb3a2b1)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); - SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f949a29)); - - result = DAG.getNode(ISD::FADD, dl, - MVT::f32, LogOfExponent, LogOfMantissa); - } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f949a29)); + } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // LogOfMantissa = @@ -3850,12 +3853,9 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x40348e95)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3fdef31a)); - - result = DAG.getNode(ISD::FADD, dl, - MVT::f32, LogOfExponent, LogOfMantissa); - } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3fdef31a)); + } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // LogOfMantissa = @@ -3884,32 +3884,23 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, getF32Constant(DAG, 0x408797cb)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, - getF32Constant(DAG, 0x4006dcab)); - - result = DAG.getNode(ISD::FADD, dl, - MVT::f32, LogOfExponent, LogOfMantissa); + LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4006dcab)); } - } else { - // No special expansion. - result = DAG.getNode(ISD::FLOG, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0))); + + return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); } - setValue(&I, result); + // No special expansion. + return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op); } -/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for +/// expandLog2 - Lower a log2 intrinsic. 
Handles the special sequences for /// limited-precision mode. -void -SelectionDAGBuilder::visitLog2(const CallInst &I) { - SDValue result; - DebugLoc dl = getCurDebugLoc(); - - if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && +static SDValue expandLog2(DebugLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getArgOperand(0)); SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Get the exponent. @@ -3921,6 +3912,7 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { // Different possible minimax approximations of significand in // floating-point for various degrees of accuracy over [1,2]. + SDValue Log2ofMantissa; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // @@ -3932,12 +3924,9 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x40019463)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); - SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fd6633d)); - - result = DAG.getNode(ISD::FADD, dl, - MVT::f32, LogOfExponent, Log2ofMantissa); - } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fd6633d)); + } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // Log2ofMantissa = @@ -3958,12 +3947,9 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x40823e2f)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x4020d29c)); - - result = DAG.getNode(ISD::FADD, dl, - MVT::f32, LogOfExponent, Log2ofMantissa); - } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x4020d29c)); + } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // Log2ofMantissa = @@ -3993,32 +3979,23 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, getF32Constant(DAG, 0x40c39dad)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, - getF32Constant(DAG, 0x4042902c)); - - result = DAG.getNode(ISD::FADD, dl, - MVT::f32, LogOfExponent, Log2ofMantissa); + Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4042902c)); } - } else { - // No special expansion. - result = DAG.getNode(ISD::FLOG2, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0))); + + return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); } - setValue(&I, result); + // No special expansion. + return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op); } -/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for +/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for /// limited-precision mode. 
-void -SelectionDAGBuilder::visitLog10(const CallInst &I) { - SDValue result; - DebugLoc dl = getCurDebugLoc(); - - if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && +static SDValue expandLog10(DebugLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getArgOperand(0)); SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Scale the exponent by log10(2) [0.30102999f]. @@ -4030,6 +4007,7 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { // exponent of 1. SDValue X = GetSignificand(DAG, Op1, dl); + SDValue Log10ofMantissa; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // @@ -4043,12 +4021,9 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3f1c0789)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); - SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f011300)); - - result = DAG.getNode(ISD::FADD, dl, - MVT::f32, LogOfExponent, Log10ofMantissa); - } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f011300)); + } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // Log10ofMantissa = @@ -4065,12 +4040,9 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f6ae232)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f25f7c3)); - - result = DAG.getNode(ISD::FADD, dl, - MVT::f32, LogOfExponent, Log10ofMantissa); - } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f25f7c3)); + } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // Log10ofMantissa = @@ -4095,33 +4067,23 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3fc4316c)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); - SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3f57ce70)); - - result = DAG.getNode(ISD::FADD, dl, - MVT::f32, LogOfExponent, Log10ofMantissa); + Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3f57ce70)); } - } else { - // No special expansion. - result = DAG.getNode(ISD::FLOG10, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0))); + + return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); } - setValue(&I, result); + // No special expansion. + return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op); } -/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for +/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. 
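The log family runs the same trick in reverse: extract the unbiased exponent from the float's bit pattern, scale it by 1 (log2), ln 2 (log), or log10(2) (log10), and add a minimax approximation of the log of the significand over [1,2). A standalone sketch of the extraction that GetExponent and GetSignificand perform in the real code:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      float X = 48.0f; // 1.5 * 2^5
      uint32_t Bits;
      std::memcpy(&Bits, &X, 4);
      int32_t Exp = (int32_t)((Bits >> 23) & 0xff) - 127;  // unbiased: 5
      uint32_t SigBits = (Bits & 0x007fffff) | 0x3f800000; // force [1,2)
      float Sig;
      std::memcpy(&Sig, &SigBits, 4);
      std::printf("exp=%d sig=%f\n", Exp, Sig);          // 5, 1.5
      std::printf("log2(48) ~= %f\n", Exp + 0.5849625f); // + log2(1.5)
    }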
-void -SelectionDAGBuilder::visitExp2(const CallInst &I) { - SDValue result; - DebugLoc dl = getCurDebugLoc(); - - if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && +static SDValue expandExp2(DebugLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getArgOperand(0)); - SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); // FractionalPartOfX = x - (float)IntegerPartOfX; @@ -4132,6 +4094,7 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, DAG.getConstant(23, TLI.getPointerTy())); + SDValue TwoToFractionalPartOfX; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // @@ -4145,15 +4108,9 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f3c50c8)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5); - SDValue TwoToFractionalPartOfX = - DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); - - result = DAG.getNode(ISD::BITCAST, dl, - MVT::f32, TwoToFractionalPartOfX); - } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // TwoToFractionalPartOfX = @@ -4170,15 +4127,9 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f324b07)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7); - SDValue TwoToFractionalPartOfX = - DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); - - result = DAG.getNode(ISD::BITCAST, dl, - MVT::f32, TwoToFractionalPartOfX); - } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // TwoToFractionalPartOfX = @@ -4206,54 +4157,42 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, getF32Constant(DAG, 0x3f317234)); SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13); - SDValue TwoToFractionalPartOfX = - DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); - - result = DAG.getNode(ISD::BITCAST, dl, - MVT::f32, TwoToFractionalPartOfX); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); } - } else { - // No special expansion. - result = DAG.getNode(ISD::FEXP2, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0))); + + // Add the exponent into the result in integer domain. 
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, + TwoToFractionalPartOfX); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::ADD, dl, MVT::i32, + t13, IntegerPartOfX)); } - setValue(&I, result); + // No special expansion. + return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); } /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. -void -SelectionDAGBuilder::visitPow(const CallInst &I) { - SDValue result; - const Value *Val = I.getArgOperand(0); - DebugLoc dl = getCurDebugLoc(); +static SDValue expandPow(DebugLoc dl, SDValue LHS, SDValue RHS, + SelectionDAG &DAG, const TargetLowering &TLI) { bool IsExp10 = false; - - if (getValue(Val).getValueType() == MVT::f32 && - getValue(I.getArgOperand(1)).getValueType() == MVT::f32 && + if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) { - if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { - APFloat Ten(10.0f); - IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten); - } + if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) { + APFloat Ten(10.0f); + IsExp10 = LHSC->isExactlyValue(Ten); } } - if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getArgOperand(1)); - + if (IsExp10) { // Put the exponent in the right bit position for later addition to the // final result: // // #define LOG2OF10 3.3219281f // IntegerPartOfX = (int32_t)(x * LOG2OF10); - SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, getF32Constant(DAG, 0x40549a78)); SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); @@ -4265,6 +4204,7 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, DAG.getConstant(23, TLI.getPointerTy())); + SDValue TwoToFractionalPartOfX; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // @@ -4278,15 +4218,9 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f3c50c8)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5); - SDValue TwoToFractionalPartOfX = - DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); - - result = DAG.getNode(ISD::BITCAST, dl, - MVT::f32, TwoToFractionalPartOfX); - } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // TwoToFractionalPartOfX = @@ -4303,15 +4237,9 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f324b07)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7); - SDValue TwoToFractionalPartOfX = - DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); - - result = DAG.getNode(ISD::BITCAST, dl, - MVT::f32, TwoToFractionalPartOfX); - } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 +
TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // TwoToFractionalPartOfX = @@ -4339,24 +4267,18 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, getF32Constant(DAG, 0x3f317234)); SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13); - SDValue TwoToFractionalPartOfX = - DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); - - result = DAG.getNode(ISD::BITCAST, dl, - MVT::f32, TwoToFractionalPartOfX); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); } - } else { - // No special expansion. - result = DAG.getNode(ISD::FPOW, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1))); + + SDValue t13 = DAG.getNode(ISD::BITCAST, dl,MVT::i32,TwoToFractionalPartOfX); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::ADD, dl, MVT::i32, + t13, IntegerPartOfX)); } - setValue(&I, result); + // No special expansion. + return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); } @@ -4377,7 +4299,8 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, return DAG.getConstantFP(1.0, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->hasFnAttr(Attribute::OptimizeForSize) || + if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize) || // If optimizing for size, don't insert too many multiplies. This // inserts up to 5 multiplies. CountPopulation_32(Val)+Log2_32(Val) < 7) { @@ -4850,7 +4773,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits // to be zero. // We must do this early because v2i32 is not a legal type. 
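Back at ExpandPowI above: the rewritten guard reads the OptimizeForSize attribute through the new AttributeSet API, and the expansion it protects is ordinary exponentiation by squaring, costing CountPopulation_32(Val) + Log2_32(Val) multiplies; under -Os a constant exponent is expanded only while that sum stays below 7, i.e. at most 5 multiplies as the comment says. A minimal sketch of the loop the DAG expansion unrolls (illustrative, not the LLVM code):

#include <cstdio>

static double powiBySquaring(double base, unsigned val, bool negExponent) {
  double result = 1.0, curSquare = base;
  while (val) {
    if (val & 1)
      result *= curSquare;       // one multiply per set bit: popcount(val)
    val >>= 1;
    if (val)
      curSquare *= curSquare;    // one multiply per halving: floor(log2(val))
  }
  return negExponent ? 1.0 / result : result;
}

int main() {
  // powi(2.0, 10): popcount(10) + log2(10) = 2 + 3 = 5 < 7, so it expands.
  std::printf("%f\n", powiBySquaring(2.0, 10, false)); // 1024.000000
}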
- DebugLoc dl = getCurDebugLoc(); SDValue ShOps[2]; ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, MVT::i32); @@ -4867,7 +4789,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::x86_avx_vinsertf128_ps_256: case Intrinsic::x86_avx_vinsertf128_si_256: case Intrinsic::x86_avx2_vinserti128: { - DebugLoc dl = getCurDebugLoc(); EVT DestVT = TLI.getValueType(I.getType()); EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * @@ -4883,7 +4804,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::x86_avx_vextractf128_ps_256: case Intrinsic::x86_avx_vextractf128_si_256: case Intrinsic::x86_avx2_vextracti128: { - DebugLoc dl = getCurDebugLoc(); EVT DestVT = TLI.getValueType(I.getType()); uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * DestVT.getVectorNumElements(); @@ -4917,7 +4837,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } EVT DestVT = TLI.getValueType(I.getType()); const Value *Op1 = I.getArgOperand(0); - Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1), + Res = DAG.getConvertRndSat(DestVT, dl, getValue(Op1), DAG.getValueType(DestVT), DAG.getValueType(getValue(Op1).getValueType()), getValue(I.getArgOperand(1)), @@ -4926,53 +4846,57 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return 0; } - case Intrinsic::sqrt: - setValue(&I, DAG.getNode(ISD::FSQRT, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)))); - return 0; case Intrinsic::powi: setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); return 0; - case Intrinsic::sin: - setValue(&I, DAG.getNode(ISD::FSIN, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)))); - return 0; - case Intrinsic::cos: - setValue(&I, DAG.getNode(ISD::FCOS, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)))); - return 0; case Intrinsic::log: - visitLog(I); + setValue(&I, expandLog(dl, getValue(I.getArgOperand(0)), DAG, TLI)); return 0; case Intrinsic::log2: - visitLog2(I); + setValue(&I, expandLog2(dl, getValue(I.getArgOperand(0)), DAG, TLI)); return 0; case Intrinsic::log10: - visitLog10(I); + setValue(&I, expandLog10(dl, getValue(I.getArgOperand(0)), DAG, TLI)); return 0; case Intrinsic::exp: - visitExp(I); + setValue(&I, expandExp(dl, getValue(I.getArgOperand(0)), DAG, TLI)); return 0; case Intrinsic::exp2: - visitExp2(I); + setValue(&I, expandExp2(dl, getValue(I.getArgOperand(0)), DAG, TLI)); return 0; case Intrinsic::pow: - visitPow(I); + setValue(&I, expandPow(dl, getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), DAG, TLI)); return 0; + case Intrinsic::sqrt: case Intrinsic::fabs: - setValue(&I, DAG.getNode(ISD::FABS, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)))); - return 0; + case Intrinsic::sin: + case Intrinsic::cos: case Intrinsic::floor: - setValue(&I, DAG.getNode(ISD::FFLOOR, dl, + case Intrinsic::ceil: + case Intrinsic::trunc: + case Intrinsic::rint: + case Intrinsic::nearbyint: { + unsigned Opcode; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; + case Intrinsic::fabs: Opcode = ISD::FABS; break; + case Intrinsic::sin: Opcode = ISD::FSIN; break; + case Intrinsic::cos: Opcode = ISD::FCOS; break; + case Intrinsic::floor: Opcode = ISD::FFLOOR; break; + case Intrinsic::ceil: Opcode = ISD::FCEIL; break; + case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; + case Intrinsic::rint: Opcode = ISD::FRINT; break; + case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + } + + setValue(&I, DAG.getNode(Opcode, dl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return 0; + } case Intrinsic::fma: setValue(&I, DAG.getNode(ISD::FMA, dl, getValue(I.getArgOperand(0)).getValueType(), @@ -4983,7 +4907,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && - TLI.isOperationLegal(ISD::FMA, VT) && + TLI.isOperationLegalOrCustom(ISD::FMA, VT) && TLI.isFMAFasterThanMulAndAdd(VT)){ setValue(&I, DAG.getNode(ISD::FMA, dl, getValue(I.getArgOperand(0)).getValueType(), @@ -5080,7 +5004,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. - Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN, + Res = DAG.getStore(getRoot(), dl, Src, FIN, MachinePointerInfo::getFixedStack(FI), true, false, 0); setValue(&I, Res); @@ -5152,10 +5076,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } + case Intrinsic::debugtrap: case Intrinsic::trap: { StringRef TrapFuncName = TM.Options.getTrapFunctionName(); if (TrapFuncName.empty()) { - DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? + ISD::TRAP : ISD::DEBUGTRAP; + DAG.setRoot(DAG.getNode(Op, dl,MVT::Other, getRoot())); return 0; } TargetLowering::ArgListTy Args; @@ -5165,15 +5092,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { /*isTailCall=*/false, /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), - Args, DAG, getCurDebugLoc()); + Args, DAG, dl); std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); return 0; } - case Intrinsic::debugtrap: { - DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, dl,MVT::Other, getRoot())); - return 0; - } + case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: case Intrinsic::usub_with_overflow: @@ -5194,7 +5118,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op2 = getValue(I.getArgOperand(1)); SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); - setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); + setValue(&I, DAG.getNode(Op, dl, VTs, Op1, Op2)); return 0; } case Intrinsic::prefetch: { @@ -5273,8 +5197,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Check whether the function can return without sret-demotion. 
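Two behavioral changes sit in the hunks above: the unary FP intrinsics now share one intrinsic-to-ISD-opcode switch, and fmuladd contracts to a single FMA node when fusion is not strict and FMA is legal or custom for the type, where the old code demanded fully legal. A toy sketch of the relaxed predicate, with stub booleans standing in for the TM.Options and TLI queries (the struct and names are mine, purely illustrative):

#include <cstdio>

struct TargetStub {
  bool strictFusionOnly;  // TM.Options.AllowFPOpFusion == Strict
  bool fmaLegal;          // ISD::FMA marked Legal for VT
  bool fmaCustom;         // ISD::FMA marked Custom for VT
  bool fmaFaster;         // TLI.isFMAFasterThanMulAndAdd(VT)
};

static bool shouldFuseToFMA(const TargetStub &t) {
  bool legalOrCustom = t.fmaLegal || t.fmaCustom;  // the relaxed check
  return !t.strictFusionOnly && legalOrCustom && t.fmaFaster;
}

int main() {
  TargetStub customFMA = {false, false, true, true};
  // Under the old isOperationLegal test this target would stay FMUL+FADD.
  std::printf("fuse: %s\n", shouldFuseToFMA(customFMA) ? "yes" : "no");
}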
SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), - Outs, TLI); + GetReturnInfo(RetTy, CS.getAttributes(), Outs, TLI); bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), DAG.getMachineFunction(), @@ -5285,9 +5208,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, int DemoteStackIdx = -100; if (!CanLowerReturn) { - uint64_t TySize = TLI.getTargetData()->getTypeAllocSize( + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize( FTy->getReturnType()); - unsigned Align = TLI.getTargetData()->getPrefTypeAlignment( + unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment( FTy->getReturnType()); MachineFunction &MF = DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); @@ -5357,11 +5280,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI)) isTailCall = false; - // If there's a possibility that fast-isel has already selected some amount - // of the current basic block, don't emit a tail call. - if (isTailCall && TM.Options.EnableFastISel) - isTailCall = false; - TargetLowering:: CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG, getCurDebugLoc(), CS); @@ -5752,7 +5670,7 @@ public: /// MVT::Other. EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, - const TargetData *TD) const { + const DataLayout *TD) const { if (CallOperandVal == 0) return MVT::Other; if (isa<BasicBlock>(CallOperandVal)) @@ -5833,7 +5751,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, // Try to convert to the first EVT that the reg class contains. If the // types are identical size, use a bitcast to convert (e.g. two differing // vector types). - EVT RegVT = *PhysReg.second->vt_begin(); + MVT RegVT = *PhysReg.second->vt_begin(); if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); @@ -5843,8 +5761,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, // bitcast to the corresponding integer type. This turns an f64 value // into i64, which can be passed with two i32 values on a 32-bit // machine. - RegVT = EVT::getIntegerVT(Context, - OpInfo.ConstraintVT.getSizeInBits()); + RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; @@ -5854,7 +5771,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); } - EVT RegVT; + MVT RegVT; EVT ValueVT = OpInfo.ConstraintVT; // If this is a constraint for a specific physical register, like {r17}, @@ -5928,7 +5845,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i])); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); - EVT OpVT = MVT::Other; + MVT OpVT = MVT::Other; // Compute the value type for each operand. switch (OpInfo.Type) { @@ -5943,10 +5860,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // corresponding argument. 
assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast<StructType>(CS.getType())) { - OpVT = TLI.getValueType(STy->getElementType(ResNo)); + OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpVT = TLI.getValueType(CS.getType()); + OpVT = TLI.getSimpleValueType(CS.getType()); } ++ResNo; break; @@ -5967,7 +5884,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD); + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD). + getSimpleVT(); } OpInfo.ConstraintVT = OpVT; @@ -6056,8 +5974,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Otherwise, create a stack slot and emit a store to it before the // asm. Type *Ty = OpVal->getType(); - uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); - unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty); + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); + unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); @@ -6105,7 +6023,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); - // Remember the HasSideEffect, AlignStack and AsmDialect bits as operand 3. + // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore + // bits as operand 3. unsigned ExtraInfo = 0; if (IA->hasSideEffects()) ExtraInfo |= InlineAsm::Extra_HasSideEffects; @@ -6113,6 +6032,27 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { ExtraInfo |= InlineAsm::Extra_IsAlignStack; // Set the asm dialect. ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; + + // Determine if this InlineAsm MayLoad or MayStore based on the constraints. + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; + + // Compute the constraint code and ConstraintType to use. + TLI.ComputeConstraintToUse(OpInfo, SDValue()); + + // Ideally, we would only check against memory constraints. However, the + // meaning of an 'other' constraint can be target-specific and we can't easily + // reason about it. Therefore, be conservative and set MayLoad/MayStore + // for 'other' constraints as well.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory || + OpInfo.ConstraintType == TargetLowering::C_Other) { + if (OpInfo.Type == InlineAsm::isInput) + ExtraInfo |= InlineAsm::Extra_MayLoad; + else if (OpInfo.Type == InlineAsm::isOutput) + ExtraInfo |= InlineAsm::Extra_MayStore; + } + } + AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, TLI.getPointerTy())); @@ -6212,7 +6152,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { RegsForValue MatchedRegs; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); - EVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); + MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType(); MatchedRegs.RegVTs.push_back(RegVT); MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); @@ -6222,7 +6162,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), DAG, AsmNodeOperands); @@ -6304,7 +6244,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, CS.getInstruction()); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, DAG, AsmNodeOperands); @@ -6335,7 +6275,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // and set it as the value of the call. if (!RetValRegs.Regs.empty()) { SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, CS.getInstruction()); // FIXME: Why don't we do this for inline asms with MRVs? if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { @@ -6375,7 +6315,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; const Value *Ptr = IndirectStoresToEmit[i].second; SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, IA); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); } @@ -6405,7 +6345,7 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { - const TargetData &TD = *TLI.getTargetData(); + const DataLayout &TD = *TLI.getDataLayout(); SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), @@ -6451,7 +6391,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; unsigned OriginalAlignment = - getTargetData()->getABITypeAlignment(ArgTy); + getDataLayout()->getABITypeAlignment(ArgTy); if (Args[i].isZExt) Flags.setZExt(); @@ -6465,7 +6405,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setByVal(); PointerType *Ty = cast<PointerType>(Args[i].Ty); Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy)); + Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); // For ByVal, alignment should come from FE. BE will guess if this // info is not there but there are cases it cannot get right. 
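Returning to the inline-asm extra-info word assembled in the previous hunk: MayLoad/MayStore join HasSideEffects, IsAlignStack and the asm dialect in one packed immediate operand. A sketch of the encoding; the flag values below are illustrative stand-ins for the real InlineAsm::Extra_* enumerators:

#include <cstdint>
#include <cstdio>

enum ExtraFlags : uint32_t {            // assumed values, for illustration only
  ExtraHasSideEffects = 1u << 0,
  ExtraIsAlignStack   = 1u << 1,
  ExtraMayLoad        = 1u << 3,
  ExtraMayStore       = 1u << 4,
};

int main() {
  uint32_t extraInfo = 0;
  bool hasSideEffects = true, memInput = true, memOutput = false;
  if (hasSideEffects) extraInfo |= ExtraHasSideEffects;
  // Conservative rule from the patch: memory and "other" constraints set
  // MayLoad on inputs and MayStore on outputs.
  if (memInput)  extraInfo |= ExtraMayLoad;
  if (memOutput) extraInfo |= ExtraMayStore;
  std::printf("extra info word: 0x%x\n", extraInfo); // 0x9
}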
unsigned FrameAlign; @@ -6479,7 +6419,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setNest(); Flags.setOrigAlign(OriginalAlignment); - EVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); + MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); SmallVector<SDValue, 4> Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; @@ -6490,12 +6430,13 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { ExtendKind = ISD::ZERO_EXTEND; getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, - PartVT, ExtendKind); + PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), - i < CLI.NumFixedArgs); + i < CLI.NumFixedArgs, + i, j*Parts[j].getValueType().getStoreSize()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) @@ -6513,11 +6454,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { ComputeValueVTs(*this, CLI.RetTy, RetTys); for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT.getSimpleVT(); + MyFlags.VT = RegisterVT; MyFlags.Used = CLI.IsReturnValueUsed; if (CLI.RetSExt) MyFlags.Flags.setSExt(); @@ -6567,11 +6508,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { unsigned CurReg = 0; for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], - NumRegs, RegisterVT, VT, + NumRegs, RegisterVT, VT, NULL, AssertOp)); CurReg += NumRegs; } @@ -6610,7 +6551,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); - RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0); + RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0, V); PendingExports.push_back(Chain); } @@ -6640,13 +6581,12 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { const Function &F = *LLVMBB->getParent(); SelectionDAG &DAG = SDB->DAG; DebugLoc dl = SDB->getCurDebugLoc(); - const TargetData *TD = TLI.getTargetData(); + const DataLayout *TD = TLI.getDataLayout(); SmallVector<ISD::InputArg, 16> Ins; // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), - Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. @@ -6657,8 +6597,8 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // or one register. 
ISD::ArgFlagsTy Flags; Flags.setSRet(); - EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]); - ISD::InputArg RetArg(Flags, RegisterVT, true); + MVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]); + ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0); Ins.push_back(RetArg); } @@ -6677,15 +6617,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { unsigned OriginalAlignment = TD->getABITypeAlignment(ArgTy); - if (F.paramHasAttr(Idx, Attribute::ZExt)) + if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) Flags.setZExt(); - if (F.paramHasAttr(Idx, Attribute::SExt)) + if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) Flags.setSExt(); - if (F.paramHasAttr(Idx, Attribute::InReg)) + if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) Flags.setInReg(); - if (F.paramHasAttr(Idx, Attribute::StructRet)) + if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) Flags.setSRet(); - if (F.paramHasAttr(Idx, Attribute::ByVal)) { + if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) { Flags.setByVal(); PointerType *Ty = cast<PointerType>(I->getType()); Type *ElementTy = Ty->getElementType(); @@ -6699,14 +6639,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { FrameAlign = TLI.getByValTypeAlignment(ElementTy); Flags.setByValAlign(FrameAlign); } - if (F.paramHasAttr(Idx, Attribute::Nest)) + if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) Flags.setNest(); Flags.setOrigAlign(OriginalAlignment); - EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + MVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT); unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed); + ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed, + Idx-1, i*RegisterVT.getStoreSize()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 @@ -6748,11 +6689,11 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // from the sret argument into it. 
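The LowerCallTo and LowerArguments changes around here both touch sret demotion: when CanLowerReturn says the value cannot travel in registers, a hidden pointer argument is prepended, the callee stores through it, and the caller reads the result back out of a dedicated stack slot (DemoteStackIdx). A source-level analogue of the transformation, illustrative only:

#include <cstring>

struct Big { long v[8]; };  // too wide for the return registers on many ABIs

// After demotion, the callee stores through a hidden first parameter.
static void calleeDemoted(Big *sret, int x) {
  Big tmp = {};
  tmp.v[0] = x;
  std::memcpy(sret, &tmp, sizeof(Big));
}

// The caller supplies a stack slot and copies the value out afterwards.
static Big caller(int x) {
  Big slot;
  calleeDemoted(&slot, x);
  return slot;
}

int main() { return int(caller(7).v[0]); } // 7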
SmallVector<EVT, 1> ValueVTs; ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); - EVT VT = ValueVTs[0]; - EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + MVT VT = ValueVTs[0].getSimpleVT(); + MVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT); ISD::NodeType AssertOp = ISD::DELETED_NODE; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, - RegVT, VT, AssertOp); + RegVT, VT, NULL, AssertOp); MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); @@ -6781,19 +6722,19 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { for (unsigned Val = 0; Val != NumValues; ++Val) { EVT VT = ValueVTs[Val]; - EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + MVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT); if (!I->use_empty()) { ISD::NodeType AssertOp = ISD::DELETED_NODE; - if (F.paramHasAttr(Idx, Attribute::SExt)) + if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) AssertOp = ISD::AssertSext; - else if (F.paramHasAttr(Idx, Attribute::ZExt)) + else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) AssertOp = ISD::AssertZext; ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, PartVT, VT, - AssertOp)); + NULL, AssertOp)); } i += NumParts; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 3b7615a..9188945 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -14,12 +14,12 @@ #ifndef SELECTIONDAGBUILDER_H #define SELECTIONDAGBUILDER_H -#include "llvm/Constants.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Constants.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include <vector> @@ -66,7 +66,7 @@ class ShuffleVectorInst; class SIToFPInst; class StoreInst; class SwitchInst; -class TargetData; +class DataLayout; class TargetLibraryInfo; class TargetLowering; class TruncInst; @@ -262,7 +262,7 @@ private: struct BitTestBlock { BitTestBlock(APInt F, APInt R, const Value* SV, - unsigned Rg, EVT RgVT, bool E, + unsigned Rg, MVT RgVT, bool E, MachineBasicBlock* P, MachineBasicBlock* D, const BitTestInfo& C): First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), @@ -271,7 +271,7 @@ private: APInt Range; const Value *SValue; unsigned Reg; - EVT RegVT; + MVT RegVT; bool Emitted; MachineBasicBlock *Parent; MachineBasicBlock *Default; @@ -285,7 +285,7 @@ public: const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; - const TargetData *TD; + const DataLayout *TD; AliasAnalysis *AA; const TargetLibraryInfo *LibInfo; @@ -533,13 +533,6 @@ private: const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); - void visitPow(const CallInst &I); - void visitExp2(const CallInst &I); - void visitExp(const CallInst &I); - void visitLog(const CallInst &I); - void visitLog2(const CallInst &I); - void visitLog10(const CallInst &I); - void visitVAStart(const CallInst &I); void visitVAArg(const VAArgInst &I); void visitVAEnd(const CallInst &I); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp 
b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 75989ad..5701b13 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -11,23 +11,23 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/SelectionDAG.h" #include "ScheduleDAGSDNodes.h" -#include "llvm/DebugInfo.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Assembly/Writer.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/GraphWriter.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/StringExtras.h" using namespace llvm; std::string SDNode::getOperationName(const SelectionDAG *G) const { @@ -475,11 +475,16 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << "<" << *M->getMemOperand() << ">"; } else if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(this)) { + int64_t offset = BA->getOffset(); OS << "<"; WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); OS << ", "; WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); OS << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; if (unsigned int TF = BA->getTargetFlags()) OS << " [TF=" << TF << ']'; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 7542941..d4e9a50 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -12,23 +12,18 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "isel" +#include "llvm/CodeGen/SelectionDAGISel.h" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" -#include "llvm/Constants.h" -#include "llvm/DebugInfo.h" -#include "llvm/Function.h" -#include "llvm/InlineAsm.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -37,22 +32,29 @@ #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include 
"llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/Statistic.h" #include <algorithm> using namespace llvm; @@ -216,8 +218,9 @@ namespace llvm { ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { const TargetLowering &TLI = IS->getTargetLowering(); + const TargetSubtargetInfo &ST = IS->TM.getSubtarget<TargetSubtargetInfo>(); - if (OptLevel == CodeGenOpt::None || + if (OptLevel == CodeGenOpt::None || ST.enableMachineScheduler() || TLI.getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::RegPressure) @@ -348,13 +351,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { RegInfo = &MF->getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); LibInfo = &getAnalysis<TargetLibraryInfo>(); + TTI = getAnalysisIfAvailable<TargetTransformInfo>(); GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); - CurDAG->init(*MF); + CurDAG->init(*MF, TTI); FuncInfo->set(Fn, *MF); if (UseMBPI && OptLevel != CodeGenOpt::None) @@ -474,6 +478,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MRI.replaceRegWith(From, To); } + // Freeze the set of reserved registers now that MachineFrameInfo has been + // set up. All the information required by getReservedRegs() should be + // available now. + MRI.freezeReservedRegs(*MF); + // Release function-specific state. SDB and CurDAG are already cleared // at this point. FuncInfo->clear(); @@ -999,12 +1008,12 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (AllPredsVisited) { for (BasicBlock::const_iterator I = LLVMBB->begin(); - isa<PHINode>(I); ++I) - FuncInfo->ComputePHILiveOutRegInfo(cast<PHINode>(I)); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) + FuncInfo->ComputePHILiveOutRegInfo(PN); } else { for (BasicBlock::const_iterator I = LLVMBB->begin(); - isa<PHINode>(I); ++I) - FuncInfo->InvalidatePHILiveOutRegInfo(cast<PHINode>(I)); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) + FuncInfo->InvalidatePHILiveOutRegInfo(PN); } FuncInfo->VisitedBBs.insert(LLVMBB); @@ -1106,19 +1115,21 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } bool HadTailCall = false; + MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt; SelectBasicBlock(Inst, BI, HadTailCall); - // Recompute NumFastIselRemaining as Selection DAG instruction - // selection may have handled the call, input args, etc. 
- unsigned RemainingNow = std::distance(Begin, BI); - NumFastIselFailures += NumFastIselRemaining - RemainingNow; - // If the call was emitted as a tail call, we're done with the block. + // We also need to delete any previously emitted instructions. if (HadTailCall) { + FastIS->removeDeadCode(SavedInsertPt, FuncInfo->MBB->end()); --BI; break; } + // Recompute NumFastIselRemaining as Selection DAG instruction + // selection may have handled the call, input args, etc. + unsigned RemainingNow = std::distance(Begin, BI); + NumFastIselFailures += NumFastIselRemaining - RemainingNow; NumFastIselRemaining = RemainingNow; continue; } @@ -1184,14 +1195,12 @@ SelectionDAGISel::FinishBasicBlock() { SDB->JTCases.empty() && SDB->BitTestCases.empty()) { for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { - MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); if (!FuncInfo->MBB->isSuccessor(PHI->getParent())) continue; - PHI->addOperand( - MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); - PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); + PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); } return; } @@ -1243,33 +1252,23 @@ SelectionDAGISel::FinishBasicBlock() { // Update PHI Nodes for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); pi != pe; ++pi) { - MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first; + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first); MachineBasicBlock *PHIBB = PHI->getParent(); assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); // This is "default" BB. We have two jumps to it. From "header" BB and // from last "case" BB. - if (PHIBB == SDB->BitTestCases[i].Default) { - PHI->addOperand(MachineOperand:: - CreateReg(FuncInfo->PHINodesToUpdate[pi].second, - false)); - PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent)); - PHI->addOperand(MachineOperand:: - CreateReg(FuncInfo->PHINodesToUpdate[pi].second, - false)); - PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases. - back().ThisBB)); - } + if (PHIBB == SDB->BitTestCases[i].Default) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) + .addMBB(SDB->BitTestCases[i].Parent) + .addReg(FuncInfo->PHINodesToUpdate[pi].second) + .addMBB(SDB->BitTestCases[i].Cases.back().ThisBB); // One of "cases" BB. for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB; - if (cBB->isSuccessor(PHIBB)) { - PHI->addOperand(MachineOperand:: - CreateReg(FuncInfo->PHINodesToUpdate[pi].second, - false)); - PHI->addOperand(MachineOperand::CreateMBB(cBB)); - } + if (cBB->isSuccessor(PHIBB)) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB); } } } @@ -1304,25 +1303,17 @@ SelectionDAGISel::FinishBasicBlock() { // Update PHI Nodes for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); pi != pe; ++pi) { - MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first; + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first); MachineBasicBlock *PHIBB = PHI->getParent(); assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); // "default" BB. We can go there only from header BB. 
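A small C++ point about the PHI pre-walk rewritten a couple of hunks up (the ComputePHILiveOutRegInfo loops): the patch replaces isa<PHINode> in the loop condition plus a cast<> in the body with a single dyn_cast<> declared in the condition, so the loop runs while the cast succeeds and the typed pointer is rebound each iteration. The same idiom in a self-contained toy, where asPhi stands in for dyn_cast<PHINode>:

#include <cstdio>

struct Node { int kind; int val; };  // kind 0 plays the role of PHINode

static const Node *asPhi(const Node *n) { return n->kind == 0 ? n : nullptr; }

int main() {
  // The non-phi sentinel terminates the walk, like the first non-PHI
  // instruction in a basic block.
  Node nodes[] = {{0, 1}, {0, 2}, {1, 3}};
  for (const Node *it = nodes; const Node *phi = asPhi(it); ++it)
    std::printf("phi %d\n", phi->val);
}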
- if (PHIBB == SDB->JTCases[i].second.Default) { - PHI->addOperand - (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second, - false)); - PHI->addOperand - (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB)); - } + if (PHIBB == SDB->JTCases[i].second.Default) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) + .addMBB(SDB->JTCases[i].first.HeaderBB); // JT BB. Just iterate over successors here - if (FuncInfo->MBB->isSuccessor(PHIBB)) { - PHI->addOperand - (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second, - false)); - PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); - } + if (FuncInfo->MBB->isSuccessor(PHIBB)) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB); } } SDB->JTCases.clear(); @@ -1330,14 +1321,11 @@ SelectionDAGISel::FinishBasicBlock() { // If the switch block involved a branch to one of the actual successors, we // need to update PHI nodes in that block. for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { - MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); - if (FuncInfo->MBB->isSuccessor(PHI->getParent())) { - PHI->addOperand( - MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); - PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); - } + if (FuncInfo->MBB->isSuccessor(PHI->getParent())) + PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); } // If we generated any switch lowering information, build and codegen any @@ -1373,18 +1361,16 @@ SelectionDAGISel::FinishBasicBlock() { // FuncInfo->MBB may have been removed from the CFG if a branch was // constant folded. if (ThisBB->isSuccessor(FuncInfo->MBB)) { - for (MachineBasicBlock::iterator Phi = FuncInfo->MBB->begin(); - Phi != FuncInfo->MBB->end() && Phi->isPHI(); - ++Phi) { + for (MachineBasicBlock::iterator + MBBI = FuncInfo->MBB->begin(), MBBE = FuncInfo->MBB->end(); + MBBI != MBBE && MBBI->isPHI(); ++MBBI) { + MachineInstrBuilder PHI(*MF, MBBI); // This value for this PHI node is recorded in PHINodesToUpdate. for (unsigned pn = 0; ; ++pn) { assert(pn != FuncInfo->PHINodesToUpdate.size() && "Didn't find PHI entry!"); - if (FuncInfo->PHINodesToUpdate[pn].first == Phi) { - Phi->addOperand(MachineOperand:: - CreateReg(FuncInfo->PHINodesToUpdate[pn].second, - false)); - Phi->addOperand(MachineOperand::CreateMBB(ThisBB)); + if (FuncInfo->PHINodesToUpdate[pn].first == PHI) { + PHI.addReg(FuncInfo->PHINodesToUpdate[pn].second).addMBB(ThisBB); break; } } @@ -2004,7 +1990,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, return Res; } -/// CheckPatternPredicate - Implements OP_CheckPatternPredicate. +/// CheckSame - Implements OP_CheckSame. 
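All of the PHI-update sites above trade paired addOperand(MachineOperand::CreateReg(...)) / addOperand(MachineOperand::CreateMBB(...)) calls for MachineInstrBuilder's chained addReg().addMBB() form. A toy fluent builder showing the shape of that API (illustrative, not the LLVM class):

#include <cstdio>
#include <vector>

struct Operand { bool isReg; unsigned value; };

class InstrBuilder {
  std::vector<Operand> ops;
public:
  // Each setter returns *this so (register, block) pairs chain naturally.
  InstrBuilder &addReg(unsigned r) { ops.push_back({true, r}); return *this; }
  InstrBuilder &addMBB(unsigned b) { ops.push_back({false, b}); return *this; }
  void dump() const {
    for (const Operand &o : ops)
      std::printf(o.isReg ? "%%vreg%u " : "<BB#%u> ", o.value);
    std::printf("\n");
  }
};

int main() {
  InstrBuilder phi;
  phi.addReg(5).addMBB(2).addReg(5).addMBB(7); // one pair per predecessor
  phi.dump(); // %vreg5 <BB#2> %vreg5 <BB#7>
}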
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 3921635..b752b48 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -11,21 +11,21 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/SelectionDAG.h" #include "ScheduleDAGSDNodes.h" -#include "llvm/Constants.h" -#include "llvm/DebugInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Assembly/Writer.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; namespace llvm { diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index dcaa9ba..34f3bc9 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -12,24 +12,24 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetLowering.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/GlobalVariable.h" -#include "llvm/DerivedTypes.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include <cctype> using namespace llvm; @@ -94,98 +94,124 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::ADD_F32] = "__addsf3"; Names[RTLIB::ADD_F64] = "__adddf3"; Names[RTLIB::ADD_F80] = "__addxf3"; + Names[RTLIB::ADD_F128] = "__addtf3"; Names[RTLIB::ADD_PPCF128] = "__gcc_qadd"; Names[RTLIB::SUB_F32] = "__subsf3"; Names[RTLIB::SUB_F64] = "__subdf3"; Names[RTLIB::SUB_F80] = "__subxf3"; + Names[RTLIB::SUB_F128] = "__subtf3"; Names[RTLIB::SUB_PPCF128] = "__gcc_qsub"; Names[RTLIB::MUL_F32] = "__mulsf3"; Names[RTLIB::MUL_F64] = "__muldf3"; Names[RTLIB::MUL_F80] = "__mulxf3"; + Names[RTLIB::MUL_F128] = "__multf3"; Names[RTLIB::MUL_PPCF128] = "__gcc_qmul"; Names[RTLIB::DIV_F32] = "__divsf3"; Names[RTLIB::DIV_F64] = 
"__divdf3"; Names[RTLIB::DIV_F80] = "__divxf3"; + Names[RTLIB::DIV_F128] = "__divtf3"; Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv"; Names[RTLIB::REM_F32] = "fmodf"; Names[RTLIB::REM_F64] = "fmod"; Names[RTLIB::REM_F80] = "fmodl"; + Names[RTLIB::REM_F128] = "fmodl"; Names[RTLIB::REM_PPCF128] = "fmodl"; Names[RTLIB::FMA_F32] = "fmaf"; Names[RTLIB::FMA_F64] = "fma"; Names[RTLIB::FMA_F80] = "fmal"; + Names[RTLIB::FMA_F128] = "fmal"; Names[RTLIB::FMA_PPCF128] = "fmal"; Names[RTLIB::POWI_F32] = "__powisf2"; Names[RTLIB::POWI_F64] = "__powidf2"; Names[RTLIB::POWI_F80] = "__powixf2"; + Names[RTLIB::POWI_F128] = "__powitf2"; Names[RTLIB::POWI_PPCF128] = "__powitf2"; Names[RTLIB::SQRT_F32] = "sqrtf"; Names[RTLIB::SQRT_F64] = "sqrt"; Names[RTLIB::SQRT_F80] = "sqrtl"; + Names[RTLIB::SQRT_F128] = "sqrtl"; Names[RTLIB::SQRT_PPCF128] = "sqrtl"; Names[RTLIB::LOG_F32] = "logf"; Names[RTLIB::LOG_F64] = "log"; Names[RTLIB::LOG_F80] = "logl"; + Names[RTLIB::LOG_F128] = "logl"; Names[RTLIB::LOG_PPCF128] = "logl"; Names[RTLIB::LOG2_F32] = "log2f"; Names[RTLIB::LOG2_F64] = "log2"; Names[RTLIB::LOG2_F80] = "log2l"; + Names[RTLIB::LOG2_F128] = "log2l"; Names[RTLIB::LOG2_PPCF128] = "log2l"; Names[RTLIB::LOG10_F32] = "log10f"; Names[RTLIB::LOG10_F64] = "log10"; Names[RTLIB::LOG10_F80] = "log10l"; + Names[RTLIB::LOG10_F128] = "log10l"; Names[RTLIB::LOG10_PPCF128] = "log10l"; Names[RTLIB::EXP_F32] = "expf"; Names[RTLIB::EXP_F64] = "exp"; Names[RTLIB::EXP_F80] = "expl"; + Names[RTLIB::EXP_F128] = "expl"; Names[RTLIB::EXP_PPCF128] = "expl"; Names[RTLIB::EXP2_F32] = "exp2f"; Names[RTLIB::EXP2_F64] = "exp2"; Names[RTLIB::EXP2_F80] = "exp2l"; + Names[RTLIB::EXP2_F128] = "exp2l"; Names[RTLIB::EXP2_PPCF128] = "exp2l"; Names[RTLIB::SIN_F32] = "sinf"; Names[RTLIB::SIN_F64] = "sin"; Names[RTLIB::SIN_F80] = "sinl"; + Names[RTLIB::SIN_F128] = "sinl"; Names[RTLIB::SIN_PPCF128] = "sinl"; Names[RTLIB::COS_F32] = "cosf"; Names[RTLIB::COS_F64] = "cos"; Names[RTLIB::COS_F80] = "cosl"; + Names[RTLIB::COS_F128] = "cosl"; Names[RTLIB::COS_PPCF128] = "cosl"; Names[RTLIB::POW_F32] = "powf"; Names[RTLIB::POW_F64] = "pow"; Names[RTLIB::POW_F80] = "powl"; + Names[RTLIB::POW_F128] = "powl"; Names[RTLIB::POW_PPCF128] = "powl"; Names[RTLIB::CEIL_F32] = "ceilf"; Names[RTLIB::CEIL_F64] = "ceil"; Names[RTLIB::CEIL_F80] = "ceill"; + Names[RTLIB::CEIL_F128] = "ceill"; Names[RTLIB::CEIL_PPCF128] = "ceill"; Names[RTLIB::TRUNC_F32] = "truncf"; Names[RTLIB::TRUNC_F64] = "trunc"; Names[RTLIB::TRUNC_F80] = "truncl"; + Names[RTLIB::TRUNC_F128] = "truncl"; Names[RTLIB::TRUNC_PPCF128] = "truncl"; Names[RTLIB::RINT_F32] = "rintf"; Names[RTLIB::RINT_F64] = "rint"; Names[RTLIB::RINT_F80] = "rintl"; + Names[RTLIB::RINT_F128] = "rintl"; Names[RTLIB::RINT_PPCF128] = "rintl"; Names[RTLIB::NEARBYINT_F32] = "nearbyintf"; Names[RTLIB::NEARBYINT_F64] = "nearbyint"; Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; + Names[RTLIB::NEARBYINT_F128] = "nearbyintl"; Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; Names[RTLIB::FLOOR_F32] = "floorf"; Names[RTLIB::FLOOR_F64] = "floor"; Names[RTLIB::FLOOR_F80] = "floorl"; + Names[RTLIB::FLOOR_F128] = "floorl"; Names[RTLIB::FLOOR_PPCF128] = "floorl"; Names[RTLIB::COPYSIGN_F32] = "copysignf"; Names[RTLIB::COPYSIGN_F64] = "copysign"; Names[RTLIB::COPYSIGN_F80] = "copysignl"; + Names[RTLIB::COPYSIGN_F128] = "copysignl"; Names[RTLIB::COPYSIGN_PPCF128] = "copysignl"; + Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2"; + Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2"; Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; Names[RTLIB::FPEXT_F16_F32] = 
"__gnu_h2f_ieee"; Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee"; Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; + Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2"; Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; + Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2"; Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi"; Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi"; @@ -200,6 +226,9 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi"; Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi"; Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti"; + Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi"; + Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi"; + Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti"; Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; @@ -216,49 +245,66 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi"; Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi"; Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti"; + Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi"; + Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi"; + Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti"; Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi"; Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi"; Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti"; Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf"; + Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf"; Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf"; Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; + Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf"; Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf"; Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf"; Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf"; Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf"; + Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf"; Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf"; Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf"; + Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf"; Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf"; Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf"; + Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf"; Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf"; Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf"; Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf"; Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf"; + Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf"; Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf"; Names[RTLIB::OEQ_F32] = "__eqsf2"; Names[RTLIB::OEQ_F64] = "__eqdf2"; + Names[RTLIB::OEQ_F128] = "__eqtf2"; Names[RTLIB::UNE_F32] = "__nesf2"; Names[RTLIB::UNE_F64] = "__nedf2"; + Names[RTLIB::UNE_F128] = "__netf2"; Names[RTLIB::OGE_F32] = "__gesf2"; Names[RTLIB::OGE_F64] = "__gedf2"; + Names[RTLIB::OGE_F128] = "__getf2"; Names[RTLIB::OLT_F32] = "__ltsf2"; Names[RTLIB::OLT_F64] = "__ltdf2"; + Names[RTLIB::OLT_F128] = "__lttf2"; Names[RTLIB::OLE_F32] = "__lesf2"; 
Names[RTLIB::OLE_F64] = "__ledf2"; + Names[RTLIB::OLE_F128] = "__letf2"; Names[RTLIB::OGT_F32] = "__gtsf2"; Names[RTLIB::OGT_F64] = "__gtdf2"; + Names[RTLIB::OGT_F128] = "__gttf2"; Names[RTLIB::UO_F32] = "__unordsf2"; Names[RTLIB::UO_F64] = "__unorddf2"; + Names[RTLIB::UO_F128] = "__unordtf2"; Names[RTLIB::O_F32] = "__unordsf2"; Names[RTLIB::O_F64] = "__unorddf2"; + Names[RTLIB::O_F128] = "__unordtf2"; Names[RTLIB::MEMCPY] = "memcpy"; Names[RTLIB::MEMMOVE] = "memmove"; Names[RTLIB::MEMSET] = "memset"; @@ -311,6 +357,11 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { if (OpVT == MVT::f32) { if (RetVT == MVT::f64) return FPEXT_F32_F64; + if (RetVT == MVT::f128) + return FPEXT_F32_F128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::f128) + return FPEXT_F64_F128; } return UNKNOWN_LIBCALL; @@ -324,11 +375,15 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { return FPROUND_F64_F32; if (OpVT == MVT::f80) return FPROUND_F80_F32; + if (OpVT == MVT::f128) + return FPROUND_F128_F32; if (OpVT == MVT::ppcf128) return FPROUND_PPCF128_F32; } else if (RetVT == MVT::f64) { if (OpVT == MVT::f80) return FPROUND_F80_F64; + if (OpVT == MVT::f128) + return FPROUND_F128_F64; if (OpVT == MVT::ppcf128) return FPROUND_PPCF128_F64; } @@ -368,6 +423,13 @@ RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { return FPTOSINT_F80_I64; if (RetVT == MVT::i128) return FPTOSINT_F80_I128; + } else if (OpVT == MVT::f128) { + if (RetVT == MVT::i32) + return FPTOSINT_F128_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F128_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F128_I128; } else if (OpVT == MVT::ppcf128) { if (RetVT == MVT::i32) return FPTOSINT_PPCF128_I32; @@ -411,6 +473,13 @@ RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { return FPTOUINT_F80_I64; if (RetVT == MVT::i128) return FPTOUINT_F80_I128; + } else if (OpVT == MVT::f128) { + if (RetVT == MVT::i32) + return FPTOUINT_F128_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F128_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F128_I128; } else if (OpVT == MVT::ppcf128) { if (RetVT == MVT::i32) return FPTOUINT_PPCF128_I32; @@ -428,29 +497,35 @@ RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { if (OpVT == MVT::i32) { if (RetVT == MVT::f32) return SINTTOFP_I32_F32; - else if (RetVT == MVT::f64) + if (RetVT == MVT::f64) return SINTTOFP_I32_F64; - else if (RetVT == MVT::f80) + if (RetVT == MVT::f80) return SINTTOFP_I32_F80; - else if (RetVT == MVT::ppcf128) + if (RetVT == MVT::f128) + return SINTTOFP_I32_F128; + if (RetVT == MVT::ppcf128) return SINTTOFP_I32_PPCF128; } else if (OpVT == MVT::i64) { if (RetVT == MVT::f32) return SINTTOFP_I64_F32; - else if (RetVT == MVT::f64) + if (RetVT == MVT::f64) return SINTTOFP_I64_F64; - else if (RetVT == MVT::f80) + if (RetVT == MVT::f80) return SINTTOFP_I64_F80; - else if (RetVT == MVT::ppcf128) + if (RetVT == MVT::f128) + return SINTTOFP_I64_F128; + if (RetVT == MVT::ppcf128) return SINTTOFP_I64_PPCF128; } else if (OpVT == MVT::i128) { if (RetVT == MVT::f32) return SINTTOFP_I128_F32; - else if (RetVT == MVT::f64) + if (RetVT == MVT::f64) return SINTTOFP_I128_F64; - else if (RetVT == MVT::f80) + if (RetVT == MVT::f80) return SINTTOFP_I128_F80; - else if (RetVT == MVT::ppcf128) + if (RetVT == MVT::f128) + return SINTTOFP_I128_F128; + if (RetVT == MVT::ppcf128) return SINTTOFP_I128_PPCF128; } return UNKNOWN_LIBCALL; @@ -462,29 +537,35 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { if (OpVT == MVT::i32) { if (RetVT == MVT::f32) return UINTTOFP_I32_F32; - else if (RetVT == 
@@ -462,29 +537,35 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
   if (OpVT == MVT::i32) {
     if (RetVT == MVT::f32)
       return UINTTOFP_I32_F32;
-    else if (RetVT == MVT::f64)
+    if (RetVT == MVT::f64)
       return UINTTOFP_I32_F64;
-    else if (RetVT == MVT::f80)
+    if (RetVT == MVT::f80)
       return UINTTOFP_I32_F80;
-    else if (RetVT == MVT::ppcf128)
+    if (RetVT == MVT::f128)
+      return UINTTOFP_I32_F128;
+    if (RetVT == MVT::ppcf128)
       return UINTTOFP_I32_PPCF128;
   } else if (OpVT == MVT::i64) {
     if (RetVT == MVT::f32)
       return UINTTOFP_I64_F32;
-    else if (RetVT == MVT::f64)
+    if (RetVT == MVT::f64)
       return UINTTOFP_I64_F64;
-    else if (RetVT == MVT::f80)
+    if (RetVT == MVT::f80)
       return UINTTOFP_I64_F80;
-    else if (RetVT == MVT::ppcf128)
+    if (RetVT == MVT::f128)
+      return UINTTOFP_I64_F128;
+    if (RetVT == MVT::ppcf128)
       return UINTTOFP_I64_PPCF128;
   } else if (OpVT == MVT::i128) {
     if (RetVT == MVT::f32)
       return UINTTOFP_I128_F32;
-    else if (RetVT == MVT::f64)
+    if (RetVT == MVT::f64)
       return UINTTOFP_I128_F64;
-    else if (RetVT == MVT::f80)
+    if (RetVT == MVT::f80)
       return UINTTOFP_I128_F80;
-    else if (RetVT == MVT::ppcf128)
+    if (RetVT == MVT::f128)
+      return UINTTOFP_I128_F128;
+    if (RetVT == MVT::ppcf128)
       return UINTTOFP_I128_PPCF128;
   }
   return UNKNOWN_LIBCALL;
@@ -496,26 +577,34 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
   memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
   CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
   CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+  CCs[RTLIB::OEQ_F128] = ISD::SETEQ;
   CCs[RTLIB::UNE_F32] = ISD::SETNE;
   CCs[RTLIB::UNE_F64] = ISD::SETNE;
+  CCs[RTLIB::UNE_F128] = ISD::SETNE;
   CCs[RTLIB::OGE_F32] = ISD::SETGE;
   CCs[RTLIB::OGE_F64] = ISD::SETGE;
+  CCs[RTLIB::OGE_F128] = ISD::SETGE;
   CCs[RTLIB::OLT_F32] = ISD::SETLT;
   CCs[RTLIB::OLT_F64] = ISD::SETLT;
+  CCs[RTLIB::OLT_F128] = ISD::SETLT;
   CCs[RTLIB::OLE_F32] = ISD::SETLE;
   CCs[RTLIB::OLE_F64] = ISD::SETLE;
+  CCs[RTLIB::OLE_F128] = ISD::SETLE;
   CCs[RTLIB::OGT_F32] = ISD::SETGT;
   CCs[RTLIB::OGT_F64] = ISD::SETGT;
+  CCs[RTLIB::OGT_F128] = ISD::SETGT;
   CCs[RTLIB::UO_F32] = ISD::SETNE;
   CCs[RTLIB::UO_F64] = ISD::SETNE;
+  CCs[RTLIB::UO_F128] = ISD::SETNE;
   CCs[RTLIB::O_F32] = ISD::SETEQ;
   CCs[RTLIB::O_F64] = ISD::SETEQ;
+  CCs[RTLIB::O_F128] = ISD::SETEQ;
 }
 
 /// NOTE: The constructor takes ownership of TLOF.
 TargetLowering::TargetLowering(const TargetMachine &tm,
                                const TargetLoweringObjectFile *tlof)
-  : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) {
+  : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) {
   // All operations default to being supported.
   memset(OpActions, 0, sizeof(OpActions));
   memset(LoadExtActions, 0, sizeof(LoadExtActions));
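InitCmpLibcallCCs records, for each comparison libcall, the predicate that must be applied to the call's integer result to recover the boolean: __eqtf2 and friends return zero when the operands compare equal, so OEQ_F128 pairs with SETEQ, while UO_F128 pairs __unordtf2 with SETNE (nonzero means unordered). A standalone model of that contract, with a stand-in for the libgcc routine:

    #include <cstdio>

    // Model of the CmpLibcallCCs table: a comparison libcall returns an
    // int, and the table records which predicate against zero recovers the
    // boolean result. libgcc's __eqtf2 returns 0 on equality, so OEQ maps
    // to (result == 0), i.e. ISD::SETEQ. (Stand-in function; the real
    // routine takes two f128 values.)
    static int eqtf2_model(double a, double b) { return a == b ? 0 : 1; }

    int main() {
      double x = 1.0, y = 1.0;
      // The lowered form of "x == y" for a soft-float type:
      bool Equal = eqtf2_model(x, y) == 0; // SETEQ against zero
      std::printf("%d\n", Equal);
    }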
@@ -547,6 +636,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
   setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
   setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
   setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
+  setOperationAction(ISD::ConstantFP, MVT::f128, Expand);
 
   // These library functions default to expand.
   setOperationAction(ISD::FLOG , MVT::f16, Expand);
@@ -579,12 +669,27 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
   setOperationAction(ISD::FCEIL, MVT::f64, Expand);
   setOperationAction(ISD::FRINT, MVT::f64, Expand);
   setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
+  setOperationAction(ISD::FLOG , MVT::f128, Expand);
+  setOperationAction(ISD::FLOG2, MVT::f128, Expand);
+  setOperationAction(ISD::FLOG10, MVT::f128, Expand);
+  setOperationAction(ISD::FEXP , MVT::f128, Expand);
+  setOperationAction(ISD::FEXP2, MVT::f128, Expand);
+  setOperationAction(ISD::FFLOOR, MVT::f128, Expand);
+  setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand);
+  setOperationAction(ISD::FCEIL, MVT::f128, Expand);
+  setOperationAction(ISD::FRINT, MVT::f128, Expand);
+  setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
 
   // Default ISD::TRAP to expand (which turns it into abort).
   setOperationAction(ISD::TRAP, MVT::Other, Expand);
 
+  // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand"
+  // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
+  //
+  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
+
   IsLittleEndian = TD->isLittleEndian();
-  PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
+  PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
   memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
   memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
   maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
@@ -613,6 +718,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
   ShouldFoldAtomicFences = false;
   InsertFencesForAtomic = false;
   SupportJumpTables = true;
+  MinimumJumpTableEntries = 4;
 
   InitLibcallNames(LibcallRoutineNames);
   InitCmpLibcallCCs(CmpLibcallCCs);
@@ -624,7 +730,7 @@ TargetLowering::~TargetLowering() {
 }
 
 MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const {
-  return MVT::getIntegerVT(8*TD->getPointerSize());
+  return MVT::getIntegerVT(8*TD->getPointerSize(0));
 }
 
 /// canOpTrap - Returns true if the operation can trap for the value type.
@@ -647,7 +753,7 @@ bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const {
 
 static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
                                           unsigned &NumIntermediates,
-                                          EVT &RegisterVT,
+                                          MVT &RegisterVT,
                                           TargetLowering *TLI) {
   // Figure out the right, legal destination reg to copy into.
   unsigned NumElts = VT.getVectorNumElements();
@@ -682,7 +788,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
   if (!isPowerOf2_32(NewVTSize))
     NewVTSize = NextPowerOf2(NewVTSize);
 
-  EVT DestVT = TLI->getRegisterType(NewVT);
+  MVT DestVT = TLI->getRegisterType(NewVT);
   RegisterVT = DestVT;
   if (EVT(DestVT).bitsLT(NewVT))   // Value is expanded, e.g. i64 -> i16.
     return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
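MinimumJumpTableEntries gives targets a knob for the switch-lowering heuristic: below the threshold, a short compare chain is usually cheaper than building a table and taking an indirect branch. A standalone sketch of the decision this field feeds (illustrative only; the real check lives in the switch-lowering code, not in this class):

    // Hypothetical policy object mirroring the two fields set above.
    struct JumpTablePolicy {
      unsigned MinimumJumpTableEntries = 4; // new default set in the ctor
      bool SupportJumpTables = true;
      // A switch is only worth a jump table once it has at least this
      // many case destinations.
      bool useJumpTable(unsigned NumCases) const {
        return SupportJumpTables && NumCases >= MinimumJumpTableEntries;
      }
    };

    int main() {
      JumpTablePolicy P;
      return P.useJumpTable(3) ? 1 : 0; // 3 cases: stays a compare chain
    }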
@@ -706,9 +812,9 @@ bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const {
 /// findRepresentativeClass - Return the largest legal super-reg register class
 /// of the register class for the specified type and its associated "cost".
 std::pair<const TargetRegisterClass*, uint8_t>
-TargetLowering::findRepresentativeClass(EVT VT) const {
+TargetLowering::findRepresentativeClass(MVT VT) const {
   const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
-  const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy];
+  const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
   if (!RC)
     return std::make_pair(RC, 0);
 
@@ -752,14 +858,13 @@ void TargetLowering::computeRegisterProperties() {
 
   // Every integer value type larger than this largest register takes twice as
   // many registers to represent as the previous ValueType.
-  for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) {
-    EVT ExpandedVT = (MVT::SimpleValueType)ExpandedReg;
-    if (!ExpandedVT.isInteger())
-      break;
+  for (unsigned ExpandedReg = LargestIntReg + 1;
+       ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
     NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
     RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
     TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
-    ValueTypeActions.setTypeAction(ExpandedVT, TypeExpandInteger);
+    ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
+                                   TypeExpandInteger);
   }
 
   // Inspect all of the ValueType's smaller than the largest integer
@@ -767,7 +872,7 @@ void TargetLowering::computeRegisterProperties() {
   unsigned LegalIntReg = LargestIntReg;
   for (unsigned IntReg = LargestIntReg - 1; IntReg >= (unsigned)MVT::i1;
        --IntReg) {
-    EVT IVT = (MVT::SimpleValueType)IntReg;
+    MVT IVT = (MVT::SimpleValueType)IntReg;
     if (isTypeLegal(IVT)) {
       LegalIntReg = IntReg;
     } else {
@@ -818,14 +923,14 @@ void TargetLowering::computeRegisterProperties() {
 
     // Determine if there is a legal wider type.  If so, we should promote to
     // that wider vector type.
-    EVT EltVT = VT.getVectorElementType();
+    MVT EltVT = VT.getVectorElementType();
     unsigned NElts = VT.getVectorNumElements();
-    if (NElts != 1) {
+    if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) {
       bool IsLegalWiderType = false;
       // First try to promote the elements of integer vectors. If no legal
      // promotion was found, fallback to the widen-vector method.
       for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
-        EVT SVT = (MVT::SimpleValueType)nVT;
+        MVT SVT = (MVT::SimpleValueType)nVT;
         // Promote vectors of integers to vectors with the same number
         // of elements, with a wider element type.
         if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
@@ -844,7 +949,7 @@ void TargetLowering::computeRegisterProperties() {
 
       // Try to widen the vector.
       for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
-        EVT SVT = (MVT::SimpleValueType)nVT;
+        MVT SVT = (MVT::SimpleValueType)nVT;
         if (SVT.getVectorElementType() == EltVT &&
             SVT.getVectorNumElements() > NElts &&
             isTypeLegal(SVT)) {
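The rewritten expansion loop walks the integer types above the largest legal register in order, so each step can read the previous type's register count: with 64-bit registers, i128 needs 2, i256 needs 4, and so on. A standalone model of the recurrence:

    #include <cstdio>

    // Model of the loop above: every integer type wider than the largest
    // legal register needs twice as many registers as the next-smaller
    // integer type (NumRegistersForVT[Reg] = 2 * NumRegistersForVT[Reg-1]).
    int main() {
      unsigned NumRegs = 1;                  // the largest legal int type
      for (unsigned Bits = 128; Bits <= 512; Bits *= 2) {
        NumRegs *= 2;
        std::printf("i%u -> %u regs\n", Bits, NumRegs);
      }
    }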
@@ -860,14 +965,14 @@ void TargetLowering::computeRegisterProperties() {
     }
 
     MVT IntermediateVT;
-    EVT RegisterVT;
+    MVT RegisterVT;
     unsigned NumIntermediates;
     NumRegistersForVT[i] =
       getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
                                 RegisterVT, this);
     RegisterTypeForVT[i] = RegisterVT;
 
-    EVT NVT = VT.getPow2VectorType();
+    MVT NVT = VT.getPow2VectorType();
     if (NVT == VT) {
       // Type is already a power of 2.  The default action is to split.
       TransformToType[i] = MVT::Other;
@@ -900,7 +1005,7 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
 
 EVT TargetLowering::getSetCCResultType(EVT VT) const {
   assert(!VT.isVector() && "No default SetCC type for vectors!");
-  return PointerTy.SimpleTy;
+  return getPointerTy(0).SimpleTy;
 }
 
 MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const {
@@ -919,7 +1024,7 @@ MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const {
 unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
                                                 EVT &IntermediateVT,
                                                 unsigned &NumIntermediates,
-                                                EVT &RegisterVT) const {
+                                                MVT &RegisterVT) const {
   unsigned NumElts = VT.getVectorNumElements();
 
   // If there is a wider vector type with the same element type as this one,
@@ -929,9 +1034,10 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
   // <4 x i1> -> <4 x i32>.
   LegalizeTypeAction TA = getTypeAction(Context, VT);
   if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
-    RegisterVT = getTypeToTransformTo(Context, VT);
-    if (isTypeLegal(RegisterVT)) {
-      IntermediateVT = RegisterVT;
+    EVT RegisterEVT = getTypeToTransformTo(Context, VT);
+    if (isTypeLegal(RegisterEVT)) {
+      IntermediateVT = RegisterEVT;
+      RegisterVT = RegisterEVT.getSimpleVT();
       NumIntermediates = 1;
       return 1;
     }
@@ -964,7 +1070,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
     NewVT = EltTy;
   IntermediateVT = NewVT;
 
-  EVT DestVT = getRegisterType(Context, NewVT);
+  MVT DestVT = getRegisterType(Context, NewVT);
   RegisterVT = DestVT;
 
   unsigned NewVTSize = NewVT.getSizeInBits();
@@ -972,7 +1078,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
   if (!isPowerOf2_32(NewVTSize))
     NewVTSize = NextPowerOf2(NewVTSize);
 
-  if (DestVT.bitsLT(NewVT))   // Value is expanded, e.g. i64 -> i16.
+  if (EVT(DestVT).bitsLT(NewVT))   // Value is expanded, e.g. i64 -> i16.
     return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
 
   // Otherwise, promotion or legal types use the same number of registers as
@@ -984,7 +1090,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
 /// type of the given function.  This does not require a DAG or a return value,
 /// and is suitable for use before any DAGs for the function are constructed.
 /// TODO: Move this out of TargetLowering.cpp.
-void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
+void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr,
                          SmallVectorImpl<ISD::OutputArg> &Outs,
                          const TargetLowering &TLI) {
   SmallVector<EVT, 4> ValueVTs;
@@ -996,9 +1102,9 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
     EVT VT = ValueVTs[j];
     ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
 
-    if (attr & Attribute::SExt)
+    if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
       ExtendKind = ISD::SIGN_EXTEND;
-    else if (attr & Attribute::ZExt)
+    else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
       ExtendKind = ISD::ZERO_EXTEND;
 
     // FIXME: C calling convention requires the return type to be promoted to
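The hunk that follows applies the promotion the FIXME describes: an integer return value narrower than the target's minimum register type is widened to it, and the signext/zeroext attributes decide the extension kind. A standalone model of that decision (sizes and the i32 minimum are illustrative):

    #include <cstdio>

    // Model of the return-value promotion below: an integer return
    // narrower than the minimum register type (here i32) is widened, and
    // sign- vs zero-extension follows the attributes on the function.
    enum ExtKind { AnyExt, SignExt, ZeroExt };

    static unsigned promote(unsigned Bits, ExtKind K, unsigned MinBits = 32) {
      if (K != AnyExt && Bits < MinBits)
        return MinBits;                      // VT = MinVT
      return Bits;
    }

    int main() {
      std::printf("%u\n", promote(8, SignExt)); // i8 signext -> i32
      std::printf("%u\n", promote(8, AnyExt));  // no attribute -> stays i8
    }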
@@ -1006,28 +1112,27 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
     // conventions. The frontend should mark functions whose return values
     // require promoting with signext or zeroext attributes.
     if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
-      EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+      MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
       if (VT.bitsLT(MinVT))
         VT = MinVT;
     }
 
     unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
-    EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+    MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
 
     // 'inreg' on function refers to return value
     ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
-    if (attr & Attribute::InReg)
+    if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg))
       Flags.setInReg();
 
     // Propagate extension type if any
-    if (attr & Attribute::SExt)
+    if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
       Flags.setSExt();
-    else if (attr & Attribute::ZExt)
+    else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
      Flags.setZExt();
 
-    for (unsigned i = 0; i < NumParts; ++i) {
-      Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true));
-    }
+    for (unsigned i = 0; i < NumParts; ++i)
+      Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0));
   }
 }
 
@@ -1061,7 +1166,7 @@ SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
 
   if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
-    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(0));
 
   return Table;
 }
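The block added next, InstructionOpcodeToISD, maps IR opcodes to the SelectionDAG node a cost model should price; opcodes with no DAG counterpart (control flow, allocas, calls) map to 0. A cut-down standalone analogue with illustrative enums, not LLVM's actual tables:

    #include <cstdio>

    // Minimal analogue of InstructionOpcodeToISD: IR opcodes that have a
    // direct SelectionDAG node map to it; bookkeeping and control flow
    // map to 0, meaning "no node to price".
    enum IROp { Add, Sub, Br, Load };
    enum ISDOp { ISD_NONE = 0, ISD_ADD, ISD_SUB, ISD_LOAD };

    static int toISD(IROp Op) {
      switch (Op) {
      case Add:  return ISD_ADD;
      case Sub:  return ISD_SUB;
      case Load: return ISD_LOAD;
      case Br:   return ISD_NONE; // branches have no cost-model node
      }
      return ISD_NONE;
    }

    int main() { std::printf("%d\n", toISD(Add)); }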
return 0; + case Call: return 0; + case Select: return ISD::SELECT; + case UserOp1: return 0; + case UserOp2: return 0; + case VAArg: return 0; + case ExtractElement: return ISD::EXTRACT_VECTOR_ELT; + case InsertElement: return ISD::INSERT_VECTOR_ELT; + case ShuffleVector: return ISD::VECTOR_SHUFFLE; + case ExtractValue: return ISD::MERGE_VALUES; + case InsertValue: return ISD::MERGE_VALUES; + case LandingPad: return 0; + } + + llvm_unreachable("Unknown instruction type encountered!"); +} + +std::pair<unsigned, MVT> +TargetLowering::getTypeLegalizationCost(Type *Ty) const { + LLVMContext &C = Ty->getContext(); + EVT MTy = getValueType(Ty); + + unsigned Cost = 1; + // We keep legalizing the type until we find a legal kind. We assume that + // the only operation that costs anything is the split. After splitting + // we need to handle two types. + while (true) { + LegalizeKind LK = getTypeConversion(C, MTy); + + if (LK.first == TypeLegal) + return std::make_pair(Cost, MTy.getSimpleVT()); + + if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger) + Cost *= 2; + + // Keep legalizing the type. + MTy = LK.second; + } +} + +//===----------------------------------------------------------------------===// // Optimization Methods //===----------------------------------------------------------------------===// @@ -1157,7 +1359,8 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, // Search for the smallest integer type with free casts to and from // Op's type. For expedience, just check power-of-2 integer types. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros(); + unsigned DemandedSize = BitWidth - Demanded.countLeadingZeros(); + unsigned SmallVTBits = DemandedSize; if (!isPowerOf2_32(SmallVTBits)) SmallVTBits = NextPowerOf2(SmallVTBits); for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) { @@ -1170,7 +1373,9 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, Op.getNode()->getOperand(0)), DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getNode()->getOperand(1))); - SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X); + bool NeedZext = DemandedSize > SmallVTBits; + SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, + dl, Op.getValueType(), X); return CombineTo(Op, Z); } } @@ -2106,7 +2311,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, EVT newVT = N0.getOperand(0).getValueType(); if (DCI.isBeforeLegalizeOps() || (isOperationLegal(ISD::SETCC, newVT) && - getCondCodeAction(Cond, newVT)==Legal)) + getCondCodeAction(Cond, newVT.getSimpleVT())==Legal)) return DAG.getSetCC(dl, VT, N0.getOperand(0), DAG.getConstant(C1.trunc(InSize), newVT), Cond); @@ -2202,9 +2407,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ; return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1), Cond); - } else if (Op0.getOpcode() == ISD::AND && - isa<ConstantSDNode>(Op0.getOperand(1)) && - cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) { + } + if (Op0.getOpcode() == ISD::AND && + isa<ConstantSDNode>(Op0.getOperand(1)) && + cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) { // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. 
@@ -2202,9 +2407,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
           Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
           return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
                               Cond);
-        } else if (Op0.getOpcode() == ISD::AND &&
-                   isa<ConstantSDNode>(Op0.getOperand(1)) &&
-                   cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
+        }
+        if (Op0.getOpcode() == ISD::AND &&
+            isa<ConstantSDNode>(Op0.getOperand(1)) &&
+            cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
           // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
           if (Op0.getValueType().bitsGT(VT))
             Op0 = DAG.getNode(ISD::AND, dl, VT,
@@ -2219,6 +2425,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                               DAG.getConstant(0, Op0.getValueType()),
                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
         }
+        if (Op0.getOpcode() == ISD::AssertZext &&
+            cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
+          return DAG.getSetCC(dl, VT, Op0,
+                              DAG.getConstant(0, Op0.getValueType()),
+                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
       }
     }
 
@@ -2271,7 +2482,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                             DAG.getConstant(MinVal, N0.getValueType()),
                             ISD::SETEQ);
       // If we have setugt X, Max-1, turn it into seteq X, Max
-      else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+      if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
         return DAG.getSetCC(dl, VT, N0,
                             DAG.getConstant(MaxVal, N0.getValueType()),
                             ISD::SETEQ);
@@ -2401,36 +2612,36 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
 
     // If the condition is not legal, see if we can find an equivalent one
     // which is legal.
-    if (!isCondCodeLegal(Cond, N0.getValueType())) {
+    if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
       // If the comparison was an awkward floating-point == or != and one of
       // the comparison operands is infinity or negative infinity, convert the
       // condition to a less-awkward <= or >=.
       if (CFP->getValueAPF().isInfinity()) {
         if (CFP->getValueAPF().isNegative()) {
           if (Cond == ISD::SETOEQ &&
-              isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
+              isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
           if (Cond == ISD::SETUEQ &&
-              isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
+              isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
           if (Cond == ISD::SETUNE &&
-              isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
+              isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
           if (Cond == ISD::SETONE &&
-              isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
+              isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
         } else {
           if (Cond == ISD::SETOEQ &&
-              isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
+              isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
          if (Cond == ISD::SETUEQ &&
-              isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
+              isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
           if (Cond == ISD::SETUNE &&
-              isCondCodeLegal(ISD::SETULT, N0.getValueType()))
+              isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
           if (Cond == ISD::SETONE &&
-              isCondCodeLegal(ISD::SETULT, N0.getValueType()))
+              isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
         }
       }
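The infinity rewrites above are exact, not approximations: no ordered value compares greater than +inf, so SETOEQ against +inf and SETOGE against +inf agree on every input, NaN included (both come out false). A quick standalone check:

    #include <cmath>
    #include <cstdio>
    #include <limits>

    // Demonstrates the SETOEQ -> SETOGE rewrite against +infinity: the
    // two columns printed below match for every input, including NaN,
    // because unordered operands fail both predicates.
    int main() {
      float Inf = std::numeric_limits<float>::infinity();
      float Vals[] = {0.0f, Inf, -Inf, std::nanf("")};
      for (float V : Vals)
        std::printf("%d %d\n", V == Inf, V >= Inf);
    }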
@@ -2464,7 +2675,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       // if it is not already.
       ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
       if (NewCond != Cond && (DCI.isBeforeLegalizeOps() ||
-          getCondCodeAction(NewCond, N0.getValueType()) == Legal))
+          getCondCodeAction(NewCond, N0.getSimpleValueType()) == Legal))
         return DAG.getSetCC(dl, VT, N0, N1, NewCond);
     }
 
@@ -2545,7 +2756,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       if (DAG.isCommutativeBinOp(N0.getOpcode()))
         return DAG.getSetCC(dl, VT, N0.getOperand(0),
                             DAG.getConstant(0, N0.getValueType()), Cond);
-      else if (N0.getNode()->hasOneUse()) {
+      if (N0.getNode()->hasOneUse()) {
        assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
        // (Z-X) == X  --> Z == X<<1
        SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1,
@@ -2561,14 +2772,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
     if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
         N1.getOpcode() == ISD::XOR) {
       // Simplify  X == (X+Z) -->  Z == 0
-      if (N1.getOperand(0) == N0) {
+      if (N1.getOperand(0) == N0)
         return DAG.getSetCC(dl, VT, N1.getOperand(1),
                             DAG.getConstant(0, N1.getValueType()), Cond);
-      } else if (N1.getOperand(1) == N0) {
-        if (DAG.isCommutativeBinOp(N1.getOpcode())) {
+      if (N1.getOperand(1) == N0) {
+        if (DAG.isCommutativeBinOp(N1.getOpcode()))
          return DAG.getSetCC(dl, VT, N1.getOperand(0),
                              DAG.getConstant(0, N1.getValueType()), Cond);
-        } else if (N1.getNode()->hasOneUse()) {
+        if (N1.getNode()->hasOneUse()) {
          assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
          // X == (Z-X)  --> X<<1 == Z
          SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
@@ -2825,6 +3036,9 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
 
   // Remove the braces from around the name.
   StringRef RegName(Constraint.data()+1, Constraint.size()-2);
 
+  std::pair<unsigned, const TargetRegisterClass*> R =
+    std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+
   // Figure out which register class contains this reg.
   const TargetRegisterInfo *RI = TM.getRegisterInfo();
   for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
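The hunk that follows changes the {registername} lookup from "first name match wins" to "prefer a class that also supports the requested type, falling back to the first name match". A standalone model of that search order (simplified: it assumes every entry already matched the register name):

    #include <cstdio>

    // Model of the lookup below: a named register may live in several
    // register classes; return the first class that supports the requested
    // value type, but remember the first match as a fallback (R) instead
    // of failing outright.
    struct RC { const char *Reg; bool HasVT; };

    static int pickClass(const RC *Classes, int N) {
      int Fallback = -1;
      for (int i = 0; i < N; ++i) {
        if (Classes[i].HasVT)
          return i;               // class with the requested type wins
        if (Fallback < 0)
          Fallback = i;           // first name match, kept as R
      }
      return Fallback;
    }

    int main() {
      RC Classes[] = {{"x0", false}, {"x0", true}};
      std::printf("%d\n", pickClass(Classes, 2)); // prints 1
    }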
@@ -2838,12 +3052,22 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
 
     for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
          I != E; ++I) {
-      if (RegName.equals_lower(RI->getName(*I)))
-        return std::make_pair(*I, RC);
+      if (RegName.equals_lower(RI->getName(*I))) {
+        std::pair<unsigned, const TargetRegisterClass*> S =
+          std::make_pair(*I, RC);
+
+        // If this register class has the requested value type, return it,
+        // otherwise keep searching and return the first class found
+        // if no other is found which explicitly has the requested type.
+        if (RC->hasType(VT))
+          return S;
+        else if (!R.second)
+          R = S;
+      }
     }
   }
 
-  return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+  return R;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2908,10 +3132,10 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
         assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
         if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
-          OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo));
+          OpInfo.ConstraintVT = getSimpleValueType(STy->getElementType(ResNo));
         } else {
           assert(ResNo == 0 && "Asm only has one result!");
-          OpInfo.ConstraintVT = getValueType(CS.getType());
+          OpInfo.ConstraintVT = getSimpleValueType(CS.getType());
         }
         ++ResNo;
         break;
@@ -2950,13 +3174,14 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
           case 64:
           case 128:
             OpInfo.ConstraintVT =
-              EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+              MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
             break;
           }
-        } else if (dyn_cast<PointerType>(OpTy)) {
-          OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize());
+        } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
+          OpInfo.ConstraintVT = MVT::getIntegerVT(
+              8*TD->getPointerSize(PT->getAddressSpace()));
         } else {
-          OpInfo.ConstraintVT = EVT::getEVT(OpTy, true);
+          OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
        }
      }
    }
@@ -3319,7 +3544,7 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
 /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
 SDValue TargetLowering::
 BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
-          std::vector<SDNode*>* Created) const {
+          std::vector<SDNode*> *Created) const {
   EVT VT = N->getValueType(0);
   DebugLoc dl= N->getDebugLoc();
 
@@ -3379,7 +3604,7 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
 /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
 SDValue TargetLowering::
 BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
-          std::vector<SDNode*>* Created) const {
+          std::vector<SDNode*> *Created) const {
   EVT VT = N->getValueType(0);
   DebugLoc dl = N->getDebugLoc();
 
diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
index a081e3c..f769b44 100644
--- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -16,7 +16,7 @@
 using namespace llvm;
 
 TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
-  : TD(TM.getTargetData()) {
+  : TD(TM.getDataLayout()) {
 }
 
 TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
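BuildSDIV and BuildUDIV, whose signatures are the only change here, implement the "Magic Number Generator" the comments cite: division by a constant becomes a high multiply plus cheap fixups. A standalone demonstration for dividing by 7, using the well-known 32-bit magic pair (0x92492493, shift 2); this mirrors the structure of the DAG that BuildSDIV emits, not LLVM's code itself:

    #include <cstdint>
    #include <cstdio>

    static int32_t sdiv7(int32_t n) {
      // Sign-extended 0x92492493, the magic constant for divisor 7.
      const int64_t Magic = 0x92492493LL - (1LL << 32);
      int32_t q = (int32_t)(((int64_t)n * Magic) >> 32); // ISD::MULHS
      q += n;       // magic is negative, so add the dividend back
      q >>= 2;      // ISD::SRA by the magic shift amount
      return q + (int32_t)((uint32_t)q >> 31); // add 1 if quotient < 0
    }

    int main() {
      for (int32_t n : {-100, -7, -1, 0, 1, 6, 7, 100})
        std::printf("%d/7 = %d (ref %d)\n", n, sdiv7(n), n / 7);
    }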
