Diffstat (limited to 'lib/CodeGen/SelectionDAG')
20 files changed, 3178 insertions, 938 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cb88941..43f72c5 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -35,6 +35,8 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; @@ -43,6 +45,7 @@ STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); +STATISTIC(SlicedLoads, "Number of load sliced"); namespace { static cl::opt<bool> @@ -53,6 +56,14 @@ namespace { CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Include global information in alias analysis")); + /// Hidden option to stress test load slicing, i.e., when this option + /// is enabled, load slicing bypasses most of its profitability guards. + static cl::opt<bool> + StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, + cl::desc("Bypass the profitability model of load " + "slicing"), + cl::init(false)); + //------------------------------ DAGCombiner ---------------------------------// class DAGCombiner { @@ -62,6 +73,7 @@ namespace { CodeGenOpt::Level OptLevel; bool LegalOperations; bool LegalTypes; + bool ForCodeSize; // Worklist of all of the nodes that need to be simplified. // @@ -144,6 +156,7 @@ namespace { bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); + bool SliceUpLoad(SDNode *N); void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); @@ -283,11 +296,11 @@ namespace { /// isAlias - Return true if there is any possibility that the two addresses /// overlap. - bool isAlias(SDValue Ptr1, int64_t Size1, + bool isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, const Value *SrcValue1, int SrcValueOffset1, unsigned SrcValueAlign1, const MDNode *TBAAInfo1, - SDValue Ptr2, int64_t Size2, + SDValue Ptr2, int64_t Size2, bool IsVolatile2, const Value *SrcValue2, int SrcValueOffset2, unsigned SrcValueAlign2, const MDNode *TBAAInfo2) const; @@ -299,7 +312,7 @@ namespace { /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. 
bool FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, + SDValue &Ptr, int64_t &Size, bool &IsVolatile, const Value *&SrcValue, int &SrcValueOffset, unsigned &SrcValueAlignment, const MDNode *&TBAAInfo) const; @@ -315,8 +328,15 @@ namespace { public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) - : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), - OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), + OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { + AttributeSet FnAttrs = + DAG.getMachineFunction().getFunction()->getAttributes(); + ForCodeSize = + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize) || + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + } /// Run - runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); @@ -329,7 +349,8 @@ namespace { assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); if (LHSTy.isVector()) return LHSTy; - return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy) : TLI.getPointerTy(); + return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy) + : TLI.getPointerTy(); } /// isTypeLegal - This method returns true if we are running before type @@ -744,9 +765,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { Replace = true; return DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - MemVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + MemVT, LD->getMemOperand()); } unsigned Opc = Op.getOpcode(); @@ -967,9 +986,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { : LD->getExtensionType(); SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - MemVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + MemVT, LD->getMemOperand()); SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); DEBUG(dbgs() << "\nPromoting "; @@ -1017,7 +1034,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // try and combine it. while (!WorkListContents.empty()) { SDNode *N; - // The WorkListOrder holds the SDNodes in order, but it may contain duplicates. + // The WorkListOrder holds the SDNodes in order, but it may contain + // duplicates. // In order to avoid a linear scan, we use a set (O(log N)) to hold what the // worklist *should* contain, and check the node we want to visit is should // actually be visited. @@ -1617,19 +1635,8 @@ static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, bool LegalOperations, bool LegalTypes) { if (!VT.isVector()) return DAG.getConstant(0, VT); - if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { - // Produce a vector of zeros. 
- EVT ElemTy = VT.getVectorElementType(); - if (LegalTypes && TLI.getTypeAction(*DAG.getContext(), ElemTy) == - TargetLowering::TypePromoteInteger) - ElemTy = TLI.getTypeToTransformTo(*DAG.getContext(), ElemTy); - assert((!LegalTypes || TLI.isTypeLegal(ElemTy)) && - "Type for zero vector elements is not legal"); - SDValue El = DAG.getConstant(0, ElemTy); - std::vector<SDValue> Ops(VT.getVectorNumElements(), El); - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, - &Ops[0], Ops.size()); - } + if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) + return DAG.getConstant(0, VT); return SDValue(); } @@ -1771,8 +1778,8 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) { return SDValue(); } -/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are -/// all the same constant or undefined. +/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose +/// elements are all the same constant or undefined. static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); if (!C) @@ -1808,9 +1815,11 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); } else { N0IsConst = dyn_cast<ConstantSDNode>(N0) != 0; - ConstValue0 = N0IsConst? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue() : APInt(); + ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue() + : APInt(); N1IsConst = dyn_cast<ConstantSDNode>(N1) != 0; - ConstValue1 = N1IsConst? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue() : APInt(); + ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue() + : APInt(); } // fold (mul c1, c2) -> c1*c2 @@ -1823,20 +1832,24 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1 == 0) return N1; + // We require a splat of the entire scalar bit width for non-contiguous + // bit patterns. + bool IsFullSplat = + ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits(); // fold (mul x, 1) -> x - if (N1IsConst && ConstValue1 == 1) + if (N1IsConst && ConstValue1 == 1 && IsFullSplat) return N0; // fold (mul x, -1) -> 0-x if (N1IsConst && ConstValue1.isAllOnesValue()) return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), N0); // fold (mul x, (1 << c)) -> x << c - if (N1IsConst && ConstValue1.isPowerOf2()) + if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat) return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, DAG.getConstant(ConstValue1.logBase2(), getShiftAmountTy(N0.getValueType()))); // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1IsConst && (-ConstValue1).isPowerOf2()) { + if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) { unsigned Log2Val = (-ConstValue1).logBase2(); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. 
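The visitMUL folds above are ordinary strength reductions; the new IsFullSplat guard only refuses them when the splat constant is narrower than the scalar type. A minimal standalone check of the identities themselves, in plain unsigned arithmetic rather than DAG code:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t xs[] = {0u, 1u, 7u, 0xdeadbeefu};
  for (uint32_t x : xs) {
    // fold (mul x, (1 << c)) -> x << c, here with c = 3
    assert(x * 8u == (x << 3));
    // fold (mul x, -(1 << c)) -> -(x << c), modular arithmetic, c = 3
    assert(x * (uint32_t)-8 == (uint32_t)-(x << 3));
    // fold (mul x, -1) -> 0 - x
    assert(x * 0xffffffffu == 0u - x);
  }
}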
@@ -2675,6 +2688,19 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } } + // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) + if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && + Op0 == Op1 && LL.getValueType().isInteger() && + Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() && + cast<ConstantSDNode>(RR)->isAllOnesValue()) || + (cast<ConstantSDNode>(LR)->isAllOnesValue() && + cast<ConstantSDNode>(RR)->isNullValue()))) { + SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), + LL, DAG.getConstant(1, LL.getValueType())); + AddToWorkList(ADDNode.getNode()); + return DAG.getSetCC(SDLoc(N), VT, ADDNode, + DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); + } // canonicalize equivalent to ll == rl if (LL == RR && LR == RL) { Op1 = ISD::getSetCCSwappedOperands(Op1); @@ -2718,9 +2744,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + MemVT, LN0->getMemOperand()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2739,11 +2763,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, - LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2773,10 +2794,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, - LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), - ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getChain(), LN0->getBasePtr(), ExtVT, + LN0->getMemOperand()); AddToWorkList(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2812,7 +2831,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { LN0->getChain(), NewPtr, LN0->getPointerInfo(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - Alignment); + Alignment, LN0->getTBAAInfo()); AddToWorkList(N); CombineTo(LN0, Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2848,6 +2867,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } + // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) + if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { + SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), + N0.getOperand(1), false); + if (BSwap.getNode()) + return BSwap; + } + return SDValue(); } @@ -2932,13 +2959,23 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, if (N00 != N10) return SDValue(); - // Make sure everything beyond the low halfword is zero since the SRL 16 - // will clear the top bits. + // Make sure everything beyond the low halfword gets set to zero since the SRL + // 16 will clear the top bits. 
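The new (and (setne X, 0), (setne X, -1)) fold earlier in this hunk rests on an unsigned wrap-around trick: adding 1 maps the two excluded values (0 and -1) onto {1, 0}, so a single unsigned comparison against 2 covers both tests. A brute-force sanity check over all 16-bit values:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t i = 0; i <= 0xffff; ++i) {
    uint16_t x = (uint16_t)i;
    bool both = (x != 0) && (x != 0xffff);  // (and (setne X, 0), (setne X, -1))
    bool folded = (uint16_t)(x + 1) >= 2;   // (setuge (add X, 1), 2)
    assert(both == folded);
  }
}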
unsigned OpSizeInBits = VT.getSizeInBits(); - if (DemandHighBits && OpSizeInBits > 16 && - (!LookPassAnd0 || !LookPassAnd1) && - !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16))) - return SDValue(); + if (DemandHighBits && OpSizeInBits > 16) { + // If the left-shift isn't masked out then the only way this is a bswap is + // if all bits beyond the low 8 are 0. In that case the entire pattern + // reduces to a left shift anyway: leave it for other parts of the combiner. + if (!LookPassAnd0) + return SDValue(); + + // However, if the right shift isn't masked out then it might be because + // it's not needed. See if we can spot that too. + if (!LookPassAnd1 && + !DAG.MaskedValueIsZero( + N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16))) + return SDValue(); + } SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); if (OpSizeInBits > 16) @@ -3078,7 +3115,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, SDValue(Parts[0],0)); - // Result of the bswap should be rotated by 16. If it's not legal, than + // Result of the bswap should be rotated by 16. If it's not legal, then // do (x << 16) | (x >> 16). SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) @@ -3343,29 +3380,9 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { if (LHSMask.getNode() || RHSMask.getNode()) return 0; - // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y) - // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y)) - if (RHSShiftAmt.getOpcode() == ISD::SUB && - LHSShiftAmt == RHSShiftAmt.getOperand(1)) { - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) - return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, - HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); - } - } - - // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y) - // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y)) - if (LHSShiftAmt.getOpcode() == ISD::SUB && - RHSShiftAmt == LHSShiftAmt.getOperand(1)) - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) - if (SUBC->getAPIntValue() == OpSizeInBits) - return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, - HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); - - // Look for sign/zext/any-extended or truncate cases: + // If the shift amount is sign/zext/any-extended just peel it off. 
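The masking reasoning in MatchBSwapHWordLow above is easy to check concretely: ((N >> 8) | (N << 8)) & 0xffff is a byte swap of the low halfword only when everything above bit 15 of N is known zero; otherwise the right shift leaks high bytes into bits 8..15 of the result, which is exactly what the DemandHighBits checks guard against. A standalone sketch:

#include <cassert>
#include <cstdint>

static uint16_t bswap16(uint16_t v) { return (uint16_t)((v << 8) | (v >> 8)); }

int main() {
  // Exhaustive over values whose bits above 15 are zero.
  for (uint32_t n = 0; n <= 0xffff; ++n) {
    uint32_t pattern = ((n >> 8) | (n << 8)) & 0xffffu;
    assert(pattern == bswap16((uint16_t)n));
  }
  // With bit 16 set, (n >> 8) pollutes bits 8..15 and the identity breaks:
  uint32_t n = 0x10000u | 0x1234u;
  assert((((n >> 8) | (n << 8)) & 0xffffu) != bswap16(0x1234u));
}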
+ SDValue LExtOp0 = LHSShiftAmt; + SDValue RExtOp0 = RHSShiftAmt; if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || @@ -3374,33 +3391,31 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { - SDValue LExtOp0 = LHSShiftAmt.getOperand(0); - SDValue RExtOp0 = RHSShiftAmt.getOperand(0); - if (RExtOp0.getOpcode() == ISD::SUB && - RExtOp0.getOperand(1) == LExtOp0) { - // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> - // (rotl x, y) - // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> - // (rotr x, (sub 32, y)) - if (ConstantSDNode *SUBC = + LExtOp0 = LHSShiftAmt.getOperand(0); + RExtOp0 = RHSShiftAmt.getOperand(0); + } + + if (RExtOp0.getOpcode() == ISD::SUB && RExtOp0.getOperand(1) == LExtOp0) { + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotl x, y) + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotr x, (sub 32, y)) + if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) - if (SUBC->getAPIntValue() == OpSizeInBits) - return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, - LHSShiftArg, - HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); - } else if (LExtOp0.getOpcode() == ISD::SUB && - RExtOp0 == LExtOp0.getOperand(1)) { - // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> - // (rotr x, y) - // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> - // (rotl x, (sub 32, y)) - if (ConstantSDNode *SUBC = + if (SUBC->getAPIntValue() == OpSizeInBits) + return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, + HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); + } else if (LExtOp0.getOpcode() == ISD::SUB && + RExtOp0 == LExtOp0.getOperand(1)) { + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotr x, y) + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotl x, (sub 32, y)) + if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) - if (SUBC->getAPIntValue() == OpSizeInBits) - return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, - LHSShiftArg, - HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); - } + if (SUBC->getAPIntValue() == OpSizeInBits) + return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, + HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); } return 0; @@ -3620,6 +3635,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + // fold (shl c1, c2) -> c1<<c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); @@ -3697,6 +3718,27 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } } + // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C)) + // Only fold this if the inner zext has no other uses to avoid increasing + // the total number of instructions. 
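The rotate patterns handled by MatchRotate above all reduce to one identity: when the subtrahend equals the bit width, (x << y) | (x >> (32 - y)) is a left rotate by y, whether the shift amounts arrive directly or through a peeled sign/zero/any-extend or truncate. A quick standalone check (y = 0 is excluded, since x >> 32 is undefined in C++):

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t x, unsigned r) {
  return (x << (r & 31)) | (x >> ((32 - r) & 31));
}

int main() {
  const uint32_t x = 0xdeadbeefu;
  for (unsigned y = 1; y < 32; ++y) {
    // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
    assert(((x << y) | (x >> (32 - y))) == rotl32(x, y));
  }
}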
+ if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && + N0.getOperand(0).getOpcode() == ISD::SRL && + isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { + uint64_t c1 = + cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); + if (c1 < VT.getSizeInBits()) { + uint64_t c2 = N1C->getZExtValue(); + if (c1 == c2) { + SDValue NewOp0 = N0.getOperand(0); + EVT CountVT = NewOp0.getOperand(1).getValueType(); + SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), + NewOp0, DAG.getConstant(c2, CountVT)); + AddToWorkList(NewSHL.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); + } + } + } + // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or // (and (srl x, (sub c1, c2), MASK) // Only fold this if the inner shift has no other uses -- if it does, folding @@ -3750,6 +3792,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + // fold (sra c1, c2) -> (sra c1, c2) if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C); @@ -3895,6 +3943,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + // fold (srl c1, c2) -> c1 >>u c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); @@ -4217,6 +4271,23 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SDValue(); } +static +std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { + SDLoc DL(N); + EVT LoVT, HiVT; + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + // Split the inputs. + SDValue Lo, Hi, LL, LH, RL, RH; + llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); + + Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); + + return std::make_pair(Lo, Hi); +} + SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4254,6 +4325,34 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { } } + // If the VSELECT result requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + if (N0.getOpcode() == ISD::SETCC) { + EVT VT = N->getValueType(0); + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), VT) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH; + llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG); + llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1); + llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2); + + Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL); + Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH); + + // Add the new VSELECT nodes to the work list in case they need to be split + // again. 
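The split performed by SplitVSETCC and visitVSELECT above is value-preserving because a lanewise select factors over any partition of the lanes: selecting on all eight lanes equals concatenating selects on the two four-lane halves, with the comparison mask split the same way. A scalar model of that equivalence:

#include <cassert>

// Lanewise c ? a : b over n lanes, the scalar meaning of VSELECT.
static void vselect(const bool *c, const int *a, const int *b, int *out, int n) {
  for (int i = 0; i < n; ++i)
    out[i] = c[i] ? a[i] : b[i];
}

int main() {
  bool c[8];
  int a[8], b[8], whole[8], split[8];
  for (int i = 0; i < 8; ++i) {
    c[i] = (i & 1) != 0;
    a[i] = i;
    b[i] = -i;
  }
  vselect(c, a, b, whole, 8);                 // one wide VSELECT
  vselect(c, a, b, split, 4);                 // lo half
  vselect(c + 4, a + 4, b + 4, split + 4, 4); // hi half, then "concat"
  for (int i = 0; i < 8; ++i)
    assert(whole[i] == split[i]);
}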
+ AddToWorkList(Lo.getNode()); + AddToWorkList(Hi.getNode()); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + } + return SDValue(); } @@ -4469,10 +4568,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); @@ -4493,10 +4590,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), MemVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, SDLoc(N0), @@ -4524,11 +4619,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (DoXform) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), LN0->getMemoryVT(), - LN0->isVolatile(), - LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.sext(VT.getSizeInBits()); SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, @@ -4593,9 +4685,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) { return DAG.getSelect(SDLoc(N), VT, DAG.getSetCC(SDLoc(N), - getSetCCResultType(VT), - N0.getOperand(0), N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()), + getSetCCResultType(VT), + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()), NegOne, DAG.getConstant(0, VT)); } } @@ -4762,10 +4854,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); @@ -4795,11 +4885,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (DoXform) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), LN0->getMemoryVT(), - LN0->isVolatile(), - LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, @@ -4826,10 +4913,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), MemVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), @@ -4992,10 +5077,8 @@ SDValue 
DAGCombiner::visitANY_EXTEND(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); @@ -5016,9 +5099,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + MemVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, SDLoc(N0), @@ -5250,12 +5331,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), NewAlign); + LN0->isInvariant(), NewAlign, LN0->getTBAAInfo()); else Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - NewAlign); + NewAlign, LN0->getTBAAInfo()); // Replace the old load's chain with the new load's chain. WorkListRemover DeadNodes(*this); @@ -5353,10 +5434,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - EVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), EVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); AddToWorkList(ExtLoad.getNode()); @@ -5371,10 +5450,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - EVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), EVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -5657,7 +5734,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && // Do not change the width of a volatile load. 
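The visitBITCAST change above only adds a target profitability hook (isLoadBitCastBeneficial); the rewrite itself is sound because loading at one type and bitcasting gives the same bytes as loading directly at the reinterpreted type. A minimal host-level model:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const uint32_t bits = 0x40490fdbu; // bit pattern of 3.14159274f
  unsigned char mem[4];
  std::memcpy(mem, &bits, 4); // the value sitting in memory

  // (bitcast (i32 load x)): integer load, then reinterpret.
  uint32_t i;
  std::memcpy(&i, mem, 4);
  float viaInt;
  std::memcpy(&viaInt, &i, 4);

  // (f32 load x): load directly at the new type.
  float direct;
  std::memcpy(&direct, mem, 4);

  assert(std::memcmp(&viaInt, &direct, 4) == 0);
}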
!cast<LoadSDNode>(N0)->isVolatile() && - (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { + (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && + TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); unsigned Align = TLI.getDataLayout()-> getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); @@ -5667,7 +5745,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), LN0->getPointerInfo(), LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), OrigAlign); + LN0->isInvariant(), OrigAlign, + LN0->getTBAAInfo()); AddToWorkList(N); CombineTo(N0.getNode(), DAG.getNode(ISD::BITCAST, SDLoc(N0), @@ -6652,16 +6731,14 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { } // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) - if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() && + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::FP_ROUND, SDLoc(N0), @@ -7451,13 +7528,16 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getValueType(0), Chain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), - LD->isVolatile(), LD->isNonTemporal(), Align); + LD->isVolatile(), LD->isNonTemporal(), Align, + LD->getTBAAInfo()); return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } } } - if (CombinerAA) { + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : + TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + if (UseAA) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -7468,17 +7548,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Replace the chain to void dependency. if (LD->getExtensionType() == ISD::NON_EXTLOAD) { ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD), - BetterChain, Ptr, LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + BetterChain, Ptr, LD->getMemOperand()); } else { ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), LD->getValueType(0), - BetterChain, Ptr, LD->getPointerInfo(), - LD->getMemoryVT(), - LD->isVolatile(), - LD->isNonTemporal(), - LD->getAlignment()); + BetterChain, Ptr, LD->getMemoryVT(), + LD->getMemOperand()); } // Create token factor to keep old chain connected. @@ -7498,9 +7573,562 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) return SDValue(N, 0); + // Try to slice up N to more direct loads if the slices are mapped to + // different register banks or pairing can take place. + if (SliceUpLoad(N)) + return SDValue(N, 0); + return SDValue(); } +namespace { +/// \brief Helper structure used to slice a load in smaller loads. 
+/// Basically a slice is obtained from the following sequence: +/// Origin = load Ty1, Base +/// Shift = srl Ty1 Origin, CstTy Amount +/// Inst = trunc Shift to Ty2 +/// +/// Then, it will be rewriten into: +/// Slice = load SliceTy, Base + SliceOffset +/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2 +/// +/// SliceTy is deduced from the number of bits that are actually used to +/// build Inst. +struct LoadedSlice { + /// \brief Helper structure used to compute the cost of a slice. + struct Cost { + /// Are we optimizing for code size. + bool ForCodeSize; + /// Various cost. + unsigned Loads; + unsigned Truncates; + unsigned CrossRegisterBanksCopies; + unsigned ZExts; + unsigned Shift; + + Cost(bool ForCodeSize = false) + : ForCodeSize(ForCodeSize), Loads(0), Truncates(0), + CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {} + + /// \brief Get the cost of one isolated slice. + Cost(const LoadedSlice &LS, bool ForCodeSize = false) + : ForCodeSize(ForCodeSize), Loads(1), Truncates(0), + CrossRegisterBanksCopies(0), ZExts(0), Shift(0) { + EVT TruncType = LS.Inst->getValueType(0); + EVT LoadedType = LS.getLoadedType(); + if (TruncType != LoadedType && + !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType)) + ZExts = 1; + } + + /// \brief Account for slicing gain in the current cost. + /// Slicing provide a few gains like removing a shift or a + /// truncate. This method allows to grow the cost of the original + /// load with the gain from this slice. + void addSliceGain(const LoadedSlice &LS) { + // Each slice saves a truncate. + const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo(); + if (!TLI.isTruncateFree(LS.Inst->getValueType(0), + LS.Inst->getOperand(0).getValueType())) + ++Truncates; + // If there is a shift amount, this slice gets rid of it. + if (LS.Shift) + ++Shift; + // If this slice can merge a cross register bank copy, account for it. + if (LS.canMergeExpensiveCrossRegisterBankCopy()) + ++CrossRegisterBanksCopies; + } + + Cost &operator+=(const Cost &RHS) { + Loads += RHS.Loads; + Truncates += RHS.Truncates; + CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies; + ZExts += RHS.ZExts; + Shift += RHS.Shift; + return *this; + } + + bool operator==(const Cost &RHS) const { + return Loads == RHS.Loads && Truncates == RHS.Truncates && + CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies && + ZExts == RHS.ZExts && Shift == RHS.Shift; + } + + bool operator!=(const Cost &RHS) const { return !(*this == RHS); } + + bool operator<(const Cost &RHS) const { + // Assume cross register banks copies are as expensive as loads. + // FIXME: Do we want some more target hooks? + unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies; + unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies; + // Unless we are optimizing for code size, consider the + // expensive operation first. + if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS) + return ExpensiveOpsLHS < ExpensiveOpsRHS; + return (Truncates + ZExts + Shift + ExpensiveOpsLHS) < + (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS); + } + + bool operator>(const Cost &RHS) const { return RHS < *this; } + + bool operator<=(const Cost &RHS) const { return !(RHS < *this); } + + bool operator>=(const Cost &RHS) const { return !(*this < RHS); } + }; + // The last instruction that represent the slice. This should be a + // truncate instruction. + SDNode *Inst; + // The original load instruction. + LoadSDNode *Origin; + // The right shift amount in bits from the original load. 
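The rewrite documented at the top of this struct is the whole trick: a wide load whose value is only consumed as trunc / trunc(lshr) pieces can be replaced by independent narrow loads at the right byte offsets. A host-level sketch of the equivalence, with a hypothetical value and an endianness probe so it holds on either kind of host:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  unsigned char mem[8];
  const uint64_t pattern = 0x1122334455667788ull;
  std::memcpy(mem, &pattern, 8); // host byte order, whatever it is

  // Origin = load i64, Base; Inst0 = trunc; Inst1 = trunc(srl Origin, 32)
  uint64_t wide;
  std::memcpy(&wide, mem, 8);
  uint32_t lo = (uint32_t)wide;
  uint32_t hi = (uint32_t)(wide >> 32);

  // Sliced form: two i32 loads. On a little-endian host the low slice sits
  // at offset 0 and the high one at offset 4; getOffsetFromBase flips this
  // for big-endian targets.
  int loOff = 0, hiOff = 4;
  uint32_t one = 1;
  if (*(unsigned char *)&one == 0) { loOff = 4; hiOff = 0; } // big-endian host
  uint32_t sliceLo, sliceHi;
  std::memcpy(&sliceLo, mem + loOff, 4);
  std::memcpy(&sliceHi, mem + hiOff, 4);
  assert(sliceLo == lo && sliceHi == hi);
}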
+ unsigned Shift; + // The DAG from which Origin came from. + // This is used to get some contextual information about legal types, etc. + SelectionDAG *DAG; + + LoadedSlice(SDNode *Inst = NULL, LoadSDNode *Origin = NULL, + unsigned Shift = 0, SelectionDAG *DAG = NULL) + : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} + + LoadedSlice(const LoadedSlice &LS) + : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {} + + /// \brief Get the bits used in a chunk of bits \p BitWidth large. + /// \return Result is \p BitWidth and has used bits set to 1 and + /// not used bits set to 0. + APInt getUsedBits() const { + // Reproduce the trunc(lshr) sequence: + // - Start from the truncated value. + // - Zero extend to the desired bit width. + // - Shift left. + assert(Origin && "No original load to compare against."); + unsigned BitWidth = Origin->getValueSizeInBits(0); + assert(Inst && "This slice is not bound to an instruction"); + assert(Inst->getValueSizeInBits(0) <= BitWidth && + "Extracted slice is bigger than the whole type!"); + APInt UsedBits(Inst->getValueSizeInBits(0), 0); + UsedBits.setAllBits(); + UsedBits = UsedBits.zext(BitWidth); + UsedBits <<= Shift; + return UsedBits; + } + + /// \brief Get the size of the slice to be loaded in bytes. + unsigned getLoadedSize() const { + unsigned SliceSize = getUsedBits().countPopulation(); + assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte."); + return SliceSize / 8; + } + + /// \brief Get the type that will be loaded for this slice. + /// Note: This may not be the final type for the slice. + EVT getLoadedType() const { + assert(DAG && "Missing context"); + LLVMContext &Ctxt = *DAG->getContext(); + return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8); + } + + /// \brief Get the alignment of the load used for this slice. + unsigned getAlignment() const { + unsigned Alignment = Origin->getAlignment(); + unsigned Offset = getOffsetFromBase(); + if (Offset != 0) + Alignment = MinAlign(Alignment, Alignment + Offset); + return Alignment; + } + + /// \brief Check if this slice can be rewritten with legal operations. + bool isLegal() const { + // An invalid slice is not legal. + if (!Origin || !Inst || !DAG) + return false; + + // Offsets are for indexed load only, we do not handle that. + if (Origin->getOffset().getOpcode() != ISD::UNDEF) + return false; + + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + + // Check that the type is legal. + EVT SliceType = getLoadedType(); + if (!TLI.isTypeLegal(SliceType)) + return false; + + // Check that the load is legal for this type. + if (!TLI.isOperationLegal(ISD::LOAD, SliceType)) + return false; + + // Check that the offset can be computed. + // 1. Check its type. + EVT PtrType = Origin->getBasePtr().getValueType(); + if (PtrType == MVT::Untyped || PtrType.isExtended()) + return false; + + // 2. Check that it fits in the immediate. + if (!TLI.isLegalAddImmediate(getOffsetFromBase())) + return false; + + // 3. Check that the computation is legal. + if (!TLI.isOperationLegal(ISD::ADD, PtrType)) + return false; + + // Check that the zext is legal if it needs one. + EVT TruncateType = Inst->getValueType(0); + if (TruncateType != SliceType && + !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType)) + return false; + + return true; + } + + /// \brief Get the offset in bytes of this slice in the original chunk of + /// bits. + /// \pre DAG != NULL. 
+ uint64_t getOffsetFromBase() const { + assert(DAG && "Missing context."); + bool IsBigEndian = + DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian(); + assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported."); + uint64_t Offset = Shift / 8; + unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8; + assert(!(Origin->getValueSizeInBits(0) & 0x7) && + "The size of the original loaded type is not a multiple of a" + " byte."); + // If Offset is bigger than TySizeInBytes, it means we are loading all + // zeros. This should have been optimized before in the process. + assert(TySizeInBytes > Offset && + "Invalid shift amount for given loaded size"); + if (IsBigEndian) + Offset = TySizeInBytes - Offset - getLoadedSize(); + return Offset; + } + + /// \brief Generate the sequence of instructions to load the slice + /// represented by this object and redirect the uses of this slice to + /// this new sequence of instructions. + /// \pre this->Inst && this->Origin are valid Instructions and this + /// object passed the legal check: LoadedSlice::isLegal returned true. + /// \return The last instruction of the sequence used to load the slice. + SDValue loadSlice() const { + assert(Inst && Origin && "Unable to replace a non-existing slice."); + const SDValue &OldBaseAddr = Origin->getBasePtr(); + SDValue BaseAddr = OldBaseAddr; + // Get the offset in that chunk of bytes w.r.t. the endianess. + int64_t Offset = static_cast<int64_t>(getOffsetFromBase()); + assert(Offset >= 0 && "Offset too big to fit in int64_t!"); + if (Offset) { + // BaseAddr = BaseAddr + Offset. + EVT ArithType = BaseAddr.getValueType(); + BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr, + DAG->getConstant(Offset, ArithType)); + } + + // Create the type of the loaded slice according to its size. + EVT SliceType = getLoadedType(); + + // Create the load for the slice. + SDValue LastInst = DAG->getLoad( + SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, + Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(), + Origin->isNonTemporal(), Origin->isInvariant(), getAlignment()); + // If the final type is not the same as the loaded type, this means that + // we have to pad with zero. Create a zero extend for that. + EVT FinalType = Inst->getValueType(0); + if (SliceType != FinalType) + LastInst = + DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst); + return LastInst; + } + + /// \brief Check if this slice can be merged with an expensive cross register + /// bank copy. E.g., + /// i = load i32 + /// f = bitcast i32 i to float + bool canMergeExpensiveCrossRegisterBankCopy() const { + if (!Inst || !Inst->hasOneUse()) + return false; + SDNode *Use = *Inst->use_begin(); + if (Use->getOpcode() != ISD::BITCAST) + return false; + assert(DAG && "Missing context"); + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + EVT ResVT = Use->getValueType(0); + const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT()); + const TargetRegisterClass *ArgRC = + TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT()); + if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT)) + return false; + + // At this point, we know that we perform a cross-register-bank copy. + // Check if it is expensive. + const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo(); + // Assume bitcasts are cheap, unless both register classes do not + // explicitly share a common sub class. 
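getOffsetFromBase above is small but endianness-sensitive. A toy reimplementation of just its arithmetic, fixed to an 8-byte origin load for illustration, with a couple of worked cases:

#include <cassert>

// Mirrors the math in LoadedSlice::getOffsetFromBase for an 8-byte load:
// Shift is the srl amount in bits, SliceBytes the size of the slice.
static unsigned sliceOffset(unsigned Shift, unsigned SliceBytes, bool BigEndian) {
  unsigned Offset = Shift / 8;
  if (BigEndian)
    Offset = 8 - Offset - SliceBytes;
  return Offset;
}

int main() {
  // i32 slice taken as trunc(srl x, 32) of an i64 load:
  assert(sliceOffset(32, 4, /*BigEndian=*/false) == 4); // LE: upper half at +4
  assert(sliceOffset(32, 4, /*BigEndian=*/true) == 0);  // BE: upper half at +0
  // i16 slice taken as trunc(srl x, 16):
  assert(sliceOffset(16, 2, false) == 2);
  assert(sliceOffset(16, 2, true) == 4); // 8 - 2 - 2
}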
+ if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC)) + return false; + + // Check if it will be merged with the load. + // 1. Check the alignment constraint. + unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment( + ResVT.getTypeForEVT(*DAG->getContext())); + + if (RequiredAlignment > getAlignment()) + return false; + + // 2. Check that the load is a legal operation for that type. + if (!TLI.isOperationLegal(ISD::LOAD, ResVT)) + return false; + + // 3. Check that we do not have a zext in the way. + if (Inst->getValueType(0) != getLoadedType()) + return false; + + return true; + } +}; +} + +/// \brief Sorts LoadedSlice according to their offset. +struct LoadedSliceSorter { + bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) { + assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); + return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); + } +}; + +/// \brief Check that all bits set in \p UsedBits form a dense region, i.e., +/// \p UsedBits looks like 0..0 1..1 0..0. +static bool areUsedBitsDense(const APInt &UsedBits) { + // If all the bits are one, this is dense! + if (UsedBits.isAllOnesValue()) + return true; + + // Get rid of the unused bits on the right. + APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros()); + // Get rid of the unused bits on the left. + if (NarrowedUsedBits.countLeadingZeros()) + NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits()); + // Check that the chunk of bits is completely used. + return NarrowedUsedBits.isAllOnesValue(); +} + +/// \brief Check whether or not \p First and \p Second are next to each other +/// in memory. This means that there is no hole between the bits loaded +/// by \p First and the bits loaded by \p Second. +static bool areSlicesNextToEachOther(const LoadedSlice &First, + const LoadedSlice &Second) { + assert(First.Origin == Second.Origin && First.Origin && + "Unable to match different memory origins."); + APInt UsedBits = First.getUsedBits(); + assert((UsedBits & Second.getUsedBits()) == 0 && + "Slices are not supposed to overlap."); + UsedBits |= Second.getUsedBits(); + return areUsedBitsDense(UsedBits); +} + +/// \brief Adjust the \p GlobalLSCost according to the target +/// paring capabilities and the layout of the slices. +/// \pre \p GlobalLSCost should account for at least as many loads as +/// there is in the slices in \p LoadedSlices. +static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, + LoadedSlice::Cost &GlobalLSCost) { + unsigned NumberOfSlices = LoadedSlices.size(); + // If there is less than 2 elements, no pairing is possible. + if (NumberOfSlices < 2) + return; + + // Sort the slices so that elements that are likely to be next to each + // other in memory are next to each other in the list. + std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter()); + const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo(); + // First (resp. Second) is the first (resp. Second) potentially candidate + // to be placed in a paired load. + const LoadedSlice *First = NULL; + const LoadedSlice *Second = NULL; + for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice, + // Set the beginning of the pair. + First = Second) { + + Second = &LoadedSlices[CurrSlice]; + + // If First is NULL, it means we start a new pair. + // Get to the next slice. + if (!First) + continue; + + EVT LoadedType = First->getLoadedType(); + + // If the types of the slices are different, we cannot pair them. 
+ if (LoadedType != Second->getLoadedType()) + continue; + + // Check if the target supplies paired loads for this type. + unsigned RequiredAlignment = 0; + if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) { + // move to the next pair, this type is hopeless. + Second = NULL; + continue; + } + // Check if we meet the alignment requirement. + if (RequiredAlignment > First->getAlignment()) + continue; + + // Check that both loads are next to each other in memory. + if (!areSlicesNextToEachOther(*First, *Second)) + continue; + + assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!"); + --GlobalLSCost.Loads; + // Move to the next pair. + Second = NULL; + } +} + +/// \brief Check the profitability of all involved LoadedSlice. +/// Currently, it is considered profitable if there is exactly two +/// involved slices (1) which are (2) next to each other in memory, and +/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3). +/// +/// Note: The order of the elements in \p LoadedSlices may be modified, but not +/// the elements themselves. +/// +/// FIXME: When the cost model will be mature enough, we can relax +/// constraints (1) and (2). +static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices, + const APInt &UsedBits, bool ForCodeSize) { + unsigned NumberOfSlices = LoadedSlices.size(); + if (StressLoadSlicing) + return NumberOfSlices > 1; + + // Check (1). + if (NumberOfSlices != 2) + return false; + + // Check (2). + if (!areUsedBitsDense(UsedBits)) + return false; + + // Check (3). + LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize); + // The original code has one big load. + OrigCost.Loads = 1; + for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) { + const LoadedSlice &LS = LoadedSlices[CurrSlice]; + // Accumulate the cost of all the slices. + LoadedSlice::Cost SliceCost(LS, ForCodeSize); + GlobalSlicingCost += SliceCost; + + // Account as cost in the original configuration the gain obtained + // with the current slices. + OrigCost.addSliceGain(LS); + } + + // If the target supports paired load, adjust the cost accordingly. + adjustCostForPairing(LoadedSlices, GlobalSlicingCost); + return OrigCost > GlobalSlicingCost; +} + +/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr) +/// operations, split it in the various pieces being extracted. +/// +/// This sort of thing is introduced by SROA. +/// This slicing takes care not to insert overlapping loads. +/// \pre LI is a simple load (i.e., not an atomic or volatile load). +bool DAGCombiner::SliceUpLoad(SDNode *N) { + if (Level < AfterLegalizeDAG) + return false; + + LoadSDNode *LD = cast<LoadSDNode>(N); + if (LD->isVolatile() || !ISD::isNormalLoad(LD) || + !LD->getValueType(0).isInteger()) + return false; + + // Keep track of already used bits to detect overlapping values. + // In that case, we will just abort the transformation. + APInt UsedBits(LD->getValueSizeInBits(0), 0); + + SmallVector<LoadedSlice, 4> LoadedSlices; + + // Check if this load is used as several smaller chunks of bits. + // Basically, look for uses in trunc or trunc(lshr) and record a new chain + // of computation for each trunc. + for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); + UI != UIEnd; ++UI) { + // Skip the uses of the chain. + if (UI.getUse().getResNo() != 0) + continue; + + SDNode *User = *UI; + unsigned Shift = 0; + + // Check if this is a trunc(lshr). 
+ if (User->getOpcode() == ISD::SRL && User->hasOneUse() && + isa<ConstantSDNode>(User->getOperand(1))) { + Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue(); + User = *User->use_begin(); + } + + // At this point, User is a Truncate, iff we encountered, trunc or + // trunc(lshr). + if (User->getOpcode() != ISD::TRUNCATE) + return false; + + // The width of the type must be a power of 2 and greater than 8-bits. + // Otherwise the load cannot be represented in LLVM IR. + // Moreover, if we shifted with a non 8-bits multiple, the slice + // will be accross several bytes. We do not support that. + unsigned Width = User->getValueSizeInBits(0); + if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) + return 0; + + // Build the slice for this chain of computations. + LoadedSlice LS(User, LD, Shift, &DAG); + APInt CurrentUsedBits = LS.getUsedBits(); + + // Check if this slice overlaps with another. + if ((CurrentUsedBits & UsedBits) != 0) + return false; + // Update the bits used globally. + UsedBits |= CurrentUsedBits; + + // Check if the new slice would be legal. + if (!LS.isLegal()) + return false; + + // Record the slice. + LoadedSlices.push_back(LS); + } + + // Abort slicing if it does not seem to be profitable. + if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize)) + return false; + + ++SlicedLoads; + + // Rewrite each chain to use an independent load. + // By construction, each chain can be represented by a unique load. + + // Prepare the argument for the new token factor for all the slices. + SmallVector<SDValue, 8> ArgChains; + for (SmallVectorImpl<LoadedSlice>::const_iterator + LSIt = LoadedSlices.begin(), + LSItEnd = LoadedSlices.end(); + LSIt != LSItEnd; ++LSIt) { + SDValue SliceInst = LSIt->loadSlice(); + CombineTo(LSIt->Inst, SliceInst, true); + if (SliceInst.getNode()->getOpcode() != ISD::LOAD) + SliceInst = SliceInst.getOperand(0); + assert(SliceInst->getOpcode() == ISD::LOAD && + "It takes more than a zext to get to the loaded slice!!"); + ArgChains.push_back(SliceInst.getValue(1)); + } + + SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, + &ArgChains[0], ArgChains.size()); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); + return true; +} + /// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the /// load is having specific bytes cleared out. If so, return the byte size /// being masked out and the shift amount. @@ -7735,7 +8363,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), NewAlign); + LD->isInvariant(), NewAlign, + LD->getTBAAInfo()); SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); SDValue NewST = DAG.getStore(Chain, SDLoc(N), @@ -7846,17 +8475,28 @@ struct BaseIndexOffset { static BaseIndexOffset match(SDValue Ptr) { bool IsIndexSignExt = false; - // Just Base or possibly anything else. + // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD + // instruction, then it could be just the BASE or everything else we don't + // know how to handle. Just use Ptr as BASE and give up. if (Ptr->getOpcode() != ISD::ADD) return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); - // Base + offset. + // We know that we have at least an ADD instruction. Try to pattern match + // the simple case of BASE + OFFSET. 
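The BASE/INDEX/OFFSET decomposition being documented here can be modeled on a toy expression tree; this is a hedged sketch of just the dispatch order, where Leaf/Add/Mul/Const are hypothetical stand-ins for the SDNode opcodes the real code inspects:

#include <cassert>
#include <cstdint>

struct Expr {
  enum Kind { Leaf, Add, Mul, Const } K;
  const Expr *L, *R;
  int64_t Val; // for Const
};

struct Decomp { const Expr *Base, *Index; int64_t Off; };

static Decomp match(const Expr *P) {
  if (P->K != Expr::Add)
    return {P, nullptr, 0};                      // not an ADD: everything is BASE
  if (P->R->K == Expr::Const)
    return {P->L, nullptr, P->R->Val};           // BASE + OFFSET
  if (P->R->K == Expr::Mul)
    return {P, nullptr, 0};                      // induction-style address:
                                                 // treat the whole ADD as BASE
  return {P->L, P->R, 0};                        // BASE + INDEX
}

int main() {
  Expr base{Expr::Leaf}, iv{Expr::Leaf};
  Expr c16{Expr::Const, nullptr, nullptr, 16};
  Expr mul{Expr::Mul, &iv, &c16, 0};
  Expr addMul{Expr::Add, &base, &mul, 0};
  Expr addOff{Expr::Add, &base, &c16, 0};
  assert(match(&addOff).Base == &base && match(&addOff).Off == 16);
  assert(match(&addMul).Base == &addMul); // loop case: ADD(ptr, MUL(iv, size))
}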
if (isa<ConstantSDNode>(Ptr->getOperand(1))) { int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, IsIndexSignExt); } + // Inside a loop the current BASE pointer is calculated using an ADD and a + // MUL instruction. In this case Ptr is the actual BASE pointer. + // (i64 add (i64 %array_ptr) + // (i64 mul (i64 %induction_var) + // (i64 %element_size))) + if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) + return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + // Look at Base + Index + Offset cases. SDValue Base = Ptr->getOperand(0); SDValue IndexOffset = Ptr->getOperand(1); @@ -8007,6 +8647,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { Index = STn; break; } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { + if (Ldn->isVolatile()) { + Index = NULL; + break; + } + // Save the load node for later. Continue the scan. AliasLoadNodes.push_back(Ldn); NextInChain = Ldn->getChain().getNode(); @@ -8384,7 +9029,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, ST->getPointerInfo(), ST->isVolatile(), - ST->isNonTemporal(), OrigAlign); + ST->isNonTemporal(), OrigAlign, + ST->getTBAAInfo()); } // Turn 'store undef, Ptr' -> nothing. @@ -8399,7 +9045,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // transform should not be done in this case. if (Value.getOpcode() != ISD::TargetConstantFP) { SDValue Tmp; - switch (CFP->getValueType(0).getSimpleVT().SimpleTy) { + switch (CFP->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unknown FP type"); case MVT::f16: // We don't do this for these yet. case MVT::f80: @@ -8412,8 +9058,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). bitcastToAPInt().getZExtValue(), MVT::i32); return DAG.getStore(Chain, SDLoc(N), Tmp, - Ptr, ST->getPointerInfo(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + Ptr, ST->getMemOperand()); } break; case MVT::f64: @@ -8423,8 +9068,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 
getZExtValue(), MVT::i64); return DAG.getStore(Chain, SDLoc(N), Tmp, - Ptr, ST->getPointerInfo(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + Ptr, ST->getMemOperand()); } if (!ST->isVolatile() && @@ -8440,18 +9084,19 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, - ST->getAlignment()); + ST->getAlignment(), TBAAInfo); Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, - Alignment); + Alignment, TBAAInfo); return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, St0, St1); } @@ -8467,7 +9112,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (Align > ST->getAlignment()) return DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), Align); + ST->isVolatile(), ST->isNonTemporal(), Align, + ST->getTBAAInfo()); } } @@ -8477,7 +9123,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (NewST.getNode()) return NewST; - if (CombinerAA) { + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : + TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + if (UseAA) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -8488,14 +9136,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Replace the chain to avoid dependency. if (ST->isTruncatingStore()) { ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr, - ST->getPointerInfo(), - ST->getMemoryVT(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + ST->getMemoryVT(), ST->getMemOperand()); } else { ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr, - ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + ST->getMemOperand()); } // Create token to keep both nodes around. @@ -8528,9 +9172,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { AddToWorkList(Value.getNode()); if (Shorter.getNode()) return DAG.getTruncStore(Chain, SDLoc(N), Shorter, - Ptr, ST->getPointerInfo(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + Ptr, ST->getMemoryVT(), ST->getMemOperand()); // Otherwise, see if we can simplify the operation with // SimplifyDemandedBits, which only works if the value has a single use. @@ -8561,9 +9203,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), ST->getMemoryVT())) { return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), - Ptr, ST->getPointerInfo(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + Ptr, ST->getMemoryVT(), ST->getMemOperand()); } // Only perform this optimization before the types are legal, because we @@ -8821,13 +9461,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { ? 
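The f64 path above rewrites one store of a double constant as two i32 stores of the constant's bit halves, four bytes apart; which half lands at the lower address depends on the target's endianness. A host-level check of that equivalence:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const double d = 6.283185307179586; // the constant being stored
  uint64_t bits;
  std::memcpy(&bits, &d, 8);
  uint32_t lo = (uint32_t)bits, hi = (uint32_t)(bits >> 32);

  // Reference: the original single 8-byte store.
  unsigned char whole[8];
  std::memcpy(whole, &d, 8);

  // Split form: two 4-byte stores at offsets 0 and 4, placed per host order.
  uint32_t one = 1;
  bool le = *(unsigned char *)&one == 1;
  unsigned char split[8];
  std::memcpy(split + (le ? 0 : 4), &lo, 4);
  std::memcpy(split + (le ? 4 : 0), &hi, 4);

  assert(std::memcmp(whole, split, 8) == 0);
}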
ISD::ZEXTLOAD : ISD::EXTLOAD; Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align); + LVT, LN0->isVolatile(), LN0->isNonTemporal(), + Align, LN0->getTBAAInfo()); Chain = Load.getValue(1); } else { Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), Align); + LN0->isInvariant(), Align, LN0->getTBAAInfo()); Chain = Load.getValue(1); if (NVT.bitsLT(LVT)) Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load); @@ -9165,8 +9806,35 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return N->getOperand(0); // Check if all of the operands are undefs. + EVT VT = N->getValueType(0); if (ISD::allOperandsUndef(N)) - return DAG.getUNDEF(N->getValueType(0)); + return DAG.getUNDEF(VT); + + // Optimize concat_vectors where one of the vectors is undef. + if (N->getNumOperands() == 2 && + N->getOperand(1)->getOpcode() == ISD::UNDEF) { + SDValue In = N->getOperand(0); + assert(In.getValueType().isVector() && "Must concat vectors"); + + // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr). + if (In->getOpcode() == ISD::BITCAST && + !In->getOperand(0)->getValueType(0).isVector()) { + SDValue Scalar = In->getOperand(0); + EVT SclTy = Scalar->getValueType(0); + + if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) + return SDValue(); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, + VT.getSizeInBits() / SclTy.getSizeInBits()); + if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType())) + return SDValue(); + + SDLoc dl = SDLoc(N); + SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar); + return DAG.getNode(ISD::BITCAST, dl, VT, Res); + } + } // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. @@ -9225,7 +9893,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // (extract_subvec (concat V1, V2, ...), i) // Into: // Vi if possible - // Only operand 0 is checked as 'concat' assumes all inputs of the same type. + // Only operand 0 is checked as 'concat' assumes all inputs of the same + // type. if (V->getOperand(0).getValueType() != NVT) return SDValue(); unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); @@ -9358,10 +10027,10 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { for (unsigned i = 0; i != NumElts; ++i) { int Idx = SVN->getMaskElt(i); if (Idx >= 0) { - if (Idx < (int)NumElts) - Idx += NumElts; - else + if (Idx >= (int)NumElts) Idx -= NumElts; + else + Idx = -1; // remove reference to lhs } NewMask.push_back(Idx); } @@ -9738,7 +10407,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect), - // FIXME: Discards pointer info. + // FIXME: Discards pointer and TBAA info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->isVolatile(), LLD->isNonTemporal(), LLD->isInvariant(), LLD->getAlignment()); @@ -9747,7 +10416,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, RLD->getExtensionType() : LLD->getExtensionType(), SDLoc(TheSelect), TheSelect->getValueType(0), - // FIXME: Discards pointer info. + // FIXME: Discards pointer and TBAA info. 
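A standalone model of the VECTOR_SHUFFLE mask rewrite above may help: once the combiner decides the left operand is no longer referenced, mask entries that pointed into the RHS are remapped down by NumElts, and entries that pointed into the LHS become -1, the conventional undef element. The sketch below uses plain STL types rather than ShuffleVectorSDNode, so all names are illustrative only.

#include <cstdio>
#include <vector>

// Rewrite a shuffle mask of width NumElts so it indexes only the old
// RHS (which becomes the new single input); references to the old LHS
// turn into -1 ("undef").
static std::vector<int> dropLHSRefs(const std::vector<int> &Mask,
                                    int NumElts) {
  std::vector<int> NewMask;
  NewMask.reserve(Mask.size());
  for (int Idx : Mask) {
    if (Idx >= 0) {
      if (Idx >= NumElts)
        Idx -= NumElts;   // was an RHS element; remap into the new input
      else
        Idx = -1;         // was an LHS element; no longer available
    }
    NewMask.push_back(Idx);
  }
  return NewMask;
}

int main() {
  // 4-wide shuffle taking elements from both inputs.
  std::vector<int> Mask = {0, 5, -1, 7};
  for (int I : dropLHSRefs(Mask, 4))
    std::printf("%d ", I);   // prints: -1 1 -1 3
  std::printf("\n");
}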
LLD->getChain(), Addr, MachinePointerInfo(), LLD->getMemoryVT(), LLD->isVolatile(), LLD->isNonTemporal(), LLD->getAlignment()); @@ -9852,7 +10521,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero); AddToWorkList(CstOffset.getNode()); - CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx, + CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset); AddToWorkList(CPIdx.getNode()); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, @@ -9974,9 +10643,10 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, return Temp; // shl setcc result by log2 n2c - return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), - getShiftAmountTy(Temp.getValueType()))); + return DAG.getNode( + ISD::SHL, DL, N2.getValueType(), Temp, + DAG.getConstant(N2C->getAPIntValue().logBase2(), + getShiftAmountTy(Temp.getValueType()))); } } @@ -10132,17 +10802,20 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, /// isAlias - Return true if there is any possibility that the two addresses /// overlap. -bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, +bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, const Value *SrcValue1, int SrcValueOffset1, unsigned SrcValueAlign1, const MDNode *TBAAInfo1, - SDValue Ptr2, int64_t Size2, + SDValue Ptr2, int64_t Size2, bool IsVolatile2, const Value *SrcValue2, int SrcValueOffset2, unsigned SrcValueAlign2, const MDNode *TBAAInfo2) const { // If they are the same then they must be aliases. if (Ptr1 == Ptr2) return true; + // If they are both volatile then they cannot be reordered. + if (IsVolatile1 && IsVolatile2) return true; + // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; @@ -10187,7 +10860,9 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, return false; } - if (CombinerGlobalAA) { + bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA : + TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + if (UseAA && SrcValue1 && SrcValue2) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; @@ -10206,24 +10881,25 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) { SDValue Ptr0, Ptr1; int64_t Size0, Size1; + bool IsVolatile0, IsVolatile1; const Value *SrcValue0, *SrcValue1; int SrcValueOffset0, SrcValueOffset1; unsigned SrcValueAlign0, SrcValueAlign1; const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1; - FindAliasInfo(Op0, Ptr0, Size0, SrcValue0, SrcValueOffset0, + FindAliasInfo(Op0, Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0, SrcValueAlign0, SrcTBAAInfo0); - FindAliasInfo(Op1, Ptr1, Size1, SrcValue1, SrcValueOffset1, + FindAliasInfo(Op1, Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1, SrcValueAlign1, SrcTBAAInfo1); - return isAlias(Ptr0, Size0, SrcValue0, SrcValueOffset0, + return isAlias(Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0, SrcValueAlign0, SrcTBAAInfo0, - Ptr1, Size1, SrcValue1, SrcValueOffset1, + Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1, SrcValueAlign1, SrcTBAAInfo1); } /// FindAliasInfo - Extracts the relevant alias information from the memory -/// node. Returns true if the operand was a load. +/// node. 
Returns true if the operand was a nonvolatile load. bool DAGCombiner::FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, + SDValue &Ptr, int64_t &Size, bool &IsVolatile, const Value *&SrcValue, int &SrcValueOffset, unsigned &SrcValueAlign, @@ -10232,11 +10908,12 @@ bool DAGCombiner::FindAliasInfo(SDNode *N, Ptr = LS->getBasePtr(); Size = LS->getMemoryVT().getSizeInBits() >> 3; + IsVolatile = LS->isVolatile(); SrcValue = LS->getSrcValue(); SrcValueOffset = LS->getSrcValueOffset(); SrcValueAlign = LS->getOriginalAlignment(); TBAAInfo = LS->getTBAAInfo(); - return isa<LoadSDNode>(LS); + return isa<LoadSDNode>(LS) && !IsVolatile; } /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, @@ -10249,12 +10926,13 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, // Get alias information for node. SDValue Ptr; int64_t Size; + bool IsVolatile; const Value *SrcValue; int SrcValueOffset; unsigned SrcValueAlign; const MDNode *SrcTBAAInfo; - bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, - SrcValueAlign, SrcTBAAInfo); + bool IsLoad = FindAliasInfo(N, Ptr, Size, IsVolatile, SrcValue, + SrcValueOffset, SrcValueAlign, SrcTBAAInfo); // Starting off. Chains.push_back(OriginalChain); @@ -10295,20 +10973,21 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, // Get alias information for Chain. SDValue OpPtr; int64_t OpSize; + bool OpIsVolatile; const Value *OpSrcValue; int OpSrcValueOffset; unsigned OpSrcValueAlign; const MDNode *OpSrcTBAAInfo; bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, - OpSrcValue, OpSrcValueOffset, + OpIsVolatile, OpSrcValue, OpSrcValueOffset, OpSrcValueAlign, OpSrcTBAAInfo); // If chain is alias then stop here. if (!(IsLoad && IsOpLoad) && - isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign, - SrcTBAAInfo, - OpPtr, OpSize, OpSrcValue, OpSrcValueOffset, + isAlias(Ptr, Size, IsVolatile, SrcValue, SrcValueOffset, + SrcValueAlign, SrcTBAAInfo, + OpPtr, OpSize, OpIsVolatile, OpSrcValue, OpSrcValueOffset, OpSrcValueAlign, OpSrcTBAAInfo)) { Aliases.push_back(Chain); } else { diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index b4ac948f..a6f7461 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -638,29 +638,25 @@ bool FastISel::SelectCall(const User *I) { (!isa<AllocaInst>(Address) || !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) Op = MachineOperand::CreateReg(FuncInfo.InitializeRegForValue(Address), - false); + false); - if (Op) + if (Op) { if (Op->isReg()) { - // Set the indirect flag if the type and the DIVariable's - // indirect field are in disagreement: Indirectly-addressed - // variables that are nonpointer types should be marked as - // indirect, and VLAs should be marked as indirect eventhough - // they are a pointer type. 
- bool IsIndirect = DI->getAddress()->getType()->isPointerTy() - ^ DIVar.isIndirect(); Op->setIsDebug(true); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(TargetOpcode::DBG_VALUE), - IsIndirect, Op->getReg(), Offset, DI->getVariable()); + TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0, + DI->getVariable()); } else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(TargetOpcode::DBG_VALUE)).addOperand(*Op).addImm(0) - .addMetadata(DI->getVariable()); - else + TII.get(TargetOpcode::DBG_VALUE)) + .addOperand(*Op) + .addImm(0) + .addMetadata(DI->getVariable()); + } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + } return true; } case Intrinsic::dbg_value: { @@ -688,6 +684,7 @@ bool FastISel::SelectCall(const User *I) { .addFPImm(CF).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (unsigned Reg = lookUpRegForValue(V)) { + // FIXME: This does not handle register-indirect values at offset 0. bool IsIndirect = DI->getOffset() != 0; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, IsIndirect, Reg, DI->getOffset(), DI->getVariable()); @@ -1574,4 +1571,19 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI); } +bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) { + // Must be an add. + if (!isa<AddOperator>(Add)) + return false; + // Type size needs to match. + if (TD.getTypeSizeInBits(GEP->getType()) != + TD.getTypeSizeInBits(Add->getType())) + return false; + // Must be in the same basic block. + if (isa<Instruction>(Add) && + FuncInfo.MBBMap[cast<Instruction>(Add)->getParent()] != FuncInfo.MBB) + return false; + // Must have a constant operand. + return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1)); +} diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index e107276..3a8fb85 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -211,6 +212,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && "IMPLICIT_DEF should have been handled as a special case elsewhere!"); + unsigned NumResults = CountResults(Node); for (unsigned i = 0; i < II.getNumDefs(); ++i) { // If the specific node value is only used by a CopyToReg and the dest reg // is a vreg in the same register class, use the CopyToReg'd destination @@ -218,6 +220,10 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, unsigned VRBase = 0; const TargetRegisterClass *RC = TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); + // If the register class is unknown for the given definition, then try to + // infer one from the value type. + if (!RC && i < NumResults) + RC = TLI->getRegClassFor(Node->getSimpleValueType(i)); if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. 
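The canFoldAddIntoGEP helper added above only guards the fold (matching bit widths, same basic block, constant RHS); the payoff is the usual distribution of the element size over the add. The check below is a worked identity under those assumptions, not the FastISel lowering itself.

#include <cassert>
#include <cstdint>

int main() {
  // gep base, (add i, C) with element size S addresses the same byte
  // as gep (base + C*S), i — the constant folds into the GEP offset.
  const int64_t Base = 1000, C = 3, S = 8;  // illustrative values
  for (int64_t i = -4; i <= 4; ++i)
    assert(Base + (i + C) * S == (Base + C * S) + i * S);
  return 0;
}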
unsigned NumResults = CountResults(Node); @@ -722,10 +728,20 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); + unsigned NumDefs = II.getNumDefs(); + const uint16_t *ScratchRegs = NULL; + + // Handle PATCHPOINT specially and then use the generic code. + if (Opc == TargetOpcode::PATCHPOINT) { + unsigned CC = Node->getConstantOperandVal(PatchPointOpers::CCPos); + NumDefs = NumResults; + ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC); + } + unsigned NumImpUses = 0; unsigned NodeOperands = - countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses); - bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; + countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses); + bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=0; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; if (II.isVariadic()) @@ -748,14 +764,20 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Emit all of the actual operands of this instruction, adding them to the // instruction as appropriate. - bool HasOptPRefs = II.getNumDefs() > NumResults; + bool HasOptPRefs = NumDefs > NumResults; assert((!HasOptPRefs || !HasPhysRegOuts) && "Unable to cope with optional defs and phys regs defs!"); - unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0; + unsigned NumSkip = HasOptPRefs ? NumDefs - NumResults : 0; for (unsigned i = NumSkip; i != NodeOperands; ++i) - AddOperand(MIB, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, + AddOperand(MIB, Node->getOperand(i), i-NumSkip+NumDefs, &II, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); + // Add scratch registers as implicit def and early clobber + if (ScratchRegs) + for (unsigned i = 0; ScratchRegs[i]; ++i) + MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine | + RegState::EarlyClobber); + // Transfer all of the memory reference descriptions of this instruction. MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(), cast<MachineSDNode>(Node)->memoperands_end()); @@ -784,8 +806,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Additional results must be physical register defs. if (HasPhysRegOuts) { - for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { - unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; + for (unsigned i = NumDefs; i < NumResults; ++i) { + unsigned Reg = II.getImplicitDefs()[i - NumDefs]; if (!Node->hasAnyUseOfValue(i)) continue; // This implicitly defined physreg has a use. 
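The PATCHPOINT path above appends each scratch register from a null-terminated list as an implicit-def, early-clobber operand. A toy rendering of that idiom follows; the register numbers and flag values are made-up stand-ins for RegState and MachineInstrBuilder.

#include <cstdint>
#include <cstdio>
#include <vector>

// Stand-ins for RegState flags; the bit values are illustrative only.
enum : unsigned { ImplicitDefine = 1u << 0, EarlyClobber = 1u << 1 };

struct Operand { uint16_t Reg; unsigned Flags; };

int main() {
  // Null-terminated scratch list, as a target's getScratchRegisters()
  // is assumed to return (register 0 means "no register").
  static const uint16_t ScratchRegs[] = {12, 13, 0};

  std::vector<Operand> MI;
  for (unsigned i = 0; ScratchRegs[i]; ++i)
    MI.push_back({ScratchRegs[i], ImplicitDefine | EarlyClobber});

  for (const Operand &Op : MI)
    std::printf("reg %u flags %u\n", Op.Reg, Op.Flags);
}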
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index bd844e5..9061ae9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -95,8 +95,8 @@ private: SDValue N1, SDValue N2, ArrayRef<int> Mask) const; - void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, - SDLoc dl); + bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, + bool &NeedInvert, SDLoc dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, @@ -311,6 +311,8 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Val = ST->getValue(); EVT VT = Val.getValueType(); int Alignment = ST->getAlignment(); + unsigned AS = ST->getAddressSpace(); + SDLoc dl(ST); if (ST->getMemoryVT().isFloatingPoint() || ST->getMemoryVT().isVector()) { @@ -343,7 +345,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr, MachinePointerInfo(), StoredVT, false, false, 0); - SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy(AS)); SmallVector<SDValue, 8> Stores; unsigned Offset = 0; @@ -381,7 +383,8 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, .getWithOffset(Offset), MemVT, ST->isVolatile(), ST->isNonTemporal(), - MinAlign(ST->getAlignment(), Offset))); + MinAlign(ST->getAlignment(), Offset), + ST->getTBAAInfo())); // The order of the stores doesn't matter - say it with a TokenFactor. SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], @@ -408,13 +411,14 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, ST->getPointerInfo(), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, TLI.getPointerTy())); + DAG.getConstant(IncrementSize, TLI.getPointerTy(AS))); Alignment = MinAlign(Alignment, IncrementSize); Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), - Alignment); + Alignment, ST->getTBAAInfo()); SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); @@ -438,10 +442,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) { // Expand to a (misaligned) integer load of the same size, // then bitconvert to floating point or vector. - SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), - LD->isVolatile(), - LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, + LD->getMemOperand()); SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); if (LoadedVT != VT) Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND : @@ -474,7 +476,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), - MinAlign(LD->getAlignment(), Offset)); + MinAlign(LD->getAlignment(), Offset), + LD->getTBAAInfo()); // Follow the load with a store to the stack slot. Remember the store. 
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, MachinePointerInfo(), false, false, 0)); @@ -492,7 +495,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->isVolatile(), LD->isNonTemporal(), - MinAlign(LD->getAlignment(), Offset)); + MinAlign(LD->getAlignment(), Offset), + LD->getTBAAInfo()); // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. @@ -536,23 +540,25 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, if (TLI.isLittleEndian()) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), Alignment); + LD->isNonTemporal(), Alignment, LD->getTBAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, TLI.getPointerTy())); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment, IncrementSize), + LD->getTBAAInfo()); } else { Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), Alignment); + LD->isNonTemporal(), Alignment, LD->getTBAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, TLI.getPointerTy())); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment, IncrementSize), + LD->getTBAAInfo()); } // aggregate the two parts @@ -655,6 +661,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); SDLoc dl(ST); if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && @@ -663,7 +670,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { bitcastToAPInt().zextOrTrunc(32), MVT::i32); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); } if (CFP->getValueType(0) == MVT::f64) { @@ -672,7 +679,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 
zextOrTrunc(64), MVT::i64); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); } if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { @@ -685,12 +692,13 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + isNonTemporal, Alignment, TBAAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(4)); + DAG.getConstant(4, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), - isVolatile, isNonTemporal, MinAlign(Alignment, 4U)); + isVolatile, isNonTemporal, MinAlign(Alignment, 4U), + TBAAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -708,6 +716,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { @@ -745,7 +754,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + isNonTemporal, Alignment, TBAAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -767,7 +776,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + NVT, isVolatile, isNonTemporal, Alignment, + TBAAInfo); ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. @@ -788,19 +798,20 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the bottom RoundWidth bits. Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), RoundVT, - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, + TBAAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); } else { // Big endian - avoid unaligned stores. // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X @@ -809,16 +820,17 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), - RoundVT, isVolatile, isNonTemporal, Alignment); + RoundVT, isVolatile, isNonTemporal, Alignment, + TBAAInfo); // Store the remaining ExtraWidth bits. 
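For the little-endian branch above, a concrete instance of the split may help: a TRUNCSTORE:i24 becomes an i16 store of the low bits plus an i8 store of value >> RoundWidth at offset RoundWidth/8 (the big-endian twin continues below). A freestanding sketch, assuming a little-endian host so memcpy reproduces the store layout:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Store the low 24 bits of V at Buf, little-endian, as i16 + i8.
static void storeI24LE(uint8_t *Buf, uint32_t V) {
  const unsigned RoundWidth = 16, IncrementSize = RoundWidth / 8;
  uint16_t Lo = static_cast<uint16_t>(V);              // bottom RoundWidth bits
  uint8_t  Hi = static_cast<uint8_t>(V >> RoundWidth); // remaining ExtraWidth bits
  std::memcpy(Buf, &Lo, sizeof Lo);                    // assumes an LE host
  Buf[IncrementSize] = Hi;
}

int main() {
  uint8_t Buf[3] = {};
  storeI24LE(Buf, 0xABCDEF);
  std::printf("%02x %02x %02x\n", Buf[0], Buf[1], Buf[2]); // ef cd ab
}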
IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); } // The order of the stores doesn't matter. @@ -854,7 +866,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -902,9 +914,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { assert(NVT.getSizeInBits() == VT.getSizeInBits() && "Can only promote loads to same size type"); - SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getMemOperand()); RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res); RChain = Res.getValue(1); break; @@ -924,6 +934,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); if (SrcWidth != SrcVT.getStoreSizeInBits() && // Some targets pretend to have an i1 loading operation, and actually @@ -950,7 +961,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { SDValue Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + NVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); Ch = Result.getValue(1); // The chain. @@ -987,16 +998,16 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment); + isNonTemporal, Alignment, TBAAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -1016,17 +1027,17 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the top RoundWidth bits. Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment); + isNonTemporal, Alignment, TBAAInfo); // Load the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -1079,9 +1090,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { case TargetLowering::Expand: if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, - LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + LD->getMemOperand()); unsigned ExtendOp; switch (ExtType) { case ISD::EXTLOAD: @@ -1109,9 +1118,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Turn the unsupported load into an EXTLOAD followed by an explicit // zero/sign extend inreg. SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Chain, Ptr, LD->getPointerInfo(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + Chain, Ptr, SrcVT, + LD->getMemOperand()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, @@ -1386,11 +1394,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, DAG.getConstant(EltSize, Idx.getValueType())); - if (Idx.getValueType().bitsGT(TLI.getPointerTy())) - Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx); - else - Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); - + Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy()); StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); if (Op.getValueType().isVector()) @@ -1428,11 +1432,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, DAG.getConstant(EltSize, Idx.getValueType())); - - if (Idx.getValueType().bitsGT(TLI.getPointerTy())) - Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx); - else - Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); + Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy()); SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); @@ -1531,7 +1531,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), - LoadPtr, DAG.getIntPtrConstant(ByteOffset)); + LoadPtr, + DAG.getConstant(ByteOffset, LoadPtr.getValueType())); // Load a legal integer containing the sign bit. 
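The FCOPYSIGN expansion above picks the widest legal integer load that still contains the sign bit, then offsets the pointer to the stride holding it. Worked here for an f64 whose sign must be read through i32 loads, assuming IEEE-754 doubles and a little-endian host:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const unsigned FloatBits = 64, LoadBits = 32;
  // Whole i32 strides below the sign bit, and the byte offset of the
  // stride that contains it (little-endian layout assumed).
  unsigned Strides = (FloatBits - 1) / LoadBits;   // 1
  unsigned ByteOffset = Strides * LoadBits / 8;    // 4

  double D = -1.5;
  uint32_t SignWord;
  std::memcpy(&SignWord,
              reinterpret_cast<const uint8_t *>(&D) + ByteOffset,
              sizeof SignWord);
  // Bit 31 of the high word is the f64 sign bit.
  std::printf("sign = %u\n", (SignWord >> 31) & 1); // prints: sign = 1
}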
SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), false, false, false, 0); @@ -1580,10 +1581,10 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, Chain = SP.getValue(1); unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); - if (Align > StackAlign) - SP = DAG.getNode(ISD::AND, dl, VT, SP, - DAG.getConstant(-(uint64_t)Align, VT)); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value + if (Align > StackAlign) + Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, + DAG.getConstant(-(uint64_t)Align, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), @@ -1595,22 +1596,44 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, } /// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and -/// condition code CC on the current target. This routine expands SETCC with -/// illegal condition code into AND / OR of multiple SETCC values. -void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, +/// condition code CC on the current target. +/// +/// If the SETCC has been legalized using AND / OR, then the legalized node +/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert +/// will be set to false. +/// +/// If the SETCC has been legalized by using getSetCCSwappedOperands(), +/// then the values of LHS and RHS will be swapped, CC will be set to the +/// new condition, and NeedInvert will be set to false. +/// +/// If the SETCC has been legalized using the inverse condcode, then LHS and +/// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert +/// will be set to true. The caller must invert the result of the SETCC with +/// SelectionDAG::getNOT() or take equivalent action to swap the effect of a +/// true/false result. +/// +/// \returns true if the SetCC has been legalized, false if it hasn't. +bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, + bool &NeedInvert, SDLoc dl) { MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); + NeedInvert = false; switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: // Nothing to do. break; case TargetLowering::Expand: { + ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode); + if (TLI.isCondCodeLegal(InvCC, OpVT)) { + std::swap(LHS, RHS); + CC = DAG.getCondCode(InvCC); + return true; + } ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; - ISD::CondCode InvCC = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { default: llvm_unreachable("Don't know how to expand this condition!"); @@ -1650,18 +1673,21 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, case ISD::SETGT: case ISD::SETGE: case ISD::SETLT: + // We only support using the inverted operation, which is computed above + // and not a different manner of supporting expanding these cases. + llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETNE: case ISD::SETEQ: - InvCC = ISD::getSetCCSwappedOperands(CCCode); - if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) { - // We only support using the inverted operation and not a - // different manner of supporting expanding these cases. - llvm_unreachable("Don't know how to expand this condition!"); + // Try inverting the result of the inverse condition. 
+ InvCC = CCCode == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ; + if (TLI.isCondCodeLegal(InvCC, OpVT)) { + CC = DAG.getCondCode(InvCC); + NeedInvert = true; + return true; } - LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC); - RHS = SDValue(); - CC = SDValue(); - return; + // If inverting the condition didn't work then we have no means to expand + // the condition. + llvm_unreachable("Don't know how to expand this condition!"); } SDValue SetCC1, SetCC2; @@ -1678,9 +1704,10 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); RHS = SDValue(); CC = SDValue(); - break; + return true; } } + return false; } /// EmitStackConvert - Emit a store/load combination to the stack. This stores @@ -1969,7 +1996,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; @@ -1987,7 +2014,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; @@ -2002,7 +2029,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; @@ -2049,7 +2076,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned = Opcode == ISD::SDIVREM; RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; @@ -2106,7 +2133,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, /// isSinCosLibcallAvailable - Return true if sincos libcall is available. 
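The SETEQ/SETNE fallback above emits the inverse comparison and raises NeedInvert so the caller wraps the result in a NOT. The same control flow on plain booleans, with isCondCodeLegal stubbed to model a hypothetical target that only supports SETNE:

#include <cassert>
#include <cstdio>

enum CondCode { SETEQ, SETNE };

// Hypothetical target that can only compare for inequality.
static bool isCondCodeLegal(CondCode CC) { return CC == SETNE; }

static bool emitSetCC(CondCode CC, int LHS, int RHS) {
  bool NeedInvert = false;
  if (!isCondCodeLegal(CC)) {
    CC = (CC == SETEQ) ? SETNE : SETEQ;  // try the inverse condcode
    assert(isCondCodeLegal(CC) && "no way to expand this condition");
    NeedInvert = true;                   // caller-visible in the real API
  }
  bool R = (CC == SETEQ) ? (LHS == RHS) : (LHS != RHS);
  return NeedInvert ? !R : R;            // the NOT the caller must add
}

int main() {
  std::printf("%d %d\n", emitSetCC(SETEQ, 3, 3), emitSetCC(SETEQ, 3, 4));
  // prints: 1 0
}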
static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = RTLIB::SINCOS_F32; break; case MVT::f64: LC = RTLIB::SINCOS_F64; break; @@ -2156,7 +2183,7 @@ void SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = RTLIB::SINCOS_F32; break; case MVT::f64: LC = RTLIB::SINCOS_F64; break; @@ -2232,11 +2259,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); // word offset constant for Hi/Lo address computation - SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy()); + SDValue WordOff = DAG.getConstant(sizeof(int), StackSlot.getValueType()); // set up Hi and Lo (into buffer) address based on endian SDValue Hi = StackSlot; - SDValue Lo = DAG.getNode(ISD::ADD, dl, - TLI.getPointerTy(), StackSlot, WordOff); + SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(), + StackSlot, WordOff); if (TLI.isLittleEndian()) std::swap(Hi, Lo); @@ -2382,7 +2409,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // as a negative number. To counteract this, the dynamic code adds an // offset depending on the data type. uint64_t FF; - switch (Op0.getValueType().getSimpleVT().SimpleTy) { + switch (Op0.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) @@ -2395,7 +2422,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); - CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset); + CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset); Alignment = std::min(Alignment, 4u); SDValue FudgeInReg; if (DestVT == MVT::f32) @@ -2656,6 +2683,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; } break; case ISD::ATOMIC_CMP_SWAP: @@ -2665,6 +2693,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; } break; case ISD::ATOMIC_LOAD_ADD: @@ -2674,6 +2703,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; } break; case ISD::ATOMIC_LOAD_SUB: @@ -2683,6 +2713,7 @@ std::pair <SDValue, SDValue> 
SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; } break; case ISD::ATOMIC_LOAD_AND: @@ -2692,6 +2723,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; } break; case ISD::ATOMIC_LOAD_OR: @@ -2701,6 +2733,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; } break; case ISD::ATOMIC_LOAD_XOR: @@ -2710,6 +2743,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; } break; case ISD::ATOMIC_LOAD_NAND: @@ -2719,6 +2753,47 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; + } + break; + case ISD::ATOMIC_LOAD_MAX: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MAX_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MAX_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MAX_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MAX_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MAX_16;break; + } + break; + case ISD::ATOMIC_LOAD_UMAX: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMAX_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMAX_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMAX_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMAX_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMAX_16;break; + } + break; + case ISD::ATOMIC_LOAD_MIN: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MIN_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MIN_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MIN_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MIN_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MIN_16;break; + } + break; + case ISD::ATOMIC_LOAD_UMIN: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMIN_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMIN_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMIN_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMIN_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMIN_16;break; } break; } @@ -2730,6 +2805,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SmallVector<SDValue, 8> Results; 
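The new RTLIB entries above presuppose runtime helpers such as __sync_fetch_and_max_8 for every width up to i128; their contract (atomically store the max, return the old value) is exactly what a compare-exchange loop provides. A sketch of that contract for 64-bit values with std::atomic:

#include <atomic>
#include <cstdint>
#include <cstdio>

// Behaves as a __sync_fetch_and_max_8 would be expected to: atomically
// store max(*P, V) and return the previous value.
static int64_t fetchAndMax(std::atomic<int64_t> &P, int64_t V) {
  int64_t Old = P.load();
  while (Old < V && !P.compare_exchange_weak(Old, V)) {
    // Old is refreshed by compare_exchange_weak on failure; retry.
  }
  return Old;
}

int main() {
  std::atomic<int64_t> X(10);
  std::printf("%lld %lld\n", (long long)fetchAndMax(X, 7),
              (long long)X.load());   // prints: 10 10
  std::printf("%lld %lld\n", (long long)fetchAndMax(X, 42),
              (long long)X.load());   // prints: 10 42
}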
SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; + bool NeedInvert; switch (Node->getOpcode()) { case ISD::CTPOP: case ISD::CTLZ: @@ -2947,20 +3023,20 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (Align > TLI.getMinStackArgumentAlignment()) { assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); - VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, DAG.getConstant(Align - 1, - TLI.getPointerTy())); + VAList.getValueType())); - VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList, + VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList, DAG.getConstant(-(int64_t)Align, - TLI.getPointerTy())); + VAList.getValueType())); } // Increment the pointer, VAList, to the next vaarg - Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, DAG.getConstant(TLI.getDataLayout()-> getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), - TLI.getPointerTy())); + VAList.getValueType())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, MachinePointerInfo(V), false, false, 0); @@ -3231,6 +3307,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128)); break; + case ISD::FROUND: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128)); + break; case ISD::FPOWI: Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, RTLIB::POWI_F80, RTLIB::POWI_F128, @@ -3565,9 +3648,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); - Index = DAG.getNode(ISD::MUL, dl, PTy, - Index, DAG.getConstant(EntrySize, PTy)); - SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), + Index, DAG.getConstant(EntrySize, Index.getValueType())); + SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(), + Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, @@ -3611,10 +3695,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); Tmp3 = Node->getOperand(2); - LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl); + bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, + Tmp3, NeedInvert, dl); + + if (Legalized) { + // If we expanded the SETCC by swapping LHS and RHS, or by inverting the + // condition code, create a new SETCC node. + if (Tmp3.getNode()) + Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), + Tmp1, Tmp2, Tmp3); + + // If we expanded the SETCC by inverting the condition code, then wrap + // the existing SETCC in a NOT to restore the intended condition. + if (NeedInvert) + Tmp1 = DAG.getNOT(dl, Tmp1, Tmp1->getValueType(0)); - // If we expanded the SETCC into an AND/OR, return the new node - if (Tmp2.getNode() == 0) { Results.push_back(Tmp1); break; } @@ -3645,14 +3740,52 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp4 = Node->getOperand(3); // False SDValue CC = Node->getOperand(4); - LegalizeSetCCCondCode(getSetCCResultType(Tmp1.getValueType()), - Tmp1, Tmp2, CC, dl); + bool Legalized = false; + // Try to legalize by inverting the condition. 
This is for targets that + // might support an ordered version of a condition, but not the unordered + // version (or vice versa). + ISD::CondCode InvCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + Tmp1.getValueType().isInteger()); + if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) { + // Use the new condition code and swap true and false + Legalized = true; + Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC); + } else { + // If The inverse is not legal, then try to swap the arguments using + // the inverse condition code. + ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC); + if (TLI.isCondCodeLegal(SwapInvCC, Tmp1.getSimpleValueType())) { + // The swapped inverse condition is legal, so swap true and false, + // lhs and rhs. + Legalized = true; + Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC); + } + } + + if (!Legalized) { + Legalized = LegalizeSetCCCondCode( + getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert, + dl); + + assert(Legalized && "Can't legalize SELECT_CC with legal condition!"); + + // If we expanded the SETCC by inverting the condition code, then swap + // the True/False operands to match. + if (NeedInvert) + std::swap(Tmp3, Tmp4); - assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!"); - Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); - CC = DAG.getCondCode(ISD::SETNE); - Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, - Tmp3, Tmp4, CC); + // If we expanded the SETCC by swapping LHS and RHS, or by inverting the + // condition code, create a new SELECT_CC node. + if (CC.getNode()) { + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), + Tmp1, Tmp2, Tmp3, Tmp4, CC); + } else { + Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); + CC = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, + Tmp3, Tmp4, CC); + } + } Results.push_back(Tmp1); break; } @@ -3662,14 +3795,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp3 = Node->getOperand(3); // RHS Tmp4 = Node->getOperand(1); // CC - LegalizeSetCCCondCode(getSetCCResultType(Tmp2.getValueType()), - Tmp2, Tmp3, Tmp4, dl); - - assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); - Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); - Tmp4 = DAG.getCondCode(ISD::SETNE); - Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, - Tmp3, Node->getOperand(4)); + bool Legalized = LegalizeSetCCCondCode(getSetCCResultType( + Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl); + (void)Legalized; + assert(Legalized && "Can't legalize BR_CC with legal condition!"); + + // If we expanded the SETCC by inverting the condition code, then wrap + // the existing SETCC in a NOT to restore the intended condition. + if (NeedInvert) + Tmp4 = DAG.getNOT(dl, Tmp4, Tmp4->getValueType(0)); + + // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC + // node. 
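Both SELECT_CC fallbacks above lean on one identity: inverting the condition while swapping the true/false values (or additionally swapping LHS/RHS under the swapped condcode) leaves the selected result unchanged. A quick exhaustive check on small integers, with ordinary comparisons standing in for ISD condition codes:

#include <cassert>
#include <cstdio>

// select_cc a, b, t, f under SETLT and its two legalized forms.
static int sel_lt(int a, int b, int t, int f) { return a <  b ? t : f; }
static int sel_ge(int a, int b, int t, int f) { return a >= b ? t : f; }
static int sel_le(int a, int b, int t, int f) { return a <= b ? t : f; }

int main() {
  for (int a = -2; a <= 2; ++a)
    for (int b = -2; b <= 2; ++b) {
      // Inverse condcode, true/false swapped:
      assert(sel_lt(a, b, 1, 9) == sel_ge(a, b, 9, 1));
      // Swapped-inverse condcode, LHS/RHS and true/false swapped:
      assert(sel_lt(a, b, 1, 9) == sel_le(b, a, 9, 1));
    }
  std::printf("identities hold\n");
}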
+ if (Tmp4.getNode()) { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, + Tmp4, Tmp2, Tmp3, Node->getOperand(4)); + } else { + Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); + Tmp4 = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, + Tmp3, Node->getOperand(4)); + } Results.push_back(Tmp1); break; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index cea0b02..ecf4c5d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -88,6 +88,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; case ISD::FREM: R = SoftenFloatRes_FREM(N); break; case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break; case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; @@ -160,7 +161,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { RTLIB::ADD_F80, RTLIB::ADD_F128, RTLIB::ADD_PPCF128), - NVT, Ops, 2, false, SDLoc(N)); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { @@ -172,7 +173,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { RTLIB::CEIL_F80, RTLIB::CEIL_F128, RTLIB::CEIL_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { @@ -226,7 +227,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { @@ -239,7 +240,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - NVT, Ops, 2, false, SDLoc(N)); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { @@ -251,7 +252,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { RTLIB::EXP_F80, RTLIB::EXP_F128, RTLIB::EXP_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { @@ -263,7 +264,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { @@ -275,7 +276,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, RTLIB::FLOOR_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { @@ -287,7 +288,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { RTLIB::LOG_F80, RTLIB::LOG_F128, RTLIB::LOG_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { @@ -299,7 +300,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { RTLIB::LOG2_F80, RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { @@ -311,7 +312,7 @@ SDValue 
DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { RTLIB::LOG10_F80, RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { @@ -325,7 +326,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - NVT, Ops, 3, false, SDLoc(N)); + NVT, Ops, 3, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { @@ -338,7 +339,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - NVT, Ops, 2, false, SDLoc(N)); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { @@ -350,7 +351,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { RTLIB::NEARBYINT_F80, RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { @@ -364,7 +365,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, SDLoc(N)); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -372,7 +373,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)); + return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -381,7 +382,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, - SDLoc(N)); + SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -389,7 +390,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)); + return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { @@ -402,7 +403,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128), - NVT, Ops, 2, false, SDLoc(N)); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { @@ -416,7 +417,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { RTLIB::POWI_F80, RTLIB::POWI_F128, RTLIB::POWI_PPCF128), - NVT, Ops, 2, false, SDLoc(N)); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { @@ -429,7 +430,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { RTLIB::REM_F80, RTLIB::REM_F128, RTLIB::REM_PPCF128), - NVT, Ops, 2, false, SDLoc(N)); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { @@ -441,7 +442,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { RTLIB::RINT_F80, RTLIB::RINT_F128, 
RTLIB::RINT_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { @@ -453,7 +466,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { RTLIB::SIN_F80, RTLIB::SIN_F128, RTLIB::SIN_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { @@ -465,7 +478,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { @@ -478,7 +491,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, SDLoc(N)); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { @@ -490,7 +503,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, RTLIB::TRUNC_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { @@ -504,7 +517,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), NVT, L->isVolatile(), - L->isNonTemporal(), false, L->getAlignment()); + L->isNonTemporal(), false, L->getAlignment(), + L->getTBAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -516,7 +530,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getMemoryVT(), dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), L->getMemoryVT(), L->isVolatile(), - L->isNonTemporal(), false, L->getAlignment()); + L->isNonTemporal(), false, L->getAlignment(), + L->getTBAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. 
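(Aside: the SoftenFloatRes_FROUND hunk above maps ISD::FROUND onto the libm round family; RTLIB::ROUND_F32 resolves to roundf, which rounds halfway cases away from zero. Below is a minimal standalone sketch of the soft-float idea, not LLVM code, with a hypothetical helper name: the f32 value travels in a 32-bit integer register and the node becomes a plain call.)

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

// softened_fround32 is a hypothetical stand-in: under soft-float lowering
// the f32 lives in an integer register and FROUND becomes a call to roundf.
static uint32_t softened_fround32(uint32_t Bits) {
  float F;
  std::memcpy(&F, &Bits, sizeof F);  // recover the float ("GetSoftenedFloat")
  F = std::round(F);                 // the libcall picked via RTLIB::ROUND_F32
  std::memcpy(&Bits, &F, sizeof F);  // hand the result back as an i32
  return Bits;
}

int main() {
  float In = 2.5f;
  uint32_t Bits;
  std::memcpy(&Bits, &In, sizeof In);
  uint32_t OutBits = softened_fround32(Bits);
  float Out;
  std::memcpy(&Out, &OutBits, sizeof Out);
  std::printf("round(%.1f) = %.1f\n", In, Out); // prints: round(2.5) = 3.0
  return 0;
}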
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -585,7 +600,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { NVT, N->getOperand(0)); return TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - &Op, 1, false, dl); + &Op, 1, false, dl).first; } @@ -645,7 +660,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { @@ -676,7 +691,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { @@ -684,14 +699,14 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) { EVT RVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16; SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { @@ -754,9 +769,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { Val = GetSoftenedFloat(Val); return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), - ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + ST->getMemOperand()); } @@ -817,6 +830,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; + case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break; case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; @@ -912,7 +926,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), N->getValueType(0), Ops, 2, false, - SDLoc(N)); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -986,7 +1000,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), N->getValueType(0), Ops, 3, false, - SDLoc(N)); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1000,7 +1014,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), N->getValueType(0), Ops, 2, false, - SDLoc(N)); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1072,6 +1086,18 @@ void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, GetPairElements(Call, Lo, Hi); 
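(Aside: the mechanical ".first" edits throughout these hunks all follow from one interface change: makeLibCall now returns the call's value and its chain together as a pair, and callers that only need the value take .first. A tiny standalone model with stand-in types, purely illustrative:)

#include <cstdio>
#include <string>
#include <utility>

struct FakeSDValue { std::string Desc; };  // stand-in for llvm::SDValue

// Hypothetical shape of the new interface: value and chain come back together.
static std::pair<FakeSDValue, FakeSDValue> makeLibCallStub(const char *Sym) {
  return { FakeSDValue{std::string(Sym) + " value"},
           FakeSDValue{std::string(Sym) + " chain"} };
}

int main() {
  // Pre-change callers received a single value; appending .first keeps their
  // shape while the chain result stays available for callers that want it.
  FakeSDValue OnlyValue = makeLibCallStub("__addsf3").first;
  std::printf("%s\n", OnlyValue.Desc.c_str()); // __addsf3 value
  return 0;
}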
} +void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), @@ -1102,7 +1128,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), N->getValueType(0), Ops, 2, false, - SDLoc(N)); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1134,8 +1160,7 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, - LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->getMemoryVT(), LD->getMemOperand()); // Remember the chain. Chain = Hi.getValue(1); @@ -1181,7 +1206,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); - Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl); + Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl).first; GetPairElements(Hi, Lo, Hi); } @@ -1251,6 +1276,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; + case ISD::FCOPYSIGN: Res = ExpandFloatOp_FCOPYSIGN(N); break; case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; @@ -1325,6 +1351,17 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { N->getOperand(4)), 0); } +SDValue DAGTypeLegalizer::ExpandFloatOp_FCOPYSIGN(SDNode *N) { + assert(N->getOperand(1).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Lo, Hi; + GetExpandedFloat(N->getOperand(1), Lo, Hi); + // The ppcf128 value is providing only the sign; take it from the + // higher-order double (which must have the larger magnitude). 
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), + N->getValueType(0), N->getOperand(0), Hi); +} + SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { assert(N->getOperand(0).getValueType() == MVT::ppcf128 && "Logic only correct for ppcf128!"); @@ -1353,7 +1390,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl); + return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { @@ -1386,7 +1423,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1, - false, dl); + false, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { @@ -1445,7 +1482,5 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { GetExpandedOp(ST->getValue(), Lo, Hi); return DAG.getTruncStore(Chain, SDLoc(N), Hi, Ptr, - ST->getPointerInfo(), - ST->getMemoryVT(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + ST->getMemoryVT(), ST->getMemOperand()); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index ff8f1f9..4255948 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -417,9 +417,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType(); SDLoc dl(N); SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), - N->getPointerInfo(), - N->getMemoryVT(), N->isVolatile(), - N->isNonTemporal(), N->getAlignment()); + N->getMemoryVT(), N->getMemOperand()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -919,7 +917,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { // type does not have a strange size (eg: it is not i1). EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); - assert(!(NumElts & 1) && "Legal vector of one illegal element?"); + assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) && + "Legal vector of one illegal element?"); // Promote the inserted value. The type does not need to match the // vector element type. Check that any extra bits introduced will be @@ -1037,17 +1036,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); - unsigned Alignment = N->getAlignment(); - bool isVolatile = N->isVolatile(); - bool isNonTemporal = N->isNonTemporal(); SDLoc dl(N); SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. // Truncate the value and store the result. 
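(Aside: the ExpandFloatOp_FCOPYSIGN hunk above leans on the ppcf128 representation: the value is a pair of doubles (Hi, Lo) with |Hi| >= |Lo|, so Hi alone determines the sign. A standalone sketch using plain doubles in place of the SDValue halves:)

#include <cmath>
#include <cstdio>

int main() {
  // A negative double-double: the high part carries the sign, the low part
  // is only a small correction term of strictly smaller magnitude.
  double Hi = -1.0;
  double Lo = 1e-30;
  double Magnitude = 42.0;
  // FCOPYSIGN(Magnitude, ppcf128(Hi, Lo)) needs only the Hi half:
  double Result = std::copysign(Magnitude, Hi);
  std::printf("%f\n", Result); // -42.000000
  (void)Lo;                    // Lo never influences the sign
  return 0;
}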
- return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(), - N->getMemoryVT(), - isVolatile, isNonTemporal, Alignment); + return DAG.getTruncStore(Ch, dl, Val, Ptr, + N->getMemoryVT(), N->getMemOperand()); } SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { @@ -1193,6 +1188,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; } break; case ISD::ATOMIC_CMP_SWAP: @@ -1202,6 +1198,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; } break; case ISD::ATOMIC_LOAD_ADD: @@ -1211,6 +1208,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; } break; case ISD::ATOMIC_LOAD_SUB: @@ -1220,6 +1218,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; } break; case ISD::ATOMIC_LOAD_AND: @@ -1229,6 +1228,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; } break; case ISD::ATOMIC_LOAD_OR: @@ -1238,6 +1238,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; } break; case ISD::ATOMIC_LOAD_XOR: @@ -1247,6 +1248,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; } break; case ISD::ATOMIC_LOAD_NAND: @@ -1256,6 +1258,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; } break; } @@ -1770,7 +1773,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, dl), + SplitInteger(TLI.makeLibCall(DAG, 
LC, VT, &Op, 1, true/*irrelevant*/, + dl).first, Lo, Hi); } @@ -1781,7 +1785,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, dl), + SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, + dl).first, Lo, Hi); } @@ -1803,6 +1808,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); bool isInvariant = N->isInvariant(); + const MDNode *TBAAInfo = N->getTBAAInfo(); SDLoc dl(N); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -1811,7 +1817,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, EVT MemVT = N->getMemoryVT(); Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), - MemVT, isVolatile, isNonTemporal, Alignment); + MemVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); // Remember the chain. Ch = Lo.getValue(1); @@ -1833,7 +1839,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -1842,11 +1849,11 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -1864,17 +1871,17 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); // Load the rest of the low bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. 
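(Aside: a standalone model of the ExpandIntRes_LOAD little-endian path above. An illegal i64 load becomes two i32 loads, the second through a pointer incremented by NVT's byte size and given the MinAlign'd alignment; this is illustrative code, not LLVM's:)

#include <cstdint>
#include <cstdio>
#include <cstring>

// expandLoad64LE is illustrative only: low bits load from the low address,
// then the pointer advances by IncrementSize = 32/8 for the high half.
static uint64_t expandLoad64LE(const unsigned char *Ptr) {
  uint32_t Lo, Hi;
  std::memcpy(&Lo, Ptr, 4);
  std::memcpy(&Hi, Ptr + 4, 4);
  return (uint64_t)Hi << 32 | Lo;
}

int main() {
  unsigned char Bytes[8] = {0xEF, 0xBE, 0xAD, 0xDE, 0x78, 0x56, 0x34, 0x12};
  std::printf("0x%016llx\n", (unsigned long long)expandLoad64LE(Bytes));
  // prints: 0x12345678deadbeef
  return 0;
}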
@@ -1997,7 +2004,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, dl), + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, + dl).first, Lo, Hi); } @@ -2060,7 +2068,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, @@ -2155,7 +2163,8 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl).first, Lo, + Hi); return; } @@ -2238,7 +2247,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, @@ -2378,7 +2387,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, @@ -2398,7 +2407,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, @@ -2685,7 +2694,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, SDLoc(N)); + return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2702,6 +2711,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); + const MDNode *TBAAInfo = N->getTBAAInfo(); SDLoc dl(N); SDValue Lo, Hi; @@ -2711,7 +2721,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { GetExpandedInteger(N->getValue(), Lo, Hi); return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), N->getMemoryVT(), isVolatile, isNonTemporal, - Alignment); + Alignment, TBAAInfo); } if (TLI.isLittleEndian()) { @@ -2719,7 +2729,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { GetExpandedInteger(N->getValue(), Lo, Hi); Lo = DAG.getStore(Ch, dl, 
Lo, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2728,11 +2738,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2760,17 +2770,17 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Store both the high bits and maybe some of the low bits. Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), - HiVT, isVolatile, isNonTemporal, Alignment); + HiVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); // Store the lowest ExcessBits bits in the second half. Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2835,7 +2845,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Zero, Four); unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment(); - FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset); + FudgePtr = DAG.getNode(ISD::ADD, dl, FudgePtr.getValueType(), + FudgePtr, Offset); Alignment = std::min(Alignment, 4u); // Load the value out, extending it from f32 to the destination float type. @@ -2852,7 +2863,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl); + return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl).first; } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index fd770d1..eb13230 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -958,20 +958,6 @@ SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) { return SDValue(N->getOperand(ResNo)); } -/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type -/// which is split into two not necessarily identical pieces. -void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) { - // Currently all types are split in half. 
- if (!InVT.isVector()) { - LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); - } else { - unsigned NumElements = InVT.getVectorNumElements(); - assert(!(NumElements & 1) && "Splitting vector, but not in half!"); - LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), NumElements/2); - } -} - /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and /// high parts of the given value. void DAGTypeLegalizer::GetPairElements(SDValue Pair, @@ -988,10 +974,7 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index) { SDLoc dl(Index); // Make sure the index type is big enough to compute in. - if (Index.getValueType().bitsGT(TLI.getPointerTy())) - Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index); - else - Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index); + Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy()); // Calculate the element offset and add it to the pointer. unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. @@ -1024,20 +1007,23 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, unsigned NumOps = N->getNumOperands(); SDLoc dl(N); if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, + dl).first; } else if (NumOps == 1) { SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, + dl).first; } else if (NumOps == 2) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, + dl).first; } SmallVector<SDValue, 8> Ops(NumOps); for (unsigned i = 0; i < NumOps; ++i) Ops[i] = N->getOperand(i); return TLI.makeLibCall(DAG, LC, N->getValueType(0), - &Ops[0], NumOps, isSigned, dl); + &Ops[0], NumOps, isSigned, dl).first; } // ExpandChainLibCall - Expand a node into a call to a libcall. Similar to diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 63e9af3..13bb08f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -410,6 +410,7 @@ private: SDValue SoftenFloatRes_FPOWI(SDNode *N); SDValue SoftenFloatRes_FREM(SDNode *N); SDValue SoftenFloatRes_FRINT(SDNode *N); + SDValue SoftenFloatRes_FROUND(SDNode *N); SDValue SoftenFloatRes_FSIN(SDNode *N); SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); @@ -470,6 +471,7 @@ private: void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -480,6 +482,7 @@ private: // Float Operand Expansion. 
bool ExpandFloatOperand(SDNode *N, unsigned OperandNo); SDValue ExpandFloatOp_BR_CC(SDNode *N); + SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N); SDValue ExpandFloatOp_FP_ROUND(SDNode *N); SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N); SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N); @@ -534,7 +537,7 @@ private: // Vector Operand Scalarization: <1 x ty> -> ty. bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_BITCAST(SDNode *N); - SDValue ScalarizeVecOp_EXTEND(SDNode *N); + SDValue ScalarizeVecOp_UnaryOp(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); @@ -558,6 +561,7 @@ private: void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -628,6 +632,7 @@ private: SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); + SDValue WidenVecRes_BinaryCanTrap(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); @@ -699,10 +704,6 @@ private: GetExpandedFloat(Op, Lo, Hi); } - /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type - /// which is split (or expanded) into two not necessarily identical pieces. - void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT); - /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and /// high parts of the given value. void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi); @@ -730,6 +731,12 @@ private: GetExpandedFloat(Op, Lo, Hi); } + + /// This function will split the integer \p Op into \p NumElements + /// operations of type \p EltVT and store them in \p Ops. + void IntegerToVector(SDValue Op, unsigned NumElements, + SmallVectorImpl<SDValue> &Ops, EVT EltVT); + // Generic Result Expansion. void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 96f6143..c749fde 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -77,13 +77,9 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypeWidenVector: { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); InOp = GetWidenedVector(InOp); - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - InVT.getVectorNumElements()/2); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getConstant(InNVT.getVectorNumElements(), - TLI.getVectorIdxTy())); + EVT LoVT, HiVT; + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); + llvm::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); @@ -169,7 +165,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Increment the pointer to the other half. 
unsigned IncrementSize = NOutVT.getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, + StackPtr.getValueType())); // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, @@ -253,20 +250,22 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, isInvariant, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -307,6 +306,25 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { // Generic Operand Expansion. //===--------------------------------------------------------------------===// +void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements, + SmallVectorImpl<SDValue> &Ops, + EVT EltVT) { + assert(Op.getValueType().isInteger()); + SDLoc DL(Op); + SDValue Parts[2]; + + if (NumElements > 1) { + NumElements >>= 1; + SplitInteger(Op, Parts[0], Parts[1]); + if (TLI.isBigEndian()) + std::swap(Parts[0], Parts[1]); + IntegerToVector(Parts[0], NumElements, Ops, EltVT); + IntegerToVector(Parts[1], NumElements, Ops, EltVT); + } else { + Ops.push_back(DAG.getNode(ISD::BITCAST, DL, EltVT, Op)); + } +} + SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { SDLoc dl(N); if (N->getValueType(0).isVector()) { @@ -315,21 +333,27 @@ SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { // instead, but only if the new vector type is legal (otherwise there // is no point, and it might create expansion loops). For example, on // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32. + // + // FIXME: I'm not sure why we are first trying to split the input into + // a 2 element vector, so I'm leaving it here to maintain the current + // behavior. + unsigned NumElts = 2; EVT OVT = N->getOperand(0).getValueType(); EVT NVT = EVT::getVectorVT(*DAG.getContext(), TLI.getTypeToTransformTo(*DAG.getContext(), OVT), - 2); - - if (isTypeLegal(NVT)) { - SDValue Parts[2]; - GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]); + NumElts); + if (!isTypeLegal(NVT)) { + // If we can't find a legal type by splitting the integer in half, + // then we can use the node's value type. 
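(Aside: the recursion in IntegerToVector above halves the value with SplitInteger until the pieces reach the element width, swapping halves on big-endian targets so element 0 ends up holding the lowest part. A standalone model on plain integers, an i64 split into four i16 elements; it assumes a power-of-two element count, as the real code does:)

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

static const bool IsBigEndian = false; // demo assumption

static void integerToVector(uint64_t Op, unsigned Bits, unsigned NumElements,
                            std::vector<uint16_t> &Ops) {
  if (NumElements > 1) {
    NumElements >>= 1;
    // Model of SplitInteger: low half in Parts[0], high half in Parts[1].
    uint64_t Parts[2] = { Op & ((1ULL << (Bits / 2)) - 1), Op >> (Bits / 2) };
    if (IsBigEndian)
      std::swap(Parts[0], Parts[1]);
    integerToVector(Parts[0], Bits / 2, NumElements, Ops);
    integerToVector(Parts[1], Bits / 2, NumElements, Ops);
  } else {
    Ops.push_back((uint16_t)Op); // leaf: one element-sized piece (BITCAST)
  }
}

int main() {
  std::vector<uint16_t> Ops;
  integerToVector(0x1122334455667788ULL, 64, 4, Ops);
  for (uint16_t E : Ops)
    std::printf("0x%04x ", E); // 0x7788 0x5566 0x3344 0x1122
  std::printf("\n");
  return 0;
}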
+ NumElts = N->getValueType(0).getVectorNumElements(); + NVT = N->getValueType(0); + } - if (TLI.isBigEndian()) - std::swap(Parts[0], Parts[1]); + SmallVector<SDValue, 8> Ops; + IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType()); - SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2); - return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); - } + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], NumElts); + return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); } // Otherwise, store to a temporary and load out again as the new type. @@ -439,6 +463,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { unsigned Alignment = St->getAlignment(); bool isVolatile = St->isVolatile(); bool isNonTemporal = St->isNonTemporal(); + const MDNode *TBAAInfo = St->getTBAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -450,15 +475,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); - assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!"); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -489,14 +513,12 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue Cond = N->getOperand(0); CL = CH = Cond; if (Cond.getValueType().isVector()) { - assert(Cond.getValueType().getVectorElementType() == MVT::i1 && - "Condition legalized before result?"); - unsigned NumElements = Cond.getValueType().getVectorNumElements(); - EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2); - CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, - DAG.getConstant(0, TLI.getVectorIdxTy())); - CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, - DAG.getConstant(NumElements / 2, TLI.getVectorIdxTy())); + // Check if there are already splitted versions of the vector available and + // use those instead of splitting the mask operand again. 
+ if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Cond, CL, CH); + else + llvm::tie(CL, CH) = DAG.SplitVector(Cond, dl); } Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL); @@ -518,7 +540,7 @@ void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getUNDEF(LoVT); Hi = DAG.getUNDEF(HiVT); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index bbe11b8..2c3cdcc 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -171,7 +171,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { return TranslateLegalizeResults(Op, Result); case TargetLowering::Custom: Changed = true; - return LegalizeOp(TLI.LowerOperation(Result, DAG)); + return TranslateLegalizeResults(Op, TLI.LowerOperation(Result, DAG)); case TargetLowering::Expand: Changed = true; return LegalizeOp(ExpandStore(Op)); @@ -227,6 +227,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FP_TO_UINT: case ISD::FNEG: case ISD::FABS: + case ISD::FCOPYSIGN: case ISD::FSQRT: case ISD::FSIN: case ISD::FCOS: @@ -241,6 +242,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FTRUNC: case ISD::FRINT: case ISD::FNEARBYINT: + case ISD::FROUND: case ISD::FFLOOR: case ISD::FP_ROUND: case ISD::FP_EXTEND: @@ -416,7 +418,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + LD->isInvariant(), LD->getAlignment(), + LD->getTBAAInfo()); } else { EVT LoadVT = WideVT; while (RemainingBytes < LoadBytes) { @@ -426,13 +429,14 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LoadVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->isNonTemporal(), LD->getAlignment(), + LD->getTBAAInfo()); } RemainingBytes -= LoadBytes; Offset += LoadBytes; BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getIntPtrConstant(LoadBytes)); + DAG.getConstant(LoadBytes, BasePTR.getValueType())); LoadVals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -497,10 +501,10 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), SrcVT.getScalarType(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + LD->getAlignment(), LD->getTBAAInfo()); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getIntPtrConstant(Stride)); + DAG.getConstant(Stride, BasePTR.getValueType())); Vals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -529,6 +533,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); unsigned NumElem = StVT.getVectorNumElements(); // The type of the data we want to save @@ -556,10 +561,10 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { // This scalar TruncStore may be illegal, but we 
legalize it later. SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getIntPtrConstant(Stride)); + DAG.getConstant(Stride, BasePTR.getValueType())); Stores.push_back(Store); } @@ -597,10 +602,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { return DAG.UnrollVectorOp(Op.getNode()); // Generate a mask operand. - EVT MaskTy = TLI.getSetCCResultType(*DAG.getContext(), VT); - assert(MaskTy.isVector() && "Invalid CC type"); - assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits() - && "Invalid mask size"); + EVT MaskTy = VT.changeVectorElementTypeToInteger(); // What is the size of each element in the vector mask. EVT BitTy = MaskTy.getScalarType(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 54380ec..f7a3e3d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -83,6 +83,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: + case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -97,6 +98,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::ADD: case ISD::AND: case ISD::FADD: + case ISD::FCOPYSIGN: case ISD::FDIV: case ISD::FMUL: case ISD::FPOW: @@ -215,7 +217,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), - N->isInvariant(), N->getOriginalAlignment()); + N->isInvariant(), N->getOriginalAlignment(), + N->getTBAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -369,7 +372,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: - Res = ScalarizeVecOp_EXTEND(N); + case ISD::TRUNCATE: + Res = ScalarizeVecOp_UnaryOp(N); break; case ISD::CONCAT_VECTORS: Res = ScalarizeVecOp_CONCAT_VECTORS(N); @@ -408,7 +412,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { /// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs /// to be scalarized, it must be <1 x ty>. Extend the element instead. 
-SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTEND(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { assert(N->getValueType(0).getVectorNumElements() == 1 && "Unexected vector type!"); SDValue Elt = GetScalarizedVector(N->getOperand(0)); @@ -455,12 +459,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getBasePtr(), N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), - N->getAlignment()); + N->getAlignment(), N->getTBAAInfo()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getPointerInfo(), N->isVolatile(), N->isNonTemporal(), - N->getOriginalAlignment()); + N->getOriginalAlignment(), N->getTBAAInfo()); } @@ -517,7 +521,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; - case ISD::ANY_EXTEND: case ISD::CONVERT_RNDSAT: case ISD::CTLZ: case ISD::CTTZ: @@ -540,21 +543,27 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: + case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: - case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: case ISD::TRUNCATE: case ISD::UINT_TO_FP: - case ISD::ZERO_EXTEND: SplitVecRes_UnaryOp(N, Lo, Hi); break; + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + SplitVecRes_ExtendOp(N, Lo, Hi); + break; + case ISD::ADD: case ISD::SUB: case ISD::MUL: case ISD::FADD: + case ISD::FCOPYSIGN: case ISD::FSUB: case ISD::FMUL: case ISD::SDIV: @@ -615,7 +624,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, // We know the result is a vector. The input may be either a vector or a // scalar value. 
EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SDLoc dl(N); SDValue InOp = N->getOperand(0); @@ -670,7 +679,7 @@ void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc dl(N); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); unsigned LoNumElts = LoVT.getVectorNumElements(); SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts); Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size()); @@ -691,7 +700,7 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, } EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors); Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size()); @@ -707,7 +716,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); @@ -731,7 +740,8 @@ void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT LoVT, HiVT; - GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = + DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT()); Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, DAG.getValueType(LoVT)); @@ -783,7 +793,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, StackPtr.getValueType())); // Load the Hi part from the stack slot. 
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), @@ -794,7 +804,7 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc dl(N); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); Hi = DAG.getUNDEF(HiVT); } @@ -804,7 +814,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); EVT LoVT, HiVT; SDLoc dl(LD); - GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); ISD::LoadExtType ExtType = LD->getExtensionType(); SDValue Ch = LD->getChain(); @@ -815,20 +825,22 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); EVT LoMemVT, HiMemVT; - GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, - isInvariant, Alignment); + isInvariant, Alignment, TBAAInfo); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment); + HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -847,24 +859,12 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc DL(N); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // Split the input. - EVT InVT = N->getOperand(0).getValueType(); SDValue LL, LH, RL, RH; - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), - DAG.getConstant(0, TLI.getVectorIdxTy())); - LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), - DAG.getConstant(InNVT.getVectorNumElements(), - TLI.getVectorIdxTy())); - - RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), - DAG.getConstant(0, TLI.getVectorIdxTy())); - RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), - DAG.getConstant(InNVT.getVectorNumElements(), - TLI.getVectorIdxTy())); + llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); @@ -875,22 +875,15 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, // Get the dest types - they may not match the input types, e.g. int_to_fp. 
EVT LoVT, HiVT; SDLoc dl(N); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // If the input also splits, handle it directly for a compile time speedup. // Otherwise split it by hand. EVT InVT = N->getOperand(0).getValueType(); - if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) { + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) GetSplitVector(N->getOperand(0), Lo, Hi); - } else { - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), - DAG.getConstant(0, TLI.getVectorIdxTy())); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), - DAG.getConstant(InNVT.getVectorNumElements(), - TLI.getVectorIdxTy())); - } + else + llvm::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); if (N->getOpcode() == ISD::FP_ROUND) { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); @@ -913,6 +906,58 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, } } +void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + EVT SrcVT = N->getOperand(0).getValueType(); + EVT DestVT = N->getValueType(0); + EVT LoVT, HiVT; + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT); + + // We can do better than a generic split operation if the extend is doing + // more than just doubling the width of the elements and the following are + // true: + // - The number of vector elements is even, + // - the source type is legal, + // - the type of a split source is illegal, + // - the type of an extended (by doubling element size) source is legal, and + // - the type of that extended source when split is legal. + // + // This won't necessarily completely legalize the operation, but it will + // more effectively move in the right direction and prevent falling down + // to scalarization in many cases due to the input vector being split too + // far. + unsigned NumElements = SrcVT.getVectorNumElements(); + if ((NumElements & 1) == 0 && + SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) { + LLVMContext &Ctx = *DAG.getContext(); + EVT NewSrcVT = EVT::getVectorVT( + Ctx, EVT::getIntegerVT( + Ctx, SrcVT.getVectorElementType().getSizeInBits() * 2), + NumElements); + EVT SplitSrcVT = + EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2); + EVT SplitLoVT, SplitHiVT; + llvm::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); + if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) && + TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { + DEBUG(dbgs() << "Split vector extend via incremental extend:"; + N->dump(&DAG); dbgs() << "\n"); + // Extend the source vector by one step. + SDValue NewSrc = + DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); + // Get the low and high halves of the new, extended one step, vector. + llvm::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); + // Extend those vector halves the rest of the way. + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + return; + } + } + // Fall back to the generic unary operator splitting otherwise. + SplitVecRes_UnaryOp(N, Lo, Hi); +} + void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi) { // The low and high parts of the original input give four input vectors. 
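(Aside: a standalone model of the SplitVecRes_ExtendOp heuristic above. The legality rule here, where only 128- and 256-bit vectors are legal, roughly AVX2-shaped, is a demo assumption; with it, a v16i8 to v16i32 extend takes one step to v16i16, splits, then finishes as two v8i16 to v8i32 extends instead of decaying toward scalarization:)

#include <cstdio>

struct VecTy { unsigned EltBits, NumElts; };

static unsigned bits(VecTy VT) { return VT.EltBits * VT.NumElts; }
static bool isLegal(VecTy VT) { return bits(VT) == 128 || bits(VT) == 256; }

int main() {
  VecTy Src = {8, 16}, Dst = {32, 16};
  VecTy NewSrc   = {Src.EltBits * 2, Src.NumElts};     // one doubling step
  VecTy SplitSrc = {Src.EltBits,     Src.NumElts / 2}; // naive split's type
  VecTy SplitLo  = {NewSrc.EltBits,  NewSrc.NumElts / 2};
  // The same conditions the code above checks before taking the fast path:
  bool UseIncremental = Src.NumElts % 2 == 0 &&
                        bits(Src) * 2 < bits(Dst) &&
                        isLegal(Src) && !isLegal(SplitSrc) &&
                        isLegal(NewSrc) && isLegal(SplitLo);
  if (UseIncremental)
    std::printf("v%ui%u -> v%ui%u, split, then two v%ui%u -> v%ui%u\n",
                Src.NumElts, Src.EltBits, NewSrc.NumElts, NewSrc.EltBits,
                SplitLo.NumElts, SplitLo.EltBits,
                Dst.NumElts / 2, Dst.EltBits);
  return 0;
}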
@@ -1105,41 +1150,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { SDValue Mask = N->getOperand(0); SDValue Src0 = N->getOperand(1); SDValue Src1 = N->getOperand(2); + EVT Src0VT = Src0.getValueType(); SDLoc DL(N); - EVT MaskVT = Mask.getValueType(); - assert(MaskVT.isVector() && "VSELECT without a vector mask?"); + assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?"); SDValue Lo, Hi; GetSplitVector(N->getOperand(0), Lo, Hi); assert(Lo.getValueType() == Hi.getValueType() && "Lo and Hi have differing types"); - unsigned LoNumElts = Lo.getValueType().getVectorNumElements(); - unsigned HiNumElts = Hi.getValueType().getVectorNumElements(); - assert(LoNumElts == HiNumElts && "Asymmetric vector split?"); - - LLVMContext &Ctx = *DAG.getContext(); - SDValue Zero = DAG.getConstant(0, TLI.getVectorIdxTy()); - SDValue LoElts = DAG.getConstant(LoNumElts, TLI.getVectorIdxTy()); - EVT Src0VT = Src0.getValueType(); - EVT Src0EltTy = Src0VT.getVectorElementType(); - EVT MaskEltTy = MaskVT.getVectorElementType(); - - EVT LoOpVT = EVT::getVectorVT(Ctx, Src0EltTy, LoNumElts); - EVT LoMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, LoNumElts); - EVT HiOpVT = EVT::getVectorVT(Ctx, Src0EltTy, HiNumElts); - EVT HiMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, HiNumElts); + EVT LoOpVT, HiOpVT; + llvm::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT); + assert(LoOpVT == HiOpVT && "Asymmetric vector split?"); - SDValue LoOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src0, Zero); - SDValue LoOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src1, Zero); - - SDValue HiOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src0, LoElts); - SDValue HiOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src1, LoElts); - - SDValue LoMask = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoMaskVT, Mask, Zero); - SDValue HiMask = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiMaskVT, Mask, LoElts); + SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask; + llvm::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL); + llvm::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL); + llvm::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); SDValue LoSelect = DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1); @@ -1249,33 +1276,34 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned Alignment = N->getOriginalAlignment(); bool isVol = N->isVolatile(); bool isNT = N->isNonTemporal(); + const MDNode *TBAAInfo = N->getTBAAInfo(); SDValue Lo, Hi; GetSplitVector(N->getOperand(1), Lo, Hi); EVT LoMemVT, HiMemVT; - GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; if (isTruncating) Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - LoMemVT, isVol, isNT, Alignment); + LoMemVT, isVol, isNT, Alignment, TBAAInfo); else Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - isVol, isNT, Alignment); + isVol, isNT, Alignment, TBAAInfo); // Increment the pointer to the other half. 
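(Aside: the TBAAInfo argument threaded through every rebuilt load and store in these hunks carries the IR's type-based alias metadata onto both halves of the split access. As a loose analogy only: C++'s strict-aliasing rule plays the same role for a native compiler, in that accesses of unrelated types are assumed not to overlap, which is exactly the assumption that would be lost if the metadata were dropped when a memory operation is split.)

#include <cstdio>

// Under strict aliasing the compiler may assume the int store cannot modify
// *F, so the second read of *F folds away -- the same kind of disambiguation
// TBAA metadata grants the SelectionDAG passes.
static float readAroundStore(float *F, int *I) {
  float Before = *F;
  *I = 42;             // unrelated type: assumed not to alias *F
  return Before + *F;  // may be folded to Before + Before
}

int main() {
  float F = 1.0f;
  int I = 0;
  std::printf("%f\n", readAroundStore(&F, &I)); // 2.000000
  return 0;
}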
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); if (isTruncating) Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVol, isNT, Alignment); + HiMemVT, isVol, isNT, Alignment, TBAAInfo); else Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), - isVol, isNT, Alignment); + isVol, isNT, Alignment, TBAAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } @@ -1341,13 +1369,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { SDLoc DL(N); // Extract the halves of the input via extract_subvector. - EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), NumElements/2); - SDValue InLoVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, - DAG.getConstant(0, TLI.getVectorIdxTy())); - SDValue InHiVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, - DAG.getConstant(NumElements/2, - TLI.getVectorIdxTy())); + SDValue InLoVec, InHiVec; + llvm::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); // Truncate them to 1/2 the element size. EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, @@ -1446,27 +1469,31 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VECTOR_SHUFFLE: Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); break; + case ISD::ADD: case ISD::AND: case ISD::BSWAP: + case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: + case ISD::OR: + case ISD::SUB: + case ISD::XOR: + Res = WidenVecRes_Binary(N); + break; + case ISD::FADD: case ISD::FCOPYSIGN: - case ISD::FDIV: case ISD::FMUL: case ISD::FPOW: - case ISD::FREM: case ISD::FSUB: - case ISD::MUL: - case ISD::MULHS: - case ISD::MULHU: - case ISD::OR: + case ISD::FDIV: + case ISD::FREM: case ISD::SDIV: - case ISD::SREM: case ISD::UDIV: + case ISD::SREM: case ISD::UREM: - case ISD::SUB: - case ISD::XOR: - Res = WidenVecRes_Binary(N); + Res = WidenVecRes_BinaryCanTrap(N); break; case ISD::FPOWI: @@ -1507,6 +1534,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FRINT: + case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -1534,6 +1562,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening. + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { + // Binary op widening for operations that can trap. 
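The widening table above now routes integer and floating-point division and remainder to WidenVecRes_BinaryCanTrap (whose body follows) instead of the plain WidenVecRes_Binary path. The reason: widening pads the vector with undefined lanes, and for a trapping operation an undefined divisor lane could fault even though the original program never divides by it. A sketch of the constraint, assuming a target that faults on integer division by zero; this models the hazard, not the exact piecewise strategy of the function body:

  #include <cassert>
  #include <vector>

  // Why division cannot be blindly widened: the extra lanes would execute
  // too. Safe sketch: apply the trapping op only to the original NumElts
  // lanes and never touch the widened tail's divisors.
  std::vector<int> widenedSDiv(const std::vector<int> &A,
                               const std::vector<int> &B,
                               size_t NumElts, size_t WidenedElts) {
    assert(NumElts <= WidenedElts && A.size() >= NumElts &&
           B.size() >= NumElts);
    std::vector<int> R(WidenedElts, 0);
    for (size_t i = 0; i != NumElts; ++i) {
      assert(B[i] != 0 && "div-by-zero in the source program is UB anyway");
      R[i] = A[i] / B[i]; // an undefined padding lane here could trap
    }
    return R;
  }

  int main() {
    std::vector<int> A = {8, 9, 10}, B = {2, 3, 5};
    std::vector<int> R = widenedSDiv(A, B, 3, 4); // widen v3i32 -> v4i32
    assert(R[0] == 4 && R[1] == 3 && R[2] == 2);
    return 0;
  }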
unsigned Opcode = N->getOpcode(); SDLoc dl(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -2532,6 +2569,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; // Difference @@ -2541,7 +2579,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Align); + isVolatile, isNonTemporal, isInvariant, Align, + TBAAInfo); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction @@ -2577,7 +2616,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, unsigned Increment = NewVTWidth / 8; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getIntPtrConstant(Increment)); + DAG.getConstant(Increment, BasePtr.getValueType())); SDValue L; if (LdWidth < NewVTWidth) { @@ -2586,7 +2625,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, NewVTWidth = NewVT.getSizeInBits(); L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, - isNonTemporal, isInvariant, MinAlign(Align, Increment)); + isNonTemporal, isInvariant, MinAlign(Align, Increment), + TBAAInfo); LdChain.push_back(L.getValue(1)); if (L->getValueType(0).isVector()) { SmallVector<SDValue, 16> Loads; @@ -2602,7 +2642,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, } else { L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, - isNonTemporal, isInvariant, MinAlign(Align, Increment)); + isNonTemporal, isInvariant, MinAlign(Align, Increment), + TBAAInfo); LdChain.push_back(L.getValue(1)); } @@ -2682,6 +2723,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); EVT EltVT = WidenVT.getVectorElementType(); EVT LdEltVT = LdVT.getVectorElementType(); @@ -2693,15 +2735,17 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, unsigned Increment = LdEltVT.getSizeInBits() / 8; Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(), - LdEltVT, isVolatile, isNonTemporal, Align); + LdEltVT, isVolatile, isNonTemporal, Align, TBAAInfo); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, DAG.getIntPtrConstant(Offset)); + BasePtr, + DAG.getConstant(Offset, + BasePtr.getValueType())); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, - isVolatile, isNonTemporal, Align); + isVolatile, isNonTemporal, Align, TBAAInfo); LdChain.push_back(Ops[i].getValue(1)); } @@ -2724,6 +2768,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, unsigned Align = ST->getAlignment(); bool 
isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); SDLoc dl(ST); @@ -2750,12 +2795,12 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, - MinAlign(Align, Offset))); + MinAlign(Align, Offset), TBAAInfo)); StWidth -= NewVTWidth; Offset += Increment; Idx += NumVTElts; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getIntPtrConstant(Increment)); + DAG.getConstant(Increment, BasePtr.getValueType())); } while (StWidth != 0 && StWidth >= NewVTWidth); } else { // Cast the vector to the scalar type we can store @@ -2770,11 +2815,11 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, - MinAlign(Align, Offset))); + MinAlign(Align, Offset), TBAAInfo)); StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getIntPtrConstant(Increment)); + DAG.getConstant(Increment, BasePtr.getValueType())); } while (StWidth != 0 && StWidth >= NewVTWidth); // Restore index back to be relative to the original widen element type Idx = Idx * NewVTWidth / ValEltWidth; @@ -2792,6 +2837,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); SDLoc dl(ST); @@ -2814,17 +2860,19 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, DAG.getConstant(0, TLI.getVectorIdxTy())); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT, - isVolatile, isNonTemporal, Align)); + isVolatile, isNonTemporal, Align, + TBAAInfo)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, DAG.getIntPtrConstant(Offset)); + BasePtr, DAG.getConstant(Offset, + BasePtr.getValueType())); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getConstant(0, TLI.getVectorIdxTy())); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), StEltVT, isVolatile, isNonTemporal, - MinAlign(Align, Offset))); + MinAlign(Align, Offset), TBAAInfo)); } } diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index d684164..1dd2128 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -389,10 +389,9 @@ signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) { // Constants used to denote relative importance of // heuristic components for cost computation. 
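GenWidenVectorStores and GenWidenVectorTruncStores above, besides now threading TBAAInfo onto every partial access, lower a widened store as a loop: while bytes remain, pick the largest chunk that still fits, store it at the running offset, and advance the base pointer. The same control flow, reduced to a byte-level sketch with memcpy standing in for DAG.getStore and a fixed power-of-two search standing in for the legalizer's FindMemType:

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  // Store `Width` bytes of `Src` to `Dst` in power-of-two chunks, largest
  // first -- the shape of the GenWidenVectorStores loop, with memcpy
  // standing in for a store at BasePtr + Offset.
  void chunkedStore(uint8_t *Dst, const uint8_t *Src, unsigned Width) {
    unsigned Offset = 0;
    while (Width != 0) {
      unsigned Chunk = 16;                // widest "legal" store in the model
      while (Chunk > Width) Chunk /= 2;   // shrink until the chunk fits
      std::memcpy(Dst + Offset, Src + Offset, Chunk);
      Width -= Chunk;                     // StWidth -= NewVTWidth
      Offset += Chunk;                    // BasePtr += Increment
    }
  }

  int main() {
    uint8_t In[12], Out[12] = {0};
    for (int i = 0; i != 12; ++i) In[i] = uint8_t(i);
    chunkedStore(Out, In, 12);            // an 8-byte then a 4-byte store
    assert(std::memcmp(In, Out, 12) == 0);
    return 0;
  }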
static const unsigned PriorityOne = 200; -static const unsigned PriorityTwo = 100; -static const unsigned PriorityThree = 50; -static const unsigned PriorityFour = 15; -static const unsigned PriorityFive = 5; +static const unsigned PriorityTwo = 50; +static const unsigned PriorityThree = 15; +static const unsigned PriorityFour = 5; static const unsigned ScaleOne = 20; static const unsigned ScaleTwo = 10; static const unsigned ScaleThree = 5; @@ -449,7 +448,7 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { if (N->isMachineOpcode()) { const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); if (TID.isCall()) - ResCount += (PriorityThree + (ScaleThree*N->getNumValues())); + ResCount += (PriorityTwo + (ScaleThree*N->getNumValues())); } else switch (N->getOpcode()) { @@ -457,11 +456,11 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { case ISD::TokenFactor: case ISD::CopyFromReg: case ISD::CopyToReg: - ResCount += PriorityFive; + ResCount += PriorityFour; break; case ISD::INLINEASM: - ResCount += PriorityFour; + ResCount += PriorityThree; break; } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index f5fe168..1a562d7 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -718,7 +718,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { // indicate the scheduled cycle. SU->setHeightToAtLeast(CurCycle); - // Reserve resources for the scheduled intruction. + // Reserve resources for the scheduled instruction. EmitNode(SU); Sequence.push_back(SU); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 982dcc9..054e3dd 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -690,15 +690,6 @@ void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { } #endif // NDEBUG -namespace { - struct OrderSorter { - bool operator()(const std::pair<unsigned, MachineInstr*> &A, - const std::pair<unsigned, MachineInstr*> &B) { - return A.first < B.first; - } - }; -} - /// ProcessSDDbgValues - Process SDDbgValues associated with this node. static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, @@ -744,7 +735,10 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, } MachineBasicBlock *BB = Emitter.getBlock(); - if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) { + if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() || + // Fast-isel may have inserted some instructions, in which case the + // BB->back().isPHI() test will not fire when we want it to. + prior(Emitter.getInsertPos())->isPHI()) { // Did not insert any instruction. Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); return; @@ -857,7 +851,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Sort the source order instructions and use the order to insert debug // values. 
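In ScheduleDAGSDNodes.cpp, the one-line change below swaps the file-local OrderSorter functor (deleted above) for llvm::less_first, which orders std::pair values by their first member. A stand-in with the same semantics, just to make the replacement's behavior concrete:

  #include <algorithm>
  #include <cassert>
  #include <utility>
  #include <vector>

  // Local stand-in with the semantics of llvm::less_first: order pairs by
  // their .first member only.
  struct LessFirst {
    template <typename T>
    bool operator()(const T &A, const T &B) const { return A.first < B.first; }
  };

  int main() {
    std::vector<std::pair<unsigned, const char *>> Orders = {
        {3, "c"}, {1, "a"}, {2, "b"}};
    std::sort(Orders.begin(), Orders.end(), LessFirst());
    assert(Orders[0].first == 1 && Orders[2].first == 3);
    return 0;
  }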
- std::sort(Orders.begin(), Orders.end(), OrderSorter()); + std::sort(Orders.begin(), Orders.end(), less_first()); SDDbgInfo::DbgIterator DI = DAG->DbgBegin(); SDDbgInfo::DbgIterator DE = DAG->DbgEnd(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index bc6063c..45d5a4f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -869,16 +869,19 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { // EntryNode could meaningfully have debug info if we can find it... SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) - : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), OptLevel(OL), + : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), TLI(0), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), - Root(getEntryNode()), UpdateListeners(0) { + Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), + UpdateListeners(0) { AllNodes.push_back(&EntryNode); DbgInfo = new SDDbgInfo(); } -void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti) { +void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti, + const TargetLowering *tli) { MF = &mf; TTI = tti; + TLI = tli; Context = &mf.getFunction()->getContext(); } @@ -983,6 +986,54 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits()); Elt = ConstantInt::get(*getContext(), NewVal); } + // In other cases the element type is illegal and needs to be expanded, for + // example v2i64 on MIPS32. In this case, find the nearest legal type, split + // the value into n parts and use a vector type with n-times the elements. + // Then bitcast to the type requested. + // Legalizing constants too early makes the DAGCombiner's job harder so we + // only legalize if the DAG tells us we must produce legal types. + else if (NewNodesMustHaveLegalTypes && VT.isVector() && + TLI->getTypeAction(*getContext(), EltVT) == + TargetLowering::TypeExpandInteger) { + APInt NewVal = Elt->getValue(); + EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); + unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits(); + unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits; + EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts); + + // Check the temporary vector is the correct size. If this fails then + // getTypeToTransformTo() probably returned a type whose size (in bits) + // isn't a power-of-2 factor of the requested type size. + assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits()); + + SmallVector<SDValue, 2> EltParts; + for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) { + EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) + .trunc(ViaEltSizeInBits), + ViaEltVT, isT)); + } + + // EltParts is currently in little endian order. If we actually want + // big-endian order then reverse it now. + if (TLI->isBigEndian()) + std::reverse(EltParts.begin(), EltParts.end()); + + // The elements must be reversed when the element order is different + // to the endianness of the elements (because the BITCAST is itself a + // vector shuffle in this situation). However, we do not need any code to + // perform this reversal because getConstant() is producing a vector + // splat. + // This situation occurs in MIPS MSA. 
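The getConstant change above is the substantial part of this hunk: a vector constant whose element type must be expanded (the comment's example is v2i64 on MIPS32) has each wide element sliced into legal-width parts via lshr and trunc, collected in little-endian part order and reversed for big-endian targets. The slicing is plain bit arithmetic, modeled here on uint64_t with APInt and the DAG elided:

  #include <algorithm>
  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Split one 64-bit constant element into 32-bit parts, the way the patch
  // uses NewVal.lshr(i * ViaEltSizeInBits).trunc(ViaEltSizeInBits).
  std::vector<uint32_t> splitElt(uint64_t Val, bool BigEndian) {
    std::vector<uint32_t> Parts;
    for (unsigned i = 0; i != 2; ++i)
      Parts.push_back(uint32_t(Val >> (i * 32))); // little-endian part order
    if (BigEndian)
      std::reverse(Parts.begin(), Parts.end());   // TLI->isBigEndian() case
    return Parts;
  }

  int main() {
    std::vector<uint32_t> LE = splitElt(0x1122334455667788ULL, false);
    std::vector<uint32_t> BE = splitElt(0x1122334455667788ULL, true);
    assert(LE[0] == 0x55667788u && LE[1] == 0x11223344u);
    assert(BE[0] == 0x11223344u && BE[1] == 0x55667788u);
    return 0;
  }

The code continuing below then splats those parts across the whole vector and bitcasts the ViaVecVT BUILD_VECTOR back to the requested type.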
+ + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) + Ops.insert(Ops.end(), EltParts.begin(), EltParts.end()); + + SDValue Result = getNode(ISD::BITCAST, SDLoc(), VT, + getNode(ISD::BUILD_VECTOR, SDLoc(), ViaVecVT, + &Ops[0], Ops.size())); + return Result; + } assert(Elt->getBitWidth() == EltVT.getSizeInBits() && "APInt size does not match type size!"); @@ -1077,9 +1128,10 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); + const TargetLowering *TLI = TM.getTargetLowering(); // Truncate (with sign-extension) the offset value to the pointer size. - unsigned BitWidth = TM.getTargetLowering()->getPointerTy().getSizeInBits(); + unsigned BitWidth = TLI->getPointerTypeSizeInBits(GV->getType()); if (BitWidth < 64) Offset = SignExtend64(Offset, BitWidth); @@ -1298,11 +1350,8 @@ static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) { SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *Mask) { - assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE"); - assert(VT.isVector() && N1.getValueType().isVector() && - "Vector Shuffle VTs must be a vectors"); - assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() - && "Vector Shuffle VTs must have same element type"); + assert(VT == N1.getValueType() && VT == N2.getValueType() && + "Invalid VECTOR_SHUFFLE"); // Canonicalize shuffle undef, undef -> undef if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF) @@ -1351,17 +1400,13 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, commuteShuffle(N1, N2, MaskVec); } - // If Identity shuffle, or all shuffle in to undef, return that node. - bool AllUndef = true; + // If Identity shuffle return that node. 
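getVectorShuffle above also tightens its folds: the redundant operand-type asserts collapse into a single VT-equality check, the all-undef-to-UNDEF fold is dropped, and only the identity fold remains, now guarded by NElts being nonzero. A mask is an identity when every non-negative entry selects its own index; negative (undef) lanes do not break it. The check the loop below performs, in isolation:

  #include <cassert>
  #include <vector>

  // Identity test matching the loop in getVectorShuffle: Mask[i] < 0 marks
  // an undef lane; any defined lane must select element i of the first input.
  bool isIdentityMask(const std::vector<int> &Mask) {
    for (unsigned i = 0, e = Mask.size(); i != e; ++i)
      if (Mask[i] >= 0 && Mask[i] != int(i))
        return false;
    return !Mask.empty(); // the patch also requires NElts before folding
  }

  int main() {
    assert(isIdentityMask({0, 1, 2, 3}));
    assert(isIdentityMask({0, -1, 2, -1})); // undef lanes keep it an identity
    assert(!isIdentityMask({1, 0, 2, 3}));
    return 0;
  }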
bool Identity = true; for (unsigned i = 0; i != NElts; ++i) { if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; - if (MaskVec[i] >= 0) AllUndef = false; } - if (Identity && NElts == N1.getValueType().getVectorNumElements()) + if (Identity && NElts) return N1; - if (AllUndef) - return getUNDEF(VT); FoldingSetNodeID ID; SDValue Ops[2] = { N1, N2 }; @@ -1380,7 +1425,9 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); ShuffleVectorSDNode *N = - new (NodeAllocator) ShuffleVectorSDNode(VT, dl.getIROrder(), dl.getDebugLoc(), N1, N2, MaskAlloc); + new (NodeAllocator) ShuffleVectorSDNode(VT, dl.getIROrder(), + dl.getDebugLoc(), N1, N2, + MaskAlloc); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1403,8 +1450,9 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(), dl.getDebugLoc(), Ops, 5, - Code); + CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(), + dl.getDebugLoc(), + Ops, 5, Code); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1447,7 +1495,8 @@ SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), dl.getDebugLoc(), Root, Label); + SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), + dl.getDebugLoc(), Root, Label); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1510,6 +1559,26 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { return SDValue(N, 0); } +/// getAddrSpaceCast - Return an AddrSpaceCastSDNode. +SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, + unsigned SrcAS, unsigned DestAS) { + SDValue Ops[] = {Ptr}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), &Ops[0], 1); + ID.AddInteger(SrcAS); + ID.AddInteger(DestAS); + + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) AddrSpaceCastSDNode(dl.getIROrder(), + dl.getDebugLoc(), + VT, Ptr, SrcAS, DestAS); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} /// getShiftAmountOperand - Return the specified value casted to /// the target's desired shift amount type. @@ -1561,7 +1630,12 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, case ISD::SETFALSE: case ISD::SETFALSE2: return getConstant(0, VT); case ISD::SETTRUE: - case ISD::SETTRUE2: return getConstant(1, VT); + case ISD::SETTRUE2: { + const TargetLowering *TLI = TM.getTargetLowering(); + TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(VT.isVector()); + return getConstant( + Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); + } case ISD::SETOEQ: case ISD::SETOGT: @@ -1643,7 +1717,12 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, } } else { // Ensure that the constant occurs on the RHS. 
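The FoldSetCC hunk above makes the SETTRUE/SETTRUE2 fold respect the target's boolean representation: where booleans are zero-or-negative-one (typical for vector compares), "true" must materialize as all-ones rather than 1. In miniature, with an enum mirroring TargetLowering::BooleanContent for illustration:

  #include <cassert>
  #include <cstdint>

  // Mirror of the BooleanContent choice made in FoldSetCC for SETTRUE.
  enum BooleanContent { UndefinedBooleanContent,
                        ZeroOrOneBooleanContent,
                        ZeroOrNegativeOneBooleanContent };

  int64_t trueValueFor(BooleanContent BC) {
    return BC == ZeroOrNegativeOneBooleanContent ? -1 : 1; // -1ULL in the patch
  }

  int main() {
    // For an i32 vector-compare boolean this becomes the all-ones pattern.
    assert(uint32_t(trueValueFor(ZeroOrNegativeOneBooleanContent)) ==
           0xFFFFFFFFu);
    assert(trueValueFor(ZeroOrOneBooleanContent) == 1);
    return 0;
  }

The else-branch below gets similar target awareness: the constant is canonicalized onto the RHS only when the swapped condition code is legal on the target, and the fold now returns an empty SDValue otherwise.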
- return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond)); + ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); + MVT CompVT = N1.getValueType().getSimpleVT(); + if (!TM.getTargetLowering()->isCondCodeLegal(SwappedCond, CompVT)) + return SDValue(); + + return getSetCC(dl, VT, N2, N1, SwappedCond); } } @@ -1942,7 +2021,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, case ISD::SIGN_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InSignBit = APInt::getSignBit(InBits); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); @@ -2054,7 +2132,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, const APInt &RA = Rem->getAPIntValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); // The low bits of the first operand are unchanged by the srem. @@ -2150,7 +2227,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ } case ISD::SIGN_EXTEND: - Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + Tmp = + VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; case ISD::SIGN_EXTEND_INREG: @@ -2411,7 +2489,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), getVTList(VT)); + SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), getVTList(VT)); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -2672,10 +2751,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, Operand); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, Operand); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, Operand); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, Operand); } AllNodes.push_back(N); @@ -3073,9 +3154,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, if (VT.isSimple() && N1.getValueType().isSimple()) { assert(VT.isVector() && N1.getValueType().isVector() && "Extract subvector VTs must be a vectors!"); - assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() && + assert(VT.getVectorElementType() == + N1.getValueType().getVectorElementType() && "Extract subvector VTs must have the same element type!"); - assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() && + assert(VT.getSimpleVT() <= N1.getSimpleValueType() && "Extract subvector must be from larger vector to smaller vector!"); if (isa<ConstantSDNode>(Index.getNode())) { @@ -3086,7 +3168,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } // Trivial extraction. 
- if (VT.getSimpleVT() == N1.getValueType().getSimpleVT()) + if (VT.getSimpleVT() == N1.getSimpleValueType()) return N1; } break; @@ -3244,10 +3326,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2); } AllNodes.push_back(N); @@ -3316,7 +3400,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, "Insert subvector VTs must be a vectors"); assert(VT == N1.getValueType() && "Dest and insert subvector source types must match!"); - assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() && + assert(N2.getSimpleValueType() <= N1.getSimpleValueType() && "Insert subvector must be from smaller vector to larger vector!"); if (isa<ConstantSDNode>(Index.getNode())) { assert((N2.getValueType().getVectorNumElements() + @@ -3326,7 +3410,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, } // Trivial insertion. - if (VT.getSimpleVT() == N2.getValueType().getSimpleVT()) + if (VT.getSimpleVT() == N2.getSimpleValueType()) return N2; } break; @@ -3349,10 +3433,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2, N3); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2, N3); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2, N3); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2, N3); } AllNodes.push_back(N); @@ -3771,7 +3857,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; - SDValue Value, Store; + SDValue Value; Value = DAG.getLoad(VT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, dl, DAG), @@ -3787,7 +3873,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; - SDValue Value, Store; + SDValue Store; Store = DAG.getStore(Chain, dl, LoadValues[i], getMemBasePlusOffset(Dst, DstOff, dl, DAG), @@ -3800,6 +3886,24 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, &OutChains[0], OutChains.size()); } +/// \brief Lower the call to 'memset' intrinsic function into a series of store +/// operations. +/// +/// \param DAG Selection DAG where lowered code is placed. +/// \param dl Link to corresponding IR location. +/// \param Chain Control flow dependency. +/// \param Dst Pointer to destination memory location. +/// \param Src Value of byte to write into the memory. +/// \param Size Number of bytes to write. +/// \param Align Alignment of the destination in bytes. +/// \param isVol True if destination is volatile. +/// \param DstPtrInfo IR information on the memory pointer. 
+/// \returns New head in the control flow, if lowering was successful, empty +/// SDValue otherwise. +/// +/// The function tries to replace 'llvm.memset' intrinsic with several store +/// operations and value calculation code. This is usually profitable for small +/// memory size. static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, @@ -4078,6 +4182,37 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, + SDVTList VTList, SDValue* Ops, unsigned NumOps, + MachineMemOperand *MMO, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + // Allocate the operands array for the node out of the BumpPtrAllocator, since + // SDNode doesn't have access to it. This memory will be "leaked" when + // the node is deallocated, but recovered when the allocator is released. + // If the number of operands is less than 5 we use AtomicSDNode's internal + // storage. + SDUse *DynOps = NumOps > 4 ? OperandAllocator.Allocate<SDUse>(NumOps) : 0; + + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, MemVT, + Ops, DynOps, NumOps, MMO, + Ordering, SynchScope); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, @@ -4117,22 +4252,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, EVT VT = Cmp.getValueType(); SDVTList VTs = getVTList(VT, MVT::Other); - FoldingSetNodeID ID; - ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; - AddNodeIDNode(ID, Opcode, VTs, Ops, 4); - ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - cast<AtomicSDNode>(E)->refineAlignment(MMO); - return SDValue(E, 0); - } - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, MemVT, Chain, - Ptr, Cmp, Swp, MMO, Ordering, - SynchScope); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, Ordering, SynchScope); } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, @@ -4190,22 +4311,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? 
getVTList(MVT::Other) : getVTList(VT, MVT::Other); - FoldingSetNodeID ID; - ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Val}; - AddNodeIDNode(ID, Opcode, VTs, Ops, 3); - ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - cast<AtomicSDNode>(E)->refineAlignment(MMO); - return SDValue(E, 0); - } - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, MemVT, Chain, - Ptr, Val, MMO, - Ordering, SynchScope); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 3, MMO, Ordering, SynchScope); } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, @@ -4248,21 +4355,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op"); SDVTList VTs = getVTList(VT, MVT::Other); - FoldingSetNodeID ID; - ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr}; - AddNodeIDNode(ID, Opcode, VTs, Ops, 2); - ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - cast<AtomicSDNode>(E)->refineAlignment(MMO); - return SDValue(E, 0); - } - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, MemVT, Chain, - Ptr, MMO, Ordering, SynchScope); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 2, MMO, Ordering, SynchScope); } /// getMergeValues - Create a MERGE_VALUES node from the given operands. @@ -4339,12 +4433,14 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, return SDValue(E, 0); } - N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTList, Ops, NumOps, - MemVT, MMO); + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, Ops, + NumOps, MemVT, MMO); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTList, Ops, NumOps, - MemVT, MMO); + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, Ops, + NumOps, MemVT, MMO); } AllNodes.push_back(N); return SDValue(N, 0); @@ -4458,7 +4554,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, cast<LoadSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl.getIROrder(), dl.getDebugLoc(), VTs, AM, ExtType, + SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, AM, ExtType, MemVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -4478,6 +4575,14 @@ SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, TBAAInfo, Ranges); } +SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, + SDValue Chain, SDValue Ptr, + MachineMemOperand *MMO) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, + VT, MMO); +} + SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, @@ -4490,6 +4595,14 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, } +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, + SDValue Chain, SDValue Ptr, EVT MemVT, + MachineMemOperand *MMO) { + 
SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, + MemVT, MMO); +} + SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, SDLoc dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM) { @@ -4548,8 +4661,9 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), dl.getDebugLoc(), VTs, ISD::UNINDEXED, - false, VT, MMO); + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, + ISD::UNINDEXED, false, VT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -4616,8 +4730,9 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), dl.getDebugLoc(), VTs, ISD::UNINDEXED, - true, SVT, MMO); + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, + ISD::UNINDEXED, true, SVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -4640,7 +4755,8 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), dl.getDebugLoc(), VTs, AM, + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, AM, ST->isTruncatingStore(), ST->getMemoryVT(), ST->getMemOperand()); @@ -4715,10 +4831,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTs, Ops, NumOps); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTs, Ops, NumOps); } AllNodes.push_back(N); @@ -4781,26 +4899,36 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, return SDValue(E, 0); if (NumOps == 1) { - N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0]); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0]); } else if (NumOps == 2) { - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0], Ops[1]); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1]); } else if (NumOps == 3) { - N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0], Ops[1], - Ops[2]); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1], Ops[2]); } else { - N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTList, Ops, NumOps); } CSEMap.InsertNode(N, IP); } else { if (NumOps == 1) { - N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0]); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), 
VTList, Ops[0]); } else if (NumOps == 2) { - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0], Ops[1]); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1]); } else if (NumOps == 3) { - N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0], Ops[1], - Ops[2]); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1], Ops[2]); } else { - N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTList, Ops, NumOps); } } AllNodes.push_back(N); @@ -4851,76 +4979,81 @@ SDVTList SelectionDAG::getVTList(EVT VT) { } SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { - for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) - if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2) - return *I; - - EVT *Array = Allocator.Allocate<EVT>(2); - Array[0] = VT1; - Array[1] = VT2; - SDVTList Result = makeVTList(Array, 2); - VTList.push_back(Result); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(2U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate<EVT>(2); + Array[0] = VT1; + Array[1] = VT2; + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 2); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); } SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { - for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) - if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && - I->VTs[2] == VT3) - return *I; - - EVT *Array = Allocator.Allocate<EVT>(3); - Array[0] = VT1; - Array[1] = VT2; - Array[2] = VT3; - SDVTList Result = makeVTList(Array, 3); - VTList.push_back(Result); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(3U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + ID.AddInteger(VT3.getRawBits()); + + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate<EVT>(3); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 3); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); } SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { - for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) - if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && - I->VTs[2] == VT3 && I->VTs[3] == VT4) - return *I; - - EVT *Array = Allocator.Allocate<EVT>(4); - Array[0] = VT1; - Array[1] = VT2; - Array[2] = VT3; - Array[3] = VT4; - SDVTList Result = makeVTList(Array, 4); - VTList.push_back(Result); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(4U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + ID.AddInteger(VT3.getRawBits()); + ID.AddInteger(VT4.getRawBits()); + + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate<EVT>(4); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + Array[3] = VT4; + Result = new (Allocator) 
SDVTListNode(ID.Intern(Allocator), Array, 4); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); } SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { - switch (NumVTs) { - case 0: llvm_unreachable("Cannot have nodes without results!"); - case 1: return getVTList(VTs[0]); - case 2: return getVTList(VTs[0], VTs[1]); - case 3: return getVTList(VTs[0], VTs[1], VTs[2]); - case 4: return getVTList(VTs[0], VTs[1], VTs[2], VTs[3]); - default: break; + FoldingSetNodeID ID; + ID.AddInteger(NumVTs); + for (unsigned index = 0; index < NumVTs; index++) { + ID.AddInteger(VTs[index].getRawBits()); } - for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) { - if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1]) - continue; - - if (std::equal(&VTs[2], &VTs[NumVTs], &I->VTs[2])) - return *I; + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate<EVT>(NumVTs); + std::copy(VTs, VTs + NumVTs, Array); + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs); + VTListMap.InsertNode(Result, IP); } - - EVT *Array = Allocator.Allocate<EVT>(NumVTs); - std::copy(VTs, VTs+NumVTs, Array); - SDVTList Result = makeVTList(Array, NumVTs); - VTList.push_back(Result); - return Result; + return Result->getSDVTList(); } @@ -5410,7 +5543,8 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, } // Allocate a new MachineSDNode. - N = new (NodeAllocator) MachineSDNode(~Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + N = new (NodeAllocator) MachineSDNode(~Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs); // Initialize the operands list. if (NumOps > array_lengthof(N->LocalOperands)) @@ -5916,6 +6050,12 @@ GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order, TheGlobal = GA; } +AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, DebugLoc dl, EVT VT, + SDValue X, unsigned SrcAS, + unsigned DestAS) + : UnarySDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT), X), + SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {} + MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, EVT memvt, MachineMemOperand *mmo) : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) { @@ -6162,8 +6302,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { case ISD::ROTL: case ISD::ROTR: Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], - getShiftAmountOperand(Operands[0].getValueType(), - Operands[1]))); + getShiftAmountOperand(Operands[0].getValueType(), + Operands[1]))); break; case ISD::SIGN_EXTEND_INREG: case ISD::FP_ROUND_INREG: { @@ -6235,7 +6375,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { int64_t GVOffset = 0; const TargetLowering *TLI = TM.getTargetLowering(); if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - unsigned PtrWidth = TLI->getPointerTy().getSizeInBits(); + unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType()); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, TLI->getDataLayout()); @@ -6268,6 +6408,38 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { return 0; } +/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type +/// which is split (or expanded) into two not necessarily identical pieces. 
+std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const { + // Currently all types are split in half. + EVT LoVT, HiVT; + if (!VT.isVector()) { + LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT); + } else { + unsigned NumElements = VT.getVectorNumElements(); + assert(!(NumElements & 1) && "Splitting vector, but not in half!"); + LoVT = HiVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(), + NumElements/2); + } + return std::make_pair(LoVT, HiVT); +} + +/// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the +/// low/high part. +std::pair<SDValue, SDValue> +SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, + const EVT &HiVT) { + assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <= + N.getValueType().getVectorNumElements() && + "More vector elements requested than available!"); + SDValue Lo, Hi; + Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, + getConstant(0, TLI->getVectorIdxTy())); + Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, + getConstant(LoVT.getVectorNumElements(), TLI->getVectorIdxTy())); + return std::make_pair(Lo, Hi); +} + // getAddressSpace - Return the address space this GlobalAddress belongs to. unsigned GlobalAddressSDNode::getAddressSpace() const { return getGlobal()->getType()->getAddressSpace(); @@ -6389,7 +6561,7 @@ static void checkForCyclesHelper(const SDNode *N, void llvm::checkForCycles(const llvm::SDNode *N) { #ifdef XDEBUG - assert(N && "Checking nonexistant SDNode"); + assert(N && "Checking nonexistent SDNode"); SmallPtrSet<const SDNode*, 32> visited; SmallPtrSet<const SDNode*, 32> checked; checkForCyclesHelper(N, visited, checked); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index b9f4381..2b2713d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" @@ -49,7 +50,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/IntegersSubsetMapping.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" @@ -58,6 +58,7 @@ #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include <algorithm> using namespace llvm; @@ -1063,8 +1064,10 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); - if (isa<ConstantPointerNull>(C)) - return DAG.getConstant(0, TLI->getPointerTy()); + if (isa<ConstantPointerNull>(C)) { + unsigned AS = V->getType()->getPointerAddressSpace(); + return DAG.getConstant(0, TLI->getPointerTy(AS)); + } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) return DAG.getConstantFP(*CFP, VT); @@ -1268,7 +1271,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { for (unsigned i = 0; i < NumParts; ++i) { Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), - /*isfixed=*/true, 0, 0)); + VT, /*isfixed=*/true, 0, 0)); OutVals.push_back(Parts[i]); } } @@ -1617,8 
+1620,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, } else Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); } else { - assert(CB.CC == ISD::SETCC_INVALID && - "Condition is undefined for to-the-range belonging check."); + assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); @@ -1626,9 +1628,9 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, SDValue CmpOp = getValue(CB.CmpMHS); EVT VT = CmpOp.getValueType(); - if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) { + if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), - ISD::SETULE); + ISD::SETLE); } else { SDValue SUB = DAG.getNode(ISD::SUB, dl, VT, CmpOp, DAG.getConstant(Low, VT)); @@ -1741,6 +1743,77 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, DAG.setRoot(BrCond); } +/// Codegen a new tail for a stack protector check ParentMBB which has had its +/// tail spliced into a stack protector check success bb. +/// +/// For a high level explanation of how this fits into the stack protector +/// generation see the comment on the declaration of class +/// StackProtectorDescriptor. +void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, + MachineBasicBlock *ParentBB) { + + // First create the loads to the guard/stack slot for the comparison. + const TargetLowering *TLI = TM.getTargetLowering(); + EVT PtrTy = TLI->getPointerTy(); + + MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); + int FI = MFI->getStackProtectorIndex(); + + const Value *IRGuard = SPD.getGuard(); + SDValue GuardPtr = getValue(IRGuard); + SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); + + unsigned Align = + TLI->getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); + SDValue Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + GuardPtr, MachinePointerInfo(IRGuard, 0), + true, false, false, Align); + + SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + StackSlotPtr, + MachinePointerInfo::getFixedStack(FI), + true, false, false, Align); + + // Perform the comparison via a subtract/getsetcc. + EVT VT = Guard.getValueType(); + SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot); + + SDValue Cmp = DAG.getSetCC(getCurSDLoc(), + TLI->getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(0, VT), + ISD::SETNE); + + // If the sub is not 0, then we know the guard/stackslot do not equal, so + // branch to failure MBB. + SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + MVT::Other, StackSlot.getOperand(0), + Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); + // Otherwise branch to success MBB. + SDValue Br = DAG.getNode(ISD::BR, getCurSDLoc(), + MVT::Other, BrCond, + DAG.getBasicBlock(SPD.getSuccessMBB())); + + DAG.setRoot(Br); +} + +/// Codegen the failure basic block for a stack protector check. +/// +/// A failure stack protector machine basic block consists simply of a call to +/// __stack_chk_fail(). +/// +/// For a high level explanation of how this fits into the stack protector +/// generation see the comment on the declaration of class +/// StackProtectorDescriptor. 
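visitSPDescriptorParent above emits the stack-protector comparison itself: load the guard, load the on-stack copy, subtract, SETNE against zero, and BRCOND to the failure block, falling through to the success block otherwise; the failure block, per the function that follows, is nothing but a call to __stack_chk_fail. The emitted control flow, modeled as ordinary C++ rather than DAG nodes (a conceptual sketch, not runtime-library code):

  #include <cstdint>
  #include <cstdio>
  #include <cstdlib>

  static std::uintptr_t StackGuard = 0xdeadbeef; // stand-in for the IR guard

  static void stackChkFail() {          // models the failure MBB's libcall
    std::fprintf(stderr, "stack smashing detected\n");
    std::abort();
  }

  static void checkFrame(std::uintptr_t StackSlot) {
    // Sub + SETNE + BRCOND, the node sequence built above.
    if (StackGuard - StackSlot != 0)    // guard != slot: corrupted frame
      stackChkFail();                   // branch to SPD.getFailureMBB()
    // otherwise fall through to SPD.getSuccessMBB()
  }

  int main() {
    checkFrame(0xdeadbeef);             // intact frame: falls through
    return 0;
  }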
+void +SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { + const TargetLowering *TLI = TM.getTargetLowering(); + SDValue Chain = TLI->makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, + MVT::isVoid, 0, 0, false, getCurSDLoc(), + false, false).second; + DAG.setRoot(Chain); +} + /// visitBitTestHeader - This function emits necessary code to produce value /// suitable for "bit tests" void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, @@ -2073,7 +2146,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, CC = ISD::SETEQ; LHS = SV; RHS = I->High; MHS = NULL; } else { - CC = ISD::SETCC_INVALID; + CC = ISD::SETLE; LHS = I->Low; MHS = SV; RHS = I->High; } @@ -2107,7 +2180,7 @@ static inline bool areJTsAllowed(const TargetLowering &TLI) { static APInt ComputeRange(const APInt &First, const APInt &Last) { uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; - APInt LastExt = Last.zext(BitWidth), FirstExt = First.zext(BitWidth); + APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth); return (LastExt - FirstExt + 1ULL); } @@ -2174,7 +2247,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, const APInt &Low = cast<ConstantInt>(I->Low)->getValue(); const APInt &High = cast<ConstantInt>(I->High)->getValue(); - if (Low.ule(TEI) && TEI.ule(High)) { + if (Low.sle(TEI) && TEI.sle(High)) { DestBBs.push_back(I->BB); if (TEI==High) ++I; @@ -2348,7 +2421,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, // Create a CaseBlock record representing a conditional branch to // the LHS node if the value being switched on SV is less than C. // Otherwise, branch to LHS. - CaseBlock CB(ISD::SETULT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); + CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); if (CR.CaseBB == SwitchBB) visitSwitchCase(CB, SwitchBB); @@ -2378,7 +2451,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, MachineFunction *CurMF = FuncInfo.MF; // If target does not have legal shift left, do not emit bit tests at all. - if (!TLI->isOperationLegal(ISD::SHL, TLI->getPointerTy())) + if (!TLI->isOperationLegal(ISD::SHL, PTy)) return false; size_t numCmps = 0; @@ -2421,7 +2494,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, // Optimize the case where all the case values fit in a // word without having to subtract minValue. In this case, // we can optimize away the subtraction. - if (maxValue.ult(IntPtrBits)) { + if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) { cmpRange = maxValue; } else { lowBound = minValue; @@ -2496,12 +2569,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, /// Clusterify - Transform simple list of Cases into list of CaseRange's size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, const SwitchInst& SI) { - - /// Use a shorter form of declaration, and also - /// show the we want to use CRSBuilder as Clusterifier. - typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier; - - Clusterifier TheClusterifier; + size_t numCmps = 0; BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases @@ -2510,27 +2578,40 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; - TheClusterifier.add(i.getCaseValueEx(), SMBB, - BPI ? 
BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0); - } - - TheClusterifier.optimize(); - - size_t numCmps = 0; - for (Clusterifier::RangeIterator i = TheClusterifier.begin(), - e = TheClusterifier.end(); i != e; ++i, ++numCmps) { - Clusterifier::Cluster &C = *i; - // Update edge weight for the cluster. - unsigned W = C.first.Weight; - - // FIXME: Currently work with ConstantInt based numbers. - // Changing it to APInt based is a pretty heavy for this commit. - Cases.push_back(Case(C.first.getLow().toConstantInt(), - C.first.getHigh().toConstantInt(), C.second, W)); + uint32_t ExtraWeight = + BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0; + + Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(), + SMBB, ExtraWeight)); + } + std::sort(Cases.begin(), Cases.end(), CaseCmp()); + + // Merge case into clusters + if (Cases.size() >= 2) + // Must recompute end() each iteration because it may be + // invalidated by erase if we hold on to it + for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin()); + J != Cases.end(); ) { + const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); + const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); + MachineBasicBlock* nextBB = J->BB; + MachineBasicBlock* currentBB = I->BB; + + // If the two neighboring cases go to the same destination, merge them + // into a single case. + if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { + I->High = J->High; + I->ExtraWeight += J->ExtraWeight; + J = Cases.erase(J); + } else { + I = J++; + } + } - if (C.first.getLow() != C.first.getHigh()) - // A range counts double, since it requires two compares. - ++numCmps; + for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { + if (I->Low != I->High) + // A range counts double, since it requires two compares. + ++numCmps; } return numCmps; @@ -2859,6 +2940,21 @@ void SelectionDAGBuilder::visitBitCast(const User &I) { setValue(&I, N); // noop cast. } +void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const Value *SV = I.getOperand(0); + SDValue N = getValue(SV); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + + unsigned SrcAS = SV->getType()->getPointerAddressSpace(); + unsigned DestAS = I.getType()->getPointerAddressSpace(); + + if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) + N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS); + + setValue(&I, N); +} + void SelectionDAGBuilder::visitInsertElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); @@ -3151,10 +3247,12 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { } void SelectionDAGBuilder::visitGetElementPtr(const User &I) { - SDValue N = getValue(I.getOperand(0)); + Value *Op0 = I.getOperand(0); // Note that the pointer operand may be a vector of pointers. Take the scalar // element which holds a pointer. 
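Clusterify above drops the IntegersSubsetMapping machinery in favor of the direct algorithm: build one Case per switch arm, sort by value, then merge adjacent cases that are numerically consecutive and branch to the same block, summing their edge weights; a merged range then counts as two compares in numCmps. (The companion hunks above switch the lowering to signed comparisons, SETULE/ult/zext becoming SETLE/sle/sext, so case values like -1 and 0 really are adjacent.) The merge loop in standalone form, with plain structs standing in for ConstantInt and MachineBasicBlock:

  #include <algorithm>
  #include <cassert>
  #include <cstdint>
  #include <vector>

  struct Case { int64_t Low, High; int BB; uint32_t Weight; }; // model types

  // Sort by Low, then merge neighbors: consecutive values, same destination.
  size_t clusterify(std::vector<Case> &Cases) {
    std::sort(Cases.begin(), Cases.end(),
              [](const Case &A, const Case &B) { return A.Low < B.Low; });
    for (size_t i = 0; i + 1 < Cases.size();) {
      if (Cases[i + 1].Low - Cases[i].High == 1 &&
          Cases[i].BB == Cases[i + 1].BB) {
        Cases[i].High = Cases[i + 1].High;      // grow the range
        Cases[i].Weight += Cases[i + 1].Weight; // I->ExtraWeight += J's
        Cases.erase(Cases.begin() + i + 1);     // J = Cases.erase(J)
      } else {
        ++i;
      }
    }
    size_t NumCmps = 0;
    for (const Case &C : Cases)
      NumCmps += (C.Low != C.High) ? 2 : 1;     // a range costs two compares
    return NumCmps;
  }

  int main() {
    std::vector<Case> Cases = {{-1, -1, 7, 1}, {0, 0, 7, 1}, {1, 1, 7, 1},
                               {5, 5, 9, 1}};
    size_t NumCmps = clusterify(Cases);
    assert(Cases.size() == 2);                  // {-1..1 -> BB7}, {5 -> BB9}
    assert(Cases[0].Weight == 3 && NumCmps == 3);
    return 0;
  }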
- Type *Ty = I.getOperand(0)->getType()->getScalarType(); + Type *Ty = Op0->getType()->getScalarType(); + unsigned AS = Ty->getPointerAddressSpace(); + SDValue N = getValue(Op0); for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { @@ -3179,14 +3277,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { uint64_t Offs = TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); SDValue OffsVal; - EVT PTy = TLI->getPointerTy(); + EVT PTy = TLI->getPointerTy(AS); unsigned PtrBits = PTy.getSizeInBits(); if (PtrBits < 64) - OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), - TLI->getPointerTy(), + OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy, DAG.getConstant(Offs, MVT::i64)); else - OffsVal = DAG.getIntPtrConstant(Offs); + OffsVal = DAG.getConstant(Offs, PTy); N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal); @@ -3194,7 +3291,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { } // N = N + Idx * ElementSize; - APInt ElementSize = APInt(TLI->getPointerTy().getSizeInBits(), + APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS), TD->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); @@ -3451,7 +3548,7 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDValue L = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, - getValue(I.getCompareOperand()).getValueType().getSimpleVT(), + getValue(I.getCompareOperand()).getSimpleValueType(), InChain, getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), @@ -3499,7 +3596,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { SDValue L = DAG.getAtomic(NT, dl, - getValue(I.getValOperand()).getValueType().getSimpleVT(), + getValue(I.getValOperand()).getSimpleValueType(), InChain, getValue(I.getPointerOperand()), getValue(I.getValOperand()), @@ -4193,7 +4290,7 @@ static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG, static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const TargetLowering &TLI) { bool IsExp10 = false; - if (LHS.getValueType() == MVT::f32 && LHS.getValueType() == MVT::f32 && + if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) { APFloat Ten(10.0f); @@ -4705,14 +4802,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, TLI->getPointerTy()); SDValue Offset = DAG.getNode(ISD::ADD, sdl, - TLI->getPointerTy(), + CfaArg.getValueType(), DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, - TLI->getPointerTy()), + CfaArg.getValueType()), CfaArg); SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(), DAG.getConstant(0, TLI->getPointerTy())); - setValue(&I, DAG.getNode(ISD::ADD, sdl, TLI->getPointerTy(), + setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), FA, Offset)); return 0; } @@ -4902,7 +4999,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::ceil: case Intrinsic::trunc: case Intrinsic::rint: - case Intrinsic::nearbyint: { + case Intrinsic::nearbyint: + case Intrinsic::round: { unsigned Opcode; switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
@@ -4915,6 +5013,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + case Intrinsic::round: Opcode = ISD::FROUND; break; } setValue(&I, DAG.getNode(Opcode, sdl, @@ -4922,6 +5021,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)))); return 0; } + case Intrinsic::copysign: + setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return 0; case Intrinsic::fma: setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), @@ -5207,9 +5312,30 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::invariant_end: // Discard region information. return 0; + case Intrinsic::stackprotectorcheck: { + // Do not actually emit anything for this basic block. Instead we initialize + // the stack protector descriptor and export the guard variable so we can + // access it in FinishBasicBlock. + const BasicBlock *BB = I.getParent(); + SPDescriptor.initialize(BB, FuncInfo.MBBMap[BB], I); + ExportFromCurrentBlock(SPDescriptor.getGuard()); + + // Flush our exports since we are going to process a terminator. + (void)getControlRoot(); + return 0; + } case Intrinsic::donothing: // ignore return 0; + case Intrinsic::experimental_stackmap: { + visitStackmap(I); + return 0; + } + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: { + visitPatchpoint(I); + return 0; + } } } @@ -5274,15 +5400,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue ArgNode = getValue(V); Entry.Node = ArgNode; Entry.Ty = V->getType(); - unsigned attrInd = i - CS.arg_begin() + 1; - Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); - Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); - Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); - Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); - Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); - Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); - Entry.isReturned = CS.paramHasAttr(attrInd, Attribute::Returned); - Entry.Alignment = CS.getParamAlignment(attrInd); + // Skip the first return-type Attribute to get to params. + Entry.setAttributes(&CS, i - CS.arg_begin() + 1); Args.push_back(Entry); } @@ -5364,8 +5483,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, } if (!Result.second.getNode()) { - // As a special case, a null chain means that a tail call has been emitted and - // the DAG root is already updated. + // As a special case, a null chain means that a tail call has been emitted + // and the DAG root is already updated. HasTailCall = true; // Since there's no actual continuation from this block, nothing can be @@ -5445,6 +5564,18 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, return LoadVal; } +/// processIntegerCallValue - Record the value for an instruction that +/// produces an integer result, converting the type where necessary. 
+void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, + SDValue Value, + bool IsSigned) { + EVT VT = TM.getTargetLowering()->getValueType(I.getType(), true); + if (IsSigned) + Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); + else + Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT); + setValue(&I, Value); +} /// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. /// If so, return true and lower it, otherwise return false and it will be @@ -5460,15 +5591,33 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { !I.getType()->isIntegerTy()) return false; - const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2)); + const Value *Size = I.getArgOperand(2); + const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); + if (CSize && CSize->getZExtValue() == 0) { + EVT CallVT = TM.getTargetLowering()->getValueType(I.getType(), true); + setValue(&I, DAG.getConstant(0, CallVT)); + return true; + } + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(LHS), getValue(RHS), getValue(Size), + MachinePointerInfo(LHS), + MachinePointerInfo(RHS)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, true); + PendingLoads.push_back(Res.second); + return true; + } // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 - if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) { + if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) { bool ActuallyDoIt = true; MVT LoadVT; Type *LoadTy; - switch (Size->getZExtValue()) { + switch (CSize->getZExtValue()) { default: LoadVT = MVT::Other; LoadTy = 0; @@ -5476,20 +5625,20 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { break; case 2: LoadVT = MVT::i16; - LoadTy = Type::getInt16Ty(Size->getContext()); + LoadTy = Type::getInt16Ty(CSize->getContext()); break; case 4: LoadVT = MVT::i32; - LoadTy = Type::getInt32Ty(Size->getContext()); + LoadTy = Type::getInt32Ty(CSize->getContext()); break; case 8: LoadVT = MVT::i64; - LoadTy = Type::getInt64Ty(Size->getContext()); + LoadTy = Type::getInt64Ty(CSize->getContext()); break; /* case 16: LoadVT = MVT::v4i32; - LoadTy = Type::getInt32Ty(Size->getContext()); + LoadTy = Type::getInt32Ty(CSize->getContext()); LoadTy = VectorType::get(LoadTy, 4); break; */ @@ -5503,7 +5652,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // supports unaligned loads of that type. Expanding into byte loads would // bloat the code. const TargetLowering *TLI = TM.getTargetLowering(); - if (ActuallyDoIt && Size->getZExtValue() > 4) { + if (ActuallyDoIt && CSize->getZExtValue() > 4) { // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT)) @@ -5516,8 +5665,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal, ISD::SETNE); - EVT CallVT = TLI->getValueType(I.getType(), true); - setValue(&I, DAG.getZExtOrTrunc(Res, getCurSDLoc(), CallVT)); + processIntegerCallValue(I, Res, false); return true; } } @@ -5526,6 +5674,148 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { return false; } +/// visitMemChrCall -- See if we can lower a memchr call into an optimized +/// form. 
If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { + // Verify that the prototype makes sense. void *memchr(void *, int, size_t) + if (I.getNumArgOperands() != 3) + return false; + + const Value *Src = I.getArgOperand(0); + const Value *Char = I.getArgOperand(1); + const Value *Length = I.getArgOperand(2); + if (!Src->getType()->isPointerTy() || + !Char->getType()->isIntegerTy() || + !Length->getType()->isIntegerTy() || + !I.getType()->isPointerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Src), getValue(Char), getValue(Length), + MachinePointerInfo(Src)); + if (Res.first.getNode()) { + setValue(&I, Res.first); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an +/// optimized form. If so, return true and lower it, otherwise return false +/// and it will be lowered like a normal call. +bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { + // Verify that the prototype makes sense. char *strcpy(char *, char *) + if (I.getNumArgOperands() != 2) + return false; + + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + if (!Arg0->getType()->isPointerTy() || + !Arg1->getType()->isPointerTy() || + !I.getType()->isPointerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0), + MachinePointerInfo(Arg1), isStpcpy); + if (Res.first.getNode()) { + setValue(&I, Res.first); + DAG.setRoot(Res.second); + return true; + } + + return false; +} + +/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form. +/// If so, return true and lower it, otherwise return false and it will be +/// lowered like a normal call. +bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { + // Verify that the prototype makes sense. int strcmp(void*,void*) + if (I.getNumArgOperands() != 2) + return false; + + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + if (!Arg0->getType()->isPointerTy() || + !Arg1->getType()->isPointerTy() || + !I.getType()->isIntegerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0), + MachinePointerInfo(Arg1)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, true); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitStrLenCall -- See if we can lower a strlen call into an optimized +/// form. If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { + // Verify that the prototype makes sense. 
size_t strlen(char *) + if (I.getNumArgOperands() != 1) + return false; + + const Value *Arg0 = I.getArgOperand(0); + if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), MachinePointerInfo(Arg0)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, false); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized +/// form. If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { + // Verify that the prototype makes sense. size_t strnlen(char *, size_t) + if (I.getNumArgOperands() != 2) + return false; + + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + if (!Arg0->getType()->isPointerTy() || + !Arg1->getType()->isIntegerTy() || + !I.getType()->isIntegerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, false); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + /// visitUnaryFloatCall - If a call instruction is a unary floating-point /// operation (as expected), translate it to an SDNode with the specified opcode /// and return true. @@ -5644,6 +5934,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitUnaryFloatCall(I, ISD::FRINT)) return; break; + case LibFunc::round: + case LibFunc::roundf: + case LibFunc::roundl: + if (visitUnaryFloatCall(I, ISD::FROUND)) + return; + break; case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: @@ -5666,6 +5962,30 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitMemCmpCall(I)) return; break; + case LibFunc::memchr: + if (visitMemChrCall(I)) + return; + break; + case LibFunc::strcpy: + if (visitStrCpyCall(I, false)) + return; + break; + case LibFunc::stpcpy: + if (visitStrCpyCall(I, true)) + return; + break; + case LibFunc::strcmp: + if (visitStrCmpCall(I)) + return; + break; + case LibFunc::strlen: + if (visitStrLenCall(I)) + return; + break; + case LibFunc::strnlen: + if (visitStrNLenCall(I)) + return; + break; } } } @@ -6421,6 +6741,248 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { DAG.getSrcValue(I.getArgOperand(1)))); } +/// \brief Lower an argument list according to the target calling convention. +/// +/// \return A tuple of <return-value, token-chain> +/// +/// This is a helper for lowering intrinsics that follow a target calling +/// convention or require stack pointer adjustment. Only a subset of the +/// intrinsic's operands need to participate in the calling convention. +std::pair<SDValue, SDValue> +SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, + unsigned NumArgs, SDValue Callee, + bool useVoidTy) { + TargetLowering::ArgListTy Args; + Args.reserve(NumArgs); + + // Populate the argument list. + // Attributes for args start at offset 1, after the return attribute. 
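The visitMemCmpCall/visitStr*Call family above shares one shape: give the target's EmitTargetCodeFor* hook first refusal, consume its {result, chain} pair when the result node is non-null, and otherwise fall back to an ordinary call. The most concrete of the remaining fast paths is the constant-size memcmp equality check; a freestanding sketch of what that lowering computes, where memcpy stands in for the unaligned integer loads the DAG would emit:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Load two Ts from possibly unaligned addresses and report whether they
// differ; this mirrors the "two integer loads, one compare" lowering.
template <typename T>
static bool differAs(const void *A, const void *B) {
  T VA, VB;
  std::memcpy(&VA, A, sizeof(T));
  std::memcpy(&VB, B, sizeof(T));
  return VA != VB;
}

// Equality-only memcmp: returns (memcmp(A, B, N) != 0) for the sizes the
// lowering special-cases, falling back to the library call otherwise.
static bool memcmpNonzero(const void *A, const void *B, std::size_t N) {
  switch (N) {
  case 0: return false; // memcmp(_, _, 0) is defined to be 0
  case 2: return differAs<std::uint16_t>(A, B);
  case 4: return differAs<std::uint32_t>(A, B);
  case 8: return differAs<std::uint64_t>(A, B);
  default: return std::memcmp(A, B, N) != 0;
  }
}

This transformation is only valid because the caller has already checked, via IsOnlyUsedInZeroEqualityComparison, that nothing depends on the sign of the memcmp result.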
+ ImmutableCallSite CS(&CI); + for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; + ArgI != ArgE; ++ArgI) { + const Value *V = CI.getOperand(ArgI); + + assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); + + TargetLowering::ArgListEntry Entry; + Entry.Node = getValue(V); + Entry.Ty = V->getType(); + Entry.setAttributes(&CS, AttrI); + Args.push_back(Entry); + } + + Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType(); + TargetLowering::CallLoweringInfo CLI(getRoot(), retTy, /*retSExt*/ false, + /*retZExt*/ false, /*isVarArg*/ false, /*isInReg*/ false, NumArgs, + CI.getCallingConv(), /*isTailCall*/ false, /*doesNotReturn*/ false, + /*isReturnValueUsed*/ CI.use_empty(), Callee, Args, DAG, getCurSDLoc()); + + const TargetLowering *TLI = TM.getTargetLowering(); + return TLI->LowerCallTo(CLI); +} + +/// \brief Lower llvm.experimental.stackmap directly to its target opcode. +void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { + // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>, + // [live variables...]) + + assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); + + SDValue Callee = getValue(CI.getCalledValue()); + + // Lower into a call sequence with no args and no return value. + std::pair<SDValue, SDValue> Result = LowerCallOperands(CI, 0, 0, Callee); + // Set the root to the target-lowered call chain. + SDValue Chain = Result.second; + DAG.setRoot(Chain); + + /// Get a call instruction from the call sequence chain. + /// Tail calls are not allowed. + SDNode *CallEnd = Chain.getNode(); + assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && + "Expected a callseq node."); + SDNode *Call = CallEnd->getOperand(0).getNode(); + bool hasGlue = Call->getGluedNode(); + + // Replace the target specific call node with the stackmap intrinsic. + SmallVector<SDValue, 8> Ops; + + // Add the <id> and <numShadowBytes> constants. + for (unsigned i = 0; i < 2; ++i) { + SDValue tmp = getValue(CI.getOperand(i)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32)); + } + // Push live variables for the stack map. + for (unsigned i = 2, e = CI.getNumArgOperands(); i != e; ++i) + Ops.push_back(getValue(CI.getArgOperand(i))); + + // Push the chain (this is originally the first operand of the call, but + // becomes now the last or second to last operand). + Ops.push_back(*(Call->op_begin())); + + // Push the glue flag (last operand). + if (hasGlue) + Ops.push_back(*(Call->op_end()-1)); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + // Replace the target specific call node with a STACKMAP node. + MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::STACKMAP, getCurSDLoc(), + NodeTys, Ops); + + // StackMap generates no value, so nothing goes in the NodeMap. + + // Fixup the consumers of the intrinsic. The chain and glue may be used in the + // call sequence. + DAG.ReplaceAllUsesWith(Call, MN); + + DAG.DeleteNode(Call); +} + +/// \brief Lower llvm.experimental.patchpoint directly to its target opcode. 
+void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { + // void|i64 @llvm.experimental.patchpoint.void|i64(i32 <id>, + // i32 <numBytes>, + // i8* <target>, + // i32 <numArgs>, + // [Args...], + // [live variables...]) + + CallingConv::ID CC = CI.getCallingConv(); + bool isAnyRegCC = CC == CallingConv::AnyReg; + bool hasDef = !CI.getType()->isVoidTy(); + SDValue Callee = getValue(CI.getOperand(2)); // <target> + + // Get the real number of arguments participating in the call <numArgs> + unsigned NumArgs = + cast<ConstantSDNode>(getValue(CI.getArgOperand(3)))->getZExtValue(); + + // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> + assert(CI.getNumArgOperands() >= NumArgs + 4 && + "Not enough arguments provided to the patchpoint intrinsic"); + + // For AnyRegCC the arguments are lowered later on manually. + unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs; + std::pair<SDValue, SDValue> Result = + LowerCallOperands(CI, 4, NumCallArgs, Callee, isAnyRegCC); + + // Set the root to the target-lowered call chain. + SDValue Chain = Result.second; + DAG.setRoot(Chain); + + SDNode *CallEnd = Chain.getNode(); + if (hasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) + CallEnd = CallEnd->getOperand(0).getNode(); + + /// Get a call instruction from the call sequence chain. + /// Tail calls are not allowed. + assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && + "Expected a callseq node."); + SDNode *Call = CallEnd->getOperand(0).getNode(); + bool hasGlue = Call->getGluedNode(); + + // Replace the target specific call node with the patchable intrinsic. + SmallVector<SDValue, 8> Ops; + + // Add the <id> and <numNopBytes> constants. + for (unsigned i = 0; i < 2; ++i) { + SDValue tmp = getValue(CI.getOperand(i)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32)); + } + // Assume that the Callee is a constant address. + Ops.push_back( + DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(), + /*isTarget=*/true)); + + // Adjust <numArgs> to account for any arguments that have been passed on the + // stack instead. + // Call Node: Chain, Target, {Args}, RegMask, [Glue] + unsigned NumCallRegArgs = Call->getNumOperands() - (hasGlue ? 4 : 3); + NumCallRegArgs = isAnyRegCC ? NumArgs : NumCallRegArgs; + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); + + // Add the calling convention + Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32)); + + // Add the arguments we omitted previously. The register allocator should + // place these in any free register. + if (isAnyRegCC) + for (unsigned i = 4, e = NumArgs + 4; i != e; ++i) + Ops.push_back(getValue(CI.getArgOperand(i))); + + // Push the arguments from the call instruction. + SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1; + for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i) + Ops.push_back(*i); + + // Push live variables for the stack map. + for (unsigned i = NumArgs + 4, e = CI.getNumArgOperands(); i != e; ++i) { + SDValue OpVal = getValue(CI.getArgOperand(i)); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { + Ops.push_back( + DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Ops.push_back( + DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + } else + Ops.push_back(OpVal); + } + + // Push the register mask info. 
+ if (hasGlue) + Ops.push_back(*(Call->op_end()-2)); + else + Ops.push_back(*(Call->op_end()-1)); + + // Push the chain (this is originally the first operand of the call, but + // becomes now the last or second to last operand). + Ops.push_back(*(Call->op_begin())); + + // Push the glue flag (last operand). + if (hasGlue) + Ops.push_back(*(Call->op_end()-1)); + + SDVTList NodeTys; + if (isAnyRegCC && hasDef) { + // Create the return types based on the intrinsic definition + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector<EVT, 3> ValueVTs; + ComputeValueVTs(TLI, CI.getType(), ValueVTs); + assert(ValueVTs.size() == 1 && "Expected only one return value type."); + + // There is always a chain and a glue type at the end + ValueVTs.push_back(MVT::Other); + ValueVTs.push_back(MVT::Glue); + NodeTys = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); + } else + NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + // Replace the target specific call node with a PATCHPOINT node. + MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, + getCurSDLoc(), NodeTys, Ops); + + // Update the NodeMap. + if (hasDef) { + if (isAnyRegCC) + setValue(&CI, SDValue(MN, 0)); + else + setValue(&CI, Result.first); + } + + // Fixup the consumers of the intrinsic. The chain and glue may be used in the + // call sequence. Furthermore the location of the chain and glue can change + // when the AnyReg calling convention is used and the intrinsic returns a + // value. + if (isAnyRegCC && hasDef) { + SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; + SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)}; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + } else + DAG.ReplaceAllUsesWith(Call, MN); + DAG.DeleteNode(Call); +} + /// TargetLowering::LowerCallTo - This is the default LowerCallTo /// implementation, which just calls LowerCall. 
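The delicate part of visitStackmap and visitPatchpoint above is the fixed operand order of the rebuilt machine node. A simplified, self-contained sketch of the PATCHPOINT operand layout, with string tokens standing in for SDValues (the Operand alias and the token names are illustrative only, not LLVM types):

#include <cstdint>
#include <string>
#include <vector>

// A string token stands in for one SDValue operand.
typedef std::string Operand;

// Assemble operands in the order visitPatchpoint produces them:
// <id>, <numBytes>, <target>, <numArgs>, <cc>, call arguments...,
// live stack map variables..., register mask, chain, then glue
// (when present, glue is always last).
std::vector<Operand> buildPatchpointOps(uint64_t ID, uint32_t NumBytes,
                                        uint64_t TargetAddr, unsigned CC,
                                        const std::vector<Operand> &CallArgs,
                                        const std::vector<Operand> &LiveVars,
                                        bool HasGlue) {
  std::vector<Operand> Ops;
  Ops.push_back("id=" + std::to_string(ID));
  Ops.push_back("nbytes=" + std::to_string(NumBytes));
  Ops.push_back("target=" + std::to_string(TargetAddr));
  Ops.push_back("numargs=" + std::to_string(CallArgs.size()));
  Ops.push_back("cc=" + std::to_string(CC));
  Ops.insert(Ops.end(), CallArgs.begin(), CallArgs.end());
  Ops.insert(Ops.end(), LiveVars.begin(), LiveVars.end());
  Ops.push_back("regmask");
  Ops.push_back("chain"); // originally the call's first operand
  if (HasGlue)
    Ops.push_back("glue");
  return Ops;
}

Note that in the real code constants in the live-variable list are expanded to two operands, a StackMaps::ConstantOp marker followed by the immediate, so the stack map emitter can tell immediates from registers.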
/// FIXME: When all targets are @@ -6438,6 +7000,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; MyFlags.Used = CLI.IsReturnValueUsed; if (CLI.RetSExt) MyFlags.Flags.setSExt(); @@ -6527,7 +7090,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 - ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), + ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, i < CLI.NumFixedArgs, i, j*Parts[j].getValueType().getStoreSize()); if (NumParts > 1 && j == 0) @@ -6666,7 +7229,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ISD::ArgFlagsTy Flags; Flags.setSRet(); MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]); - ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0); + ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0); Ins.push_back(RetArg); } @@ -6677,6 +7240,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, I->getType(), ValueVTs); bool isArgValueUsed = !I->use_empty(); + unsigned PartBase = 0; for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; @@ -6714,8 +7278,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed, - Idx-1, i*RegisterVT.getStoreSize()); + ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, + Idx-1, PartBase+i*RegisterVT.getStoreSize()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 @@ -6723,6 +7287,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MyFlags.Flags.setOrigAlign(1); Ins.push_back(MyFlags); } + PartBase += VT.getStoreSize(); } } @@ -6940,3 +7505,22 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { ConstantsOut.clear(); } + +/// Add a successor MBB to ParentMBB, creating a new MachineBasicBlock for BB if SuccMBB +/// is 0. +MachineBasicBlock * +SelectionDAGBuilder::StackProtectorDescriptor:: +AddSuccessorMBB(const BasicBlock *BB, + MachineBasicBlock *ParentMBB, + MachineBasicBlock *SuccMBB) { + // If SuccMBB has not been created yet, create it. + if (!SuccMBB) { + MachineFunction *MF = ParentMBB->getParent(); + MachineFunction::iterator BBI = ParentMBB; + SuccMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(++BBI, SuccMBB); + } + // Add it as a successor of ParentMBB.
+ ParentMBB->addSuccessor(SuccMBB); + return SuccMBB; +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index ef73c00..835f643 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -1,4 +1,4 @@ -//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- c++ -*---===// +//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- C++ -*---===// // // The LLVM Compiler Infrastructure // @@ -26,6 +26,7 @@ namespace llvm { +class AddrSpaceCastInst; class AliasAnalysis; class AllocaInst; class BasicBlock; @@ -84,7 +85,7 @@ class SelectionDAGBuilder { const Instruction *CurInst; DenseMap<const Value*, SDValue> NodeMap; - + /// UnusedArgNodeMap - Maps argument value for unused arguments. This is used /// to preserve debug information for incoming arguments. DenseMap<const Value*, SDValue> UnusedArgNodeMap; @@ -182,6 +183,17 @@ private: typedef std::vector<CaseRec> CaseRecVector; + /// The comparison function for sorting the switch case values in the vector. + /// WARNING: Case ranges should be disjoint! + struct CaseCmp { + bool operator()(const Case &C1, const Case &C2) { + assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High)); + const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); + const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } + }; + struct CaseBitsCmp { bool operator()(const CaseBits &C1, const CaseBits &C2) { return C1.Bits > C2.Bits; @@ -224,7 +236,7 @@ private: struct JumpTable { JumpTable(unsigned R, unsigned J, MachineBasicBlock *M, MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {} - + /// Reg - the virtual register containing the index of the jump table entry //. to jump to. unsigned Reg; @@ -278,6 +290,201 @@ private: BitTestInfo Cases; }; + /// A class which encapsulates all of the information needed to generate a + /// stack protector check and signals to isel via its state being initialized + /// that a stack protector needs to be generated. + /// + /// *NOTE* The following is a high level documentation of SelectionDAG Stack + /// Protector Generation. The reason that it is placed here is for a lack of + /// other good places to stick it. + /// + /// High Level Overview of SelectionDAG Stack Protector Generation: + /// + /// Previously, generation of stack protectors was done exclusively in the + /// pre-SelectionDAG Codegen LLVM IR Pass "Stack Protector". This necessitated + /// splitting basic blocks at the IR level to create the success/failure basic + /// blocks in the tail of the basic block in question. As a result of this, + /// calls that would have qualified for the sibling call optimization were no + /// longer eligible for optimization since said calls were no longer right in + /// the "tail position" (i.e. the immediate predecessor of a ReturnInst + /// instruction). + /// + /// Then it was noticed that since the sibling call optimization causes the + /// callee to reuse the caller's stack, if we could delay the generation of + /// the stack protector check until later in CodeGen after the sibling call + /// decision was made, we get both the tail call optimization and the stack + /// protector check! + /// + /// A few goals in solving this problem were: + /// + /// 1. Preserve the architecture independence of stack protector generation. + /// + /// 2. 
Preserve the normal IR level stack protector check for platforms like + /// OpenBSD for which we support platform specific stack protector + /// generation. + /// + /// The main problem that guided the present solution is that one cannot + /// solve this problem in an architecture independent manner at the IR level + /// only. This is because: + /// + /// 1. The decision on whether or not to perform a sibling call on certain + /// platforms (for instance i386) requires lower level information + /// related to available registers that cannot be known at the IR level. + /// + /// 2. Even if the previous point were not true, the decision on whether to + /// perform a tail call is done in LowerCallTo in SelectionDAG which + /// occurs after the Stack Protector Pass. As a result, one would need to + /// put the relevant callinst into the stack protector check success + /// basic block (where the return inst is placed) and then move it back + /// later at SelectionDAG/MI time before the stack protector check if the + /// tail call optimization failed. The MI level option was nixed + /// immediately since it would require platform specific pattern + /// matching. The SelectionDAG level option was nixed because + /// SelectionDAG only processes one IR level basic block at a time, + /// implying one could not create a DAG Combine to move the callinst. + /// + /// To get around this problem a few things were realized: + /// + /// 1. While one cannot handle multiple IR level basic blocks at the + /// SelectionDAG Level, one can generate multiple machine basic blocks + /// for one IR level basic block. This is how we handle bit tests and + /// switches. + /// + /// 2. At the MI level, tail calls are represented via a special return + /// MIInst called "tcreturn". Thus if we know the basic block in which we + /// wish to insert the stack protector check, we get the correct behavior + /// by always inserting the stack protector check right before the return + /// statement. This is a "magical transformation" since no matter where + /// the stack protector check intrinsic is, we always insert the stack + /// protector check code at the end of the BB. + /// + /// Given the aforementioned constraints, the following solution was devised: + /// + /// 1. On platforms that do not support SelectionDAG stack protector check + /// generation, allow for the normal IR level stack protector check + /// generation to continue. + /// + /// 2. On platforms that do support SelectionDAG stack protector check + /// generation: + /// + /// a. Use the IR level stack protector pass to decide if a stack + /// protector is required/which BB we insert the stack protector check + /// in by reusing the logic already therein. If we wish to generate a + /// stack protector check in a basic block, we place a special IR + /// intrinsic called llvm.stackprotectorcheck right before the BB's + /// returninst, or, if there is a callinst that could potentially be + /// sibling call optimized, before the call inst. + /// + /// b. Then when a BB with said intrinsic is processed, we codegen the BB + /// normally via SelectBasicBlock. In said process, when we visit the + /// stack protector check, we do not actually emit anything into the + /// BB. Instead, we just initialize the stack protector descriptor + /// class (which involves stashing information/creating the success + /// mbb and the failure mbb if we have not created one for this + /// function yet) and export the guard variable that we are going to + /// compare. + /// + /// c.
After we finish selecting the basic block, in FinishBasicBlock if + /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is + /// initialized, we first find a splice point in the parent basic block + /// before the terminator and then splice the terminator of said basic + /// block into the success basic block. Then we code-gen a new tail for + /// the parent basic block consisting of the two loads, the comparison, + /// and finally two branches to the success/failure basic blocks. We + /// conclude by code-gening the failure basic block if we have not + /// code-gened it already (all stack protector checks we generate in + /// the same function use the same failure basic block). + class StackProtectorDescriptor { + public: + StackProtectorDescriptor() : ParentMBB(0), SuccessMBB(0), FailureMBB(0), + Guard(0) { } + ~StackProtectorDescriptor() { } + + /// Returns true if all fields of the stack protector descriptor are + /// initialized, implying that we should/are ready to emit a stack protector. + bool shouldEmitStackProtector() const { + return ParentMBB && SuccessMBB && FailureMBB && Guard; + } + + /// Initialize the stack protector descriptor structure for a new basic + /// block. + void initialize(const BasicBlock *BB, + MachineBasicBlock *MBB, + const CallInst &StackProtCheckCall) { + // Make sure we are not initialized yet. + assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is " + "already initialized!"); + ParentMBB = MBB; + SuccessMBB = AddSuccessorMBB(BB, MBB); + FailureMBB = AddSuccessorMBB(BB, MBB, FailureMBB); + if (!Guard) + Guard = StackProtCheckCall.getArgOperand(0); + } + + /// Reset state that changes when we handle different basic blocks. + /// + /// This currently includes: + /// + /// 1. The specific basic block we are generating a + /// stack protector for (ParentMBB). + /// + /// 2. The successor machine basic block that will contain the tail of + /// parent mbb after we create the stack protector check (SuccessMBB). This + /// BB is visited only on stack protector check success. + void resetPerBBState() { + ParentMBB = 0; + SuccessMBB = 0; + } + + /// Reset state that only changes when we switch functions. + /// + /// This currently includes: + /// + /// 1. FailureMBB since we reuse the failure code path for all stack + /// protector checks created in an individual function. + /// + /// 2. The guard variable since the guard variable we are checking against is + /// always the same. + void resetPerFunctionState() { + FailureMBB = 0; + Guard = 0; + } + + MachineBasicBlock *getParentMBB() { return ParentMBB; } + MachineBasicBlock *getSuccessMBB() { return SuccessMBB; } + MachineBasicBlock *getFailureMBB() { return FailureMBB; } + const Value *getGuard() { return Guard; } + + private: + /// The basic block for which we are generating the stack protector. + /// + /// As a result of stack protector generation, we will splice the + /// terminators of this basic block into the successor mbb SuccessMBB and + /// replace them with a compare/branch to the successor mbbs + /// SuccessMBB/FailureMBB depending on whether or not the stack protector + /// was violated. + MachineBasicBlock *ParentMBB; + + /// A basic block visited on stack protector check success that contains the + /// terminators of ParentMBB. + MachineBasicBlock *SuccessMBB; + + /// The basic block visited on stack protector check failure; it will + /// contain a call to __stack_chk_fail().
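For orientation, the control flow this descriptor ultimately produces is just a guard comparison spliced into the end of ParentMBB: reload the guard, compare it with the copy in the function's stack slot, and branch to the shared failure block on mismatch. A C++ analogue of the emitted tail (the function and parameter names here are illustrative):

#include <cstdint>

extern "C" void __stack_chk_fail(); // shared failure path; does not return

// What the generated tail of ParentMBB does: two loads, one compare, and a
// branch to the failure block on mismatch.
inline void stackProtectorCheck(const std::uintptr_t *GuardVar,
                                const std::uintptr_t *StackSlot) {
  if (*StackSlot != *GuardVar)
    __stack_chk_fail(); // FailureMBB
  // SuccessMBB: fall through to the original, spliced-in terminator.
}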
+ MachineBasicBlock *FailureMBB; + + /// The guard variable which we will compare against the stored value in the + /// stack protector stack slot. + const Value *Guard; + + /// Add a successor machine basic block to ParentMBB. If the successor mbb + /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic + /// block will be created. + MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB, + MachineBasicBlock *ParentMBB, + MachineBasicBlock *SuccMBB = 0); + }; + private: const TargetMachine &TM; public: @@ -295,6 +502,9 @@ public: /// BitTestCases - Vector of BitTestBlock structures used to communicate /// SwitchInst code generation information. std::vector<BitTestBlock> BitTestCases; + /// A StackProtectorDescriptor structure used to communicate stack protector + /// information in between SelectBasicBlock and FinishBasicBlock. + StackProtectorDescriptor SPDescriptor; // Emit PHI-node-operand constants only once even if used by multiple // PHI nodes. @@ -305,9 +515,9 @@ public: FunctionLoweringInfo &FuncInfo; /// OptLevel - What optimization level we're generating code for. - /// + /// CodeGenOpt::Level OptLevel; - + /// GFI - Garbage collection metadata for the function. GCFunctionInfo *GFI; @@ -389,7 +599,7 @@ public: assert(N.getNode() == 0 && "Already set a value for this node!"); N = NewN; } - + void setUnusedArgValue(const Value *V, SDValue NewN) { SDValue &N = UnusedArgNodeMap[V]; assert(N.getNode() == 0 && "Already set a value for this node!"); @@ -410,6 +620,12 @@ public: void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, MachineBasicBlock *LandingPad = NULL); + std::pair<SDValue, SDValue> LowerCallOperands(const CallInst &CI, + unsigned ArgIdx, + unsigned NumArgs, + SDValue Callee, + bool useVoidTy = false); + /// UpdateSplitBlock - When an MBB was split during scheduling, update the /// references that ned to refer to the last resulting block. void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); @@ -451,6 +667,9 @@ private: public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); + void visitSPDescriptorParent(StackProtectorDescriptor &SPD, + MachineBasicBlock *ParentBB); + void visitSPDescriptorFailure(StackProtectorDescriptor &SPD); void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); void visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, @@ -461,7 +680,7 @@ public: void visitJumpTable(JumpTable &JT); void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB); - + private: // These all get lowered before this pass. 
void visitInvoke(const InvokeInst &I); @@ -502,6 +721,7 @@ private: void visitPtrToInt(const User &I); void visitIntToPtr(const User &I); void visitBitCast(const User &I); + void visitAddrSpaceCast(const User &I); void visitExtractElement(const User &I); void visitInsertElement(const User &I); @@ -523,6 +743,11 @@ private: void visitPHI(const PHINode &I); void visitCall(const CallInst &I); bool visitMemCmpCall(const CallInst &I); + bool visitMemChrCall(const CallInst &I); + bool visitStrCpyCall(const CallInst &I, bool isStpcpy); + bool visitStrCmpCall(const CallInst &I); + bool visitStrLenCall(const CallInst &I); + bool visitStrNLenCall(const CallInst &I); bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode); void visitAtomicLoad(const LoadInst &I); void visitAtomicStore(const StoreInst &I); @@ -535,6 +760,8 @@ private: void visitVAArg(const VAArgInst &I); void visitVAEnd(const CallInst &I); void visitVACopy(const CallInst &I); + void visitStackmap(const CallInst &I); + void visitPatchpoint(const CallInst &I); void visitUserOp1(const Instruction &I) { llvm_unreachable("UserOp1 should not exist at instruction selection time!"); @@ -543,10 +770,13 @@ private: llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } + void processIntegerCallValue(const Instruction &I, + SDValue Value, bool IsSigned); + void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); /// EmitFuncArgumentDbgValue - If V is an function argument then create - /// corresponding DBG_VALUE machine instruction for it now. At the end of + /// corresponding DBG_VALUE machine instruction for it now. At the end of /// instruction selection, they will be inserted to the entry BB. bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, int64_t Offset, const SDValue &N); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index d8ee221..c04a08d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -142,6 +142,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FCEIL: return "fceil"; case ISD::FRINT: return "frint"; case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FROUND: return "fround"; case ISD::FEXP: return "fexp"; case ISD::FEXP2: return "fexp2"; case ISD::FLOG: return "flog"; @@ -223,6 +224,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FP_TO_SINT: return "fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; case ISD::BITCAST: return "bitcast"; + case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP32: return "fp16_to_fp32"; case ISD::FP32_TO_FP16: return "fp32_to_fp16"; @@ -484,6 +486,13 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << " " << offset; if (unsigned int TF = BA->getTargetFlags()) OS << " [TF=" << TF << ']'; + } else if (const AddrSpaceCastSDNode *ASC = + dyn_cast<AddrSpaceCastSDNode>(this)) { + OS << '[' + << ASC->getSrcAddressSpace() + << " -> " + << ASC->getDestAddressSpace() + << ']'; } if (unsigned Order = getIROrder()) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 01da51c..3a0cfa1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -223,6 +223,44 @@ defaultListDAGScheduler("default", "Best scheduler for the target", namespace llvm { 
//===--------------------------------------------------------------------===// + /// \brief This class is used by SelectionDAGISel to temporarily override + /// the optimization level on a per-function basis. + class OptLevelChanger { + SelectionDAGISel &IS; + CodeGenOpt::Level SavedOptLevel; + bool SavedFastISel; + + public: + OptLevelChanger(SelectionDAGISel &ISel, + CodeGenOpt::Level NewOptLevel) : IS(ISel) { + SavedOptLevel = IS.OptLevel; + if (NewOptLevel == SavedOptLevel) + return; + IS.OptLevel = NewOptLevel; + IS.TM.setOptLevel(NewOptLevel); + SavedFastISel = IS.TM.Options.EnableFastISel; + if (NewOptLevel == CodeGenOpt::None) + IS.TM.setFastISel(true); + DEBUG(dbgs() << "\nChanging optimization level for Function " + << IS.MF->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel + << " ; After: -O" << NewOptLevel << "\n"); + } + + ~OptLevelChanger() { + if (IS.OptLevel == SavedOptLevel) + return; + DEBUG(dbgs() << "\nRestoring optimization level for Function " + << IS.MF->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel + << " ; After: -O" << SavedOptLevel << "\n"); + IS.OptLevel = SavedOptLevel; + IS.TM.setOptLevel(SavedOptLevel); + IS.TM.setFastISel(SavedFastISel); + } + }; + + //===--------------------------------------------------------------------===// /// createDefaultScheduler - This creates an instruction scheduler appropriate /// for the target. ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, @@ -230,7 +268,7 @@ namespace llvm { const TargetLowering *TLI = IS->getTargetLowering(); const TargetSubtargetInfo &ST = IS->TM.getSubtarget<TargetSubtargetInfo>(); - if (OptLevel == CodeGenOpt::None || ST.enableMachineScheduler() || + if (OptLevel == CodeGenOpt::None || ST.useMachineScheduler() || TLI->getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); if (TLI->getSchedulingPreference() == Sched::RegPressure) @@ -356,6 +394,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { const Function &Fn = *mf.getFunction(); const TargetInstrInfo &TII = *TM.getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + const TargetLowering *TLI = TM.getTargetLowering(); MF = &mf; RegInfo = &MF->getRegInfo(); @@ -369,11 +408,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { ST.resetSubtargetFeatures(MF); TM.resetTargetOptions(MF); + // Reset OptLevel to None for optnone functions. + CodeGenOpt::Level NewOptLevel = OptLevel; + if (Fn.hasFnAttribute(Attribute::OptimizeNone)) + NewOptLevel = CodeGenOpt::None; + OptLevelChanger OLC(*this, NewOptLevel); + DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); - CurDAG->init(*MF, TTI); + CurDAG->init(*MF, TTI, TLI); FuncInfo->set(Fn, *MF); if (UseMBPI && OptLevel != CodeGenOpt::None) @@ -408,9 +453,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { EntryMBB->insert(EntryMBB->begin(), MI); else { MachineInstr *Def = RegInfo->getVRegDef(Reg); - MachineBasicBlock::iterator InsertPos = Def; - // FIXME: VR def may not be in entry block. - Def->getParent()->insert(llvm::next(InsertPos), MI); + if (Def) { + MachineBasicBlock::iterator InsertPos = Def; + // FIXME: VR def may not be in entry block. 
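OptLevelChanger above is the classic RAII save-and-restore idiom specialized to the optimization level and the fast-isel flag. A generic sketch of the same pattern (ScopedOverride is a name introduced here for illustration, not an LLVM utility):

#include <utility>

// Save a value on construction and restore it on destruction, so that every
// exit path (including early returns) undoes the override.
template <typename T>
class ScopedOverride {
  T &Slot;
  T Saved;
public:
  ScopedOverride(T &S, T NewValue) : Slot(S), Saved(S) {
    Slot = std::move(NewValue);
  }
  ~ScopedOverride() { Slot = std::move(Saved); }
private:
  ScopedOverride(const ScopedOverride &);            // not copyable
  ScopedOverride &operator=(const ScopedOverride &); // not assignable
};

// Usage, analogous to what runOnMachineFunction does for optnone functions:
//   ScopedOverride<int> OLC(OptLevel, 0); // drop to -O0 for this scope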
+ Def->getParent()->insert(llvm::next(InsertPos), MI); + } else + DEBUG(dbgs() << "Dropping debug info for dead vreg " + << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); } // If Reg is live-in then update debug info to track its copy in a vreg. @@ -422,7 +471,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MachineBasicBlock::iterator InsertPos = Def; const MDNode *Variable = MI->getOperand(MI->getNumOperands()-1).getMetadata(); - bool IsIndirect = MI->getOperand(1).isImm(); + bool IsIndirect = MI->isIndirectDebugValue(); unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; // Def is never a terminator here, so it is ok to increment InsertPos. BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), @@ -497,6 +546,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (J == E) break; To = J->second; } + // Make sure the new register has a sufficiently constrained register class. + if (TargetRegisterInfo::isVirtualRegister(From) && + TargetRegisterInfo::isVirtualRegister(To)) + MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. MRI.replaceRegWith(From, To); } @@ -617,6 +670,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); + CurDAG->NewNodesMustHaveLegalTypes = true; + if (Changed) { if (ViewDAGCombineLT) CurDAG->viewGraph("dag-combine-lt input for " + BlockName); @@ -1140,6 +1195,91 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { delete FastIS; SDB->clearDanglingDebugInfo(); + SDB->SPDescriptor.resetPerFunctionState(); +} + +/// Given that the input MI is before a partial terminator sequence TSeq, return +/// true if MI + TSeq is also a partial terminator sequence. +/// +/// A terminator sequence is a sequence of MachineInstrs which at this point in +/// lowering copy vregs into physical registers, which are then passed into +/// terminator instructions so we can satisfy ABI constraints. A partial +/// terminator sequence is an improper subset of a terminator sequence (i.e. it +/// may be the whole terminator sequence). +static bool MIIsInTerminatorSequence(const MachineInstr *MI) { + // If we do not have a copy or an implicit def, we return true if and only if + // MI is a debug value. + if (!MI->isCopy() && !MI->isImplicitDef()) + // Sometimes DBG_VALUE MIs sneak in between the copies from the vregs to the + // physical registers if there is debug info associated with the terminator + // of our mbb. We want to include said debug info in our terminator + // sequence, so we return true in that case. + return MI->isDebugValue(); + + // We have left the terminator sequence if we are not doing one of the + // following: + // + // 1. Copying a vreg into a physical register. + // 2. Copying a vreg into a vreg. + // 3. Defining a register via an implicit def. + + // OPI should always be a register definition... + MachineInstr::const_mop_iterator OPI = MI->operands_begin(); + if (!OPI->isReg() || !OPI->isDef()) + return false; + + // Defining any register via an implicit def is always ok. + if (MI->isImplicitDef()) + return true; + + // Grab the copy source... + MachineInstr::const_mop_iterator OPI2 = OPI; + ++OPI2; + assert(OPI2 != MI->operands_end() + && "Should have a copy implying we should have 2 arguments."); + + // Make sure that the copy dest is not a vreg when the copy source is a + // physical register.
+ if (!OPI2->isReg() || + (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) && + TargetRegisterInfo::isPhysicalRegister(OPI2->getReg()))) + return false; + + return true; +} + +/// Find the split point at which to splice the end of BB into its stack +/// protector check success machine basic block. +/// +/// On many platforms, due to ABI constraints, terminators, even before register +/// allocation, use physical registers. This creates an issue for us since +/// physical registers at this point cannot travel across basic +/// blocks. Luckily, SelectionDAG always moves physical registers into vregs +/// when they enter functions and moves them through a sequence of copies back +/// into the physical registers right before the terminator, creating a +/// ``Terminator Sequence''. This function searches for the beginning of the +/// terminator sequence so that we can ensure that we splice off not just the +/// terminator, but additionally the copies that move the vregs into the +/// physical registers. +static MachineBasicBlock::iterator +FindSplitPointForStackProtector(MachineBasicBlock *BB, DebugLoc DL) { + MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator(); + if (SplitPoint == BB->begin()) + return SplitPoint; + + MachineBasicBlock::iterator Start = BB->begin(); + MachineBasicBlock::iterator Previous = SplitPoint; + --Previous; + + while (MIIsInTerminatorSequence(Previous)) { + SplitPoint = Previous; + if (Previous == Start) + break; + --Previous; + } + + return SplitPoint; } void @@ -1152,11 +1292,13 @@ SelectionDAGISel::FinishBasicBlock() { << FuncInfo->PHINodesToUpdate[i].first << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); + const bool MustUpdatePHINodes = SDB->SwitchCases.empty() && + SDB->JTCases.empty() && + SDB->BitTestCases.empty(); + // Next, now that we know what the last MBB the LLVM BB expanded is, update // PHI nodes in successors. - if (SDB->SwitchCases.empty() && - SDB->JTCases.empty() && - SDB->BitTestCases.empty()) { + if (MustUpdatePHINodes) { for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); assert(PHI->isPHI() && @@ -1165,9 +1307,54 @@ SelectionDAGISel::FinishBasicBlock() { continue; PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); } - return; } + // Handle stack protector. + if (SDB->SPDescriptor.shouldEmitStackProtector()) { + MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB(); + MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB(); + + // Find the split point to split the parent mbb. At the same time copy all + // physical registers used in the tail of parent mbb into virtual registers + // before the split point and back into physical registers after the split + // point. This prevents us from needing to deal with Live-ins and many other + // register allocation issues caused by us splitting the parent mbb. The + // register allocator will clean up said virtual copies later on. + MachineBasicBlock::iterator SplitPoint = + FindSplitPointForStackProtector(ParentMBB, SDB->getCurDebugLoc()); + + // Splice the terminator of ParentMBB into SuccessMBB. + SuccessMBB->splice(SuccessMBB->end(), ParentMBB, + SplitPoint, + ParentMBB->end()); + + // Add the compare, jump-on-not-equal, and jump to the parent BB.
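FindSplitPointForStackProtector starts at the first terminator and walks backwards while the preceding instruction still belongs to the terminator sequence. The same backwards scan, over any bidirectional range with the membership test abstracted into a predicate (a simplified analogue of the MI loop, under assumed generic types):

// Walk backwards from First while the preceding element satisfies P; return
// the earliest position that is still part of the matching run.
template <typename BidirIt, typename Pred>
BidirIt scanBackWhile(BidirIt Begin, BidirIt First, Pred P) {
  BidirIt Split = First;
  while (Split != Begin) {
    BidirIt Prev = Split;
    --Prev;
    if (!P(*Prev))
      break;
    Split = Prev;
  }
  return Split;
}

The real function is this loop instantiated with Begin = BB->begin(), First = BB->getFirstTerminator(), and P = MIIsInTerminatorSequence.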
+ FuncInfo->MBB = ParentMBB; + FuncInfo->InsertPt = ParentMBB->end(); + SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + // CodeGen Failure MBB if we have not codegened it yet. + MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB(); + if (!FailureMBB->size()) { + FuncInfo->MBB = FailureMBB; + FuncInfo->InsertPt = FailureMBB->end(); + SDB->visitSPDescriptorFailure(SDB->SPDescriptor); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } + + // Clear the Per-BB State. + SDB->SPDescriptor.resetPerBBState(); + } + + // If we updated PHI Nodes, return early. + if (MustUpdatePHINodes) + return; + for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) { // Lower header first, if it wasn't already lowered if (!SDB->BitTestCases[i].Emitted) { @@ -1741,15 +1928,15 @@ WalkChainUsers(const SDNode *ChainedNode, SDNode *User = *UI; + if (User->getOpcode() == ISD::HANDLENODE) // Root of the graph. + continue; + // If we see an already-selected machine node, then we've gone beyond the // pattern that we're selecting down into the already selected chunk of the // DAG. - if (User->isMachineOpcode() || - User->getOpcode() == ISD::HANDLENODE) // Root of the graph. - continue; - unsigned UserOpcode = User->getOpcode(); - if (UserOpcode == ISD::CopyToReg || + if (User->isMachineOpcode() || + UserOpcode == ISD::CopyToReg || UserOpcode == ISD::CopyFromReg || UserOpcode == ISD::INLINEASM || UserOpcode == ISD::EH_LABEL || @@ -1886,7 +2073,6 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, } } - SDValue Res; if (InputChains.size() == 1) return InputChains[0]; return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]), @@ -1962,6 +2148,18 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, return N == RecordedNodes[RecNo].first; } +/// CheckChildSame - Implements OP_CheckChildXSame. +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, + const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes, + unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. + return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo), + RecordedNodes); +} + /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. 
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, @@ -2076,6 +2274,13 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, case SelectionDAGISel::OPC_CheckSame: Result = !::CheckSame(Table, Index, N, RecordedNodes); return Index; + case SelectionDAGISel::OPC_CheckChild0Same: + case SelectionDAGISel::OPC_CheckChild1Same: + case SelectionDAGISel::OPC_CheckChild2Same: + case SelectionDAGISel::OPC_CheckChild3Same: + Result = !::CheckChildSame(Table, Index, N, RecordedNodes, + Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same); + return Index; case SelectionDAGISel::OPC_CheckPatternPredicate: Result = !::CheckPatternPredicate(Table, Index, SDISel); return Index; @@ -2373,6 +2578,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckSame: if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break; continue; + + case OPC_CheckChild0Same: case OPC_CheckChild1Same: + case OPC_CheckChild2Same: case OPC_CheckChild3Same: + if (!::CheckChildSame(MatcherTable, MatcherIndex, N, RecordedNodes, + Opcode-OPC_CheckChild0Same)) + break; + continue; + case OPC_CheckPatternPredicate: if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break; continue; @@ -2432,7 +2645,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, } case OPC_SwitchType: { - MVT CurNodeVT = N.getValueType().getSimpleVT(); + MVT CurNodeVT = N.getSimpleValueType(); unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; unsigned CaseSize; while (1) { @@ -2544,7 +2757,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_EmitConvertToTarget: { // Convert from IMM/FPIMM to target version. unsigned RecNo = MatcherTable[MatcherIndex++]; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget"); SDValue Imm = RecordedNodes[RecNo].first; if (Imm->getOpcode() == ISD::Constant) { @@ -2569,7 +2782,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Read all of the chained nodes. unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); // FIXME: What if other value results of the node have uses not matched @@ -2606,7 +2819,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Read all of the chained nodes. 
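       // (Sketch of the table bytes this loop walks, assuming the usual
       // encoding: OPC_EmitMergeInputChains, <NumChains>, <RecNo0>, <RecNo1>,
       // ...; each <RecNo> indexes a previously recorded node whose chain is
       // folded into the merged token factor built by HandleMergeInputChains.)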
for (unsigned i = 0; i != NumChains; ++i) { unsigned RecNo = MatcherTable[MatcherIndex++]; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); // FIXME: What if other value results of the node have uses not matched @@ -2633,7 +2846,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_EmitCopyToReg: { unsigned RecNo = MatcherTable[MatcherIndex++]; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg"); unsigned DestPhysReg = MatcherTable[MatcherIndex++]; if (InputChain.getNode() == 0) @@ -2650,7 +2863,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_EmitNodeXForm: { unsigned XFormNo = MatcherTable[MatcherIndex++]; unsigned RecNo = MatcherTable[MatcherIndex++]; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm"); SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo); RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0)); continue; @@ -2827,7 +3040,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (RecNo & 128) RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid MarkGlueResults"); GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); } continue; @@ -2844,7 +3057,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (ResSlot & 128) ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex); - assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame"); + assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch"); SDValue Res = RecordedNodes[ResSlot].first; assert(i < NodeToMatch->getNumValues() && diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e3c6306..82b068d 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -64,13 +64,29 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, return isUsedByReturnOnly(Node, Chain); } +/// \brief Set CallLoweringInfo attribute flags based on a call instruction +/// and called function attributes. +void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS, + unsigned AttrIdx) { + isSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt); + isZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt); + isInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg); + isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); + isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); + isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); + isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); + Alignment = CS->getParamAlignment(AttrIdx); +} /// Generate a libcall taking the given operands as arguments and returning a /// result of type RetVT. 
-SDValue TargetLowering::makeLibCall(SelectionDAG &DAG,
-                                    RTLIB::Libcall LC, EVT RetVT,
-                                    const SDValue *Ops, unsigned NumOps,
-                                    bool isSigned, SDLoc dl) const {
+std::pair<SDValue, SDValue>
+TargetLowering::makeLibCall(SelectionDAG &DAG,
+                            RTLIB::Libcall LC, EVT RetVT,
+                            const SDValue *Ops, unsigned NumOps,
+                            bool isSigned, SDLoc dl,
+                            bool doesNotReturn,
+                            bool isReturnValueUsed) const {
   TargetLowering::ArgListTy Args;
   Args.reserve(NumOps);
@@ -89,11 +105,9 @@ SDValue TargetLowering::makeLibCall(SelectionDAG &DAG,
   CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
                        false, 0, getLibcallCallingConv(LC),
                        /*isTailCall=*/false,
-                       /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
-                       Callee, Args, DAG, dl);
-  std::pair<SDValue,SDValue> CallInfo = LowerCallTo(CLI);
-
-  return CallInfo.first;
+                       doesNotReturn, isReturnValueUsed, Callee, Args,
+                       DAG, dl);
+  return LowerCallTo(CLI);
 }
 
@@ -183,14 +197,16 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
   // Use the target-specific return value for comparison lib calls.
   EVT RetVT = getCmpLibcallReturnType();
   SDValue Ops[2] = { NewLHS, NewRHS };
-  NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+  NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/,
+                       dl).first;
   NewRHS = DAG.getConstant(0, RetVT);
   CCCode = getCmpLibcallCC(LC1);
   if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
     SDValue Tmp = DAG.getNode(ISD::SETCC, dl,
                               getSetCCResultType(*DAG.getContext(), RetVT),
                               NewLHS, NewRHS, DAG.getCondCode(CCCode));
-    NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+    NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/,
+                         dl).first;
     NewLHS = DAG.getNode(ISD::SETCC, dl,
                          getSetCCResultType(*DAG.getContext(), RetVT), NewLHS,
                          NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
@@ -632,6 +648,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
                        TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(),
                                        NarrowShl));
         }
+        // Repeat the SHL optimization above in cases where an extension
+        // intervenes: (shl (anyext (shr x, c1)), c2) to
+        // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
+        // aren't demanded (as above) and that the shifted upper c1 bits of
+        // x aren't demanded.
+        if (InOp.hasOneUse() &&
+            InnerOp.getOpcode() == ISD::SRL &&
+            InnerOp.hasOneUse() &&
+            isa<ConstantSDNode>(InnerOp.getOperand(1))) {
+          uint64_t InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1))
+            ->getZExtValue();
+          if (InnerShAmt < ShAmt &&
+              InnerShAmt < InnerBits &&
+              NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 &&
+              NewMask.trunc(ShAmt) == 0) {
+            SDValue NewSA =
+              TLO.DAG.getConstant(ShAmt - InnerShAmt,
+                                  Op.getOperand(1).getValueType());
+            EVT VT = Op.getValueType();
+            SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
+                                             InnerOp.getOperand(0));
+            return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
+                                                     NewExt, NewSA));
+          }
+        }
       }
 
       KnownZero <<= SA->getZExtValue();
@@ -722,13 +763,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
 
     // If the input sign bit is known to be zero, or if none of the top bits
     // are demanded, turn this into an unsigned shift right.
-    if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
+    if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits)
       return TLO.CombineTo(Op,
                            TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0),
                                            Op.getOperand(1)));
-    } else if (KnownOne.intersects(SignBit)) { // New bits are known one.
- KnownOne |= HighBits; + + int Log2 = NewMask.exactLogBase2(); + if (Log2 >= 0) { + // The bit must come from the sign. + SDValue NewSA = + TLO.DAG.getConstant(BitWidth - 1 - Log2, + Op.getOperand(1).getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, + Op.getOperand(0), NewSA)); } + + if (KnownOne.intersects(SignBit)) + // New bits are known one. + KnownOne |= HighBits; } break; case ISD::SIGN_EXTEND_INREG: { @@ -1077,13 +1129,20 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, case ISD::SETFALSE: case ISD::SETFALSE2: return DAG.getConstant(0, VT); case ISD::SETTRUE: - case ISD::SETTRUE2: return DAG.getConstant(1, VT); + case ISD::SETTRUE2: { + TargetLowering::BooleanContent Cnt = getBooleanContents(VT.isVector()); + return DAG.getConstant( + Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); + } } // Ensure that the constant occurs on the RHS, and fold constant // comparisons. - if (isa<ConstantSDNode>(N0.getNode())) - return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); + ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); + if (isa<ConstantSDNode>(N0.getNode()) && + (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) + return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); @@ -1178,6 +1237,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // the test is for equality or unsigned, and all 1 bits of the const are // in the same partial word, see if we can shorten the load. if (DCI.isBeforeLegalize() && + !ISD::isSignedIntSetCC(Cond) && N0.getOpcode() == ISD::AND && C1 == 0 && N0.getNode()->hasOneUse() && isa<LoadSDNode>(N0.getOperand(0)) && @@ -1322,7 +1382,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType().isInteger()); - return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType())) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); } if ((N0.getOpcode() == ISD::XOR || @@ -1759,16 +1821,22 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) { if (ValueHasExactlyOneBitSet(N1, DAG)) { Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); - SDValue Zero = DAG.getConstant(0, N1.getValueType()); - return DAG.getSetCC(dl, VT, N0, Zero, Cond); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(Cond, N0.getSimpleValueType())) { + SDValue Zero = DAG.getConstant(0, N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, Zero, Cond); + } } } if (N1.getOpcode() == ISD::AND) if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) { if (ValueHasExactlyOneBitSet(N0, DAG)) { Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); - SDValue Zero = DAG.getConstant(0, N0.getValueType()); - return DAG.getSetCC(dl, VT, N1, Zero, Cond); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(Cond, N1.getSimpleValueType())) { + SDValue Zero = DAG.getConstant(0, N0.getValueType()); + return DAG.getSetCC(dl, VT, N1, Zero, Cond); + } } } } @@ -1993,7 +2061,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, 
                                                                MVT VT) const {
-  if (Constraint[0] != '{')
+  if (Constraint.empty() || Constraint[0] != '{')
     return std::make_pair(0u, static_cast<TargetRegisterClass*>(0));
   assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
 
@@ -2142,8 +2210,9 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
           break;
         }
       } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
-        OpInfo.ConstraintVT = MVT::getIntegerVT(
-            8*getDataLayout()->getPointerSize(PT->getAddressSpace()));
+        unsigned PtrSize
+          = getDataLayout()->getPointerSizeInBits(PT->getAddressSpace());
+        OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
       } else {
         OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
       }
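A minimal caller-side sketch of the updated makeLibCall interface above. The
libcall choice (RTLIB::SIN_F64) and the operand names are illustrative, not
taken from this patch:

    // makeLibCall now returns {result, chain} instead of only the result.
    // Callers that previously consumed the returned SDValue directly now
    // take .first; lowering code that must thread the call's output chain
    // (e.g. for non-returning calls) can use .second.
    SDValue Ops[1] = { Src };
    std::pair<SDValue, SDValue> CallInfo =
        TLI.makeLibCall(DAG, RTLIB::SIN_F64, MVT::f64, Ops, 1,
                        /*isSigned=*/false, dl,
                        /*doesNotReturn=*/false, /*isReturnValueUsed=*/true);
    SDValue Result = CallInfo.first;    // value computed by the libcall
    SDValue OutChain = CallInfo.second; // chain token after the call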