aboutsummaryrefslogtreecommitdiffstats
path: root/lib/CodeGen/SelectionDAG
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp1111
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp40
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp36
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp374
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp129
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp81
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp30
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h17
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp90
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp26
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp268
-rw-r--r--lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp13
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp16
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp506
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp742
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h246
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp9
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp261
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp119
20 files changed, 3178 insertions, 938 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cb88941..43f72c5 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -35,6 +35,8 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -43,6 +45,7 @@ STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
+STATISTIC(SlicedLoads, "Number of load sliced");
namespace {
static cl::opt<bool>
@@ -53,6 +56,14 @@ namespace {
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
cl::desc("Include global information in alias analysis"));
+ /// Hidden option to stress test load slicing, i.e., when this option
+ /// is enabled, load slicing bypasses most of its profitability guards.
+ static cl::opt<bool>
+ StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
+ cl::desc("Bypass the profitability model of load "
+ "slicing"),
+ cl::init(false));
+
//------------------------------ DAGCombiner ---------------------------------//
class DAGCombiner {
@@ -62,6 +73,7 @@ namespace {
CodeGenOpt::Level OptLevel;
bool LegalOperations;
bool LegalTypes;
+ bool ForCodeSize;
// Worklist of all of the nodes that need to be simplified.
//
@@ -144,6 +156,7 @@ namespace {
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
+ bool SliceUpLoad(SDNode *N);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
@@ -283,11 +296,11 @@ namespace {
/// isAlias - Return true if there is any possibility that the two addresses
/// overlap.
- bool isAlias(SDValue Ptr1, int64_t Size1,
+ bool isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
const Value *SrcValue1, int SrcValueOffset1,
unsigned SrcValueAlign1,
const MDNode *TBAAInfo1,
- SDValue Ptr2, int64_t Size2,
+ SDValue Ptr2, int64_t Size2, bool IsVolatile2,
const Value *SrcValue2, int SrcValueOffset2,
unsigned SrcValueAlign2,
const MDNode *TBAAInfo2) const;
@@ -299,7 +312,7 @@ namespace {
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool FindAliasInfo(SDNode *N,
- SDValue &Ptr, int64_t &Size,
+ SDValue &Ptr, int64_t &Size, bool &IsVolatile,
const Value *&SrcValue, int &SrcValueOffset,
unsigned &SrcValueAlignment,
const MDNode *&TBAAInfo) const;
@@ -315,8 +328,15 @@ namespace {
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
- : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
- OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {}
+ : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
+ OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
+ AttributeSet FnAttrs =
+ DAG.getMachineFunction().getFunction()->getAttributes();
+ ForCodeSize =
+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize) ||
+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ }
/// Run - runs the dag combiner on all nodes in the work list
void Run(CombineLevel AtLevel);
@@ -329,7 +349,8 @@ namespace {
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
if (LHSTy.isVector())
return LHSTy;
- return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy) : TLI.getPointerTy();
+ return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy)
+ : TLI.getPointerTy();
}
/// isTypeLegal - This method returns true if we are running before type
@@ -744,9 +765,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
Replace = true;
return DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
- LD->getPointerInfo(),
- MemVT, LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ MemVT, LD->getMemOperand());
}
unsigned Opc = Op.getOpcode();
@@ -967,9 +986,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
: LD->getExtensionType();
SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
- LD->getPointerInfo(),
- MemVT, LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ MemVT, LD->getMemOperand());
SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
DEBUG(dbgs() << "\nPromoting ";
@@ -1017,7 +1034,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// try and combine it.
while (!WorkListContents.empty()) {
SDNode *N;
- // The WorkListOrder holds the SDNodes in order, but it may contain duplicates.
+ // The WorkListOrder holds the SDNodes in order, but it may contain
+ // duplicates.
// In order to avoid a linear scan, we use a set (O(log N)) to hold what the
// worklist *should* contain, and check the node we want to visit is should
// actually be visited.
@@ -1617,19 +1635,8 @@ static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
bool LegalOperations, bool LegalTypes) {
if (!VT.isVector())
return DAG.getConstant(0, VT);
- if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
- // Produce a vector of zeros.
- EVT ElemTy = VT.getVectorElementType();
- if (LegalTypes && TLI.getTypeAction(*DAG.getContext(), ElemTy) ==
- TargetLowering::TypePromoteInteger)
- ElemTy = TLI.getTypeToTransformTo(*DAG.getContext(), ElemTy);
- assert((!LegalTypes || TLI.isTypeLegal(ElemTy)) &&
- "Type for zero vector elements is not legal");
- SDValue El = DAG.getConstant(0, ElemTy);
- std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
- return DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
- &Ops[0], Ops.size());
- }
+ if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return DAG.getConstant(0, VT);
return SDValue();
}
@@ -1771,8 +1778,8 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) {
return SDValue();
}
-/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
-/// all the same constant or undefined.
+/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose
+/// elements are all the same constant or undefined.
static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
if (!C)
@@ -1808,9 +1815,11 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
} else {
N0IsConst = dyn_cast<ConstantSDNode>(N0) != 0;
- ConstValue0 = N0IsConst? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue() : APInt();
+ ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
+ : APInt();
N1IsConst = dyn_cast<ConstantSDNode>(N1) != 0;
- ConstValue1 = N1IsConst? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue() : APInt();
+ ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
+ : APInt();
}
// fold (mul c1, c2) -> c1*c2
@@ -1823,20 +1832,24 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold (mul x, 0) -> 0
if (N1IsConst && ConstValue1 == 0)
return N1;
+ // We require a splat of the entire scalar bit width for non-contiguous
+ // bit patterns.
+ bool IsFullSplat =
+ ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
// fold (mul x, 1) -> x
- if (N1IsConst && ConstValue1 == 1)
+ if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
return N0;
// fold (mul x, -1) -> 0-x
if (N1IsConst && ConstValue1.isAllOnesValue())
return DAG.getNode(ISD::SUB, SDLoc(N), VT,
DAG.getConstant(0, VT), N0);
// fold (mul x, (1 << c)) -> x << c
- if (N1IsConst && ConstValue1.isPowerOf2())
+ if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat)
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
DAG.getConstant(ConstValue1.logBase2(),
getShiftAmountTy(N0.getValueType())));
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
- if (N1IsConst && (-ConstValue1).isPowerOf2()) {
+ if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) {
unsigned Log2Val = (-ConstValue1).logBase2();
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
@@ -2675,6 +2688,19 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
}
}
+ // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
+ if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
+ Op0 == Op1 && LL.getValueType().isInteger() &&
+ Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() &&
+ cast<ConstantSDNode>(RR)->isAllOnesValue()) ||
+ (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+ cast<ConstantSDNode>(RR)->isNullValue()))) {
+ SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
+ LL, DAG.getConstant(1, LL.getValueType()));
+ AddToWorkList(ADDNode.getNode());
+ return DAG.getSetCC(SDLoc(N), VT, ADDNode,
+ DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
+ }
// canonicalize equivalent to ll == rl
if (LL == RR && LR == RL) {
Op1 = ISD::getSetCCSwappedOperands(Op1);
@@ -2718,9 +2744,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(), MemVT,
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ MemVT, LN0->getMemOperand());
AddToWorkList(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -2739,11 +2763,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
- LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- MemVT,
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getChain(), LN0->getBasePtr(),
+ MemVT, LN0->getMemOperand());
AddToWorkList(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -2773,10 +2794,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue NewLoad =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
- LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(),
- ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getChain(), LN0->getBasePtr(), ExtVT,
+ LN0->getMemOperand());
AddToWorkList(N);
CombineTo(LN0, NewLoad, NewLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -2812,7 +2831,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
LN0->getChain(), NewPtr,
LN0->getPointerInfo(),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
- Alignment);
+ Alignment, LN0->getTBAAInfo());
AddToWorkList(N);
CombineTo(LN0, Load, Load.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -2848,6 +2867,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
+ // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
+ if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
+ SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+ N0.getOperand(1), false);
+ if (BSwap.getNode())
+ return BSwap;
+ }
+
return SDValue();
}
@@ -2932,13 +2959,23 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
if (N00 != N10)
return SDValue();
- // Make sure everything beyond the low halfword is zero since the SRL 16
- // will clear the top bits.
+ // Make sure everything beyond the low halfword gets set to zero since the SRL
+ // 16 will clear the top bits.
unsigned OpSizeInBits = VT.getSizeInBits();
- if (DemandHighBits && OpSizeInBits > 16 &&
- (!LookPassAnd0 || !LookPassAnd1) &&
- !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16)))
- return SDValue();
+ if (DemandHighBits && OpSizeInBits > 16) {
+ // If the left-shift isn't masked out then the only way this is a bswap is
+ // if all bits beyond the low 8 are 0. In that case the entire pattern
+ // reduces to a left shift anyway: leave it for other parts of the combiner.
+ if (!LookPassAnd0)
+ return SDValue();
+
+ // However, if the right shift isn't masked out then it might be because
+ // it's not needed. See if we can spot that too.
+ if (!LookPassAnd1 &&
+ !DAG.MaskedValueIsZero(
+ N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
+ return SDValue();
+ }
SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
if (OpSizeInBits > 16)
@@ -3078,7 +3115,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT,
SDValue(Parts[0],0));
- // Result of the bswap should be rotated by 16. If it's not legal, than
+ // Result of the bswap should be rotated by 16. If it's not legal, then
// do (x << 16) | (x >> 16).
SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
@@ -3343,29 +3380,9 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
if (LHSMask.getNode() || RHSMask.getNode())
return 0;
- // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
- // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
- if (RHSShiftAmt.getOpcode() == ISD::SUB &&
- LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
- if (ConstantSDNode *SUBC =
- dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
- if (SUBC->getAPIntValue() == OpSizeInBits)
- return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
- HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
- }
- }
-
- // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y)
- // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y))
- if (LHSShiftAmt.getOpcode() == ISD::SUB &&
- RHSShiftAmt == LHSShiftAmt.getOperand(1))
- if (ConstantSDNode *SUBC =
- dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0)))
- if (SUBC->getAPIntValue() == OpSizeInBits)
- return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
- HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
-
- // Look for sign/zext/any-extended or truncate cases:
+ // If the shift amount is sign/zext/any-extended just peel it off.
+ SDValue LExtOp0 = LHSShiftAmt;
+ SDValue RExtOp0 = RHSShiftAmt;
if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
@@ -3374,33 +3391,31 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
- SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
- SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
- if (RExtOp0.getOpcode() == ISD::SUB &&
- RExtOp0.getOperand(1) == LExtOp0) {
- // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
- // (rotl x, y)
- // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
- // (rotr x, (sub 32, y))
- if (ConstantSDNode *SUBC =
+ LExtOp0 = LHSShiftAmt.getOperand(0);
+ RExtOp0 = RHSShiftAmt.getOperand(0);
+ }
+
+ if (RExtOp0.getOpcode() == ISD::SUB && RExtOp0.getOperand(1) == LExtOp0) {
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotl x, y)
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotr x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0)))
- if (SUBC->getAPIntValue() == OpSizeInBits)
- return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
- LHSShiftArg,
- HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
- } else if (LExtOp0.getOpcode() == ISD::SUB &&
- RExtOp0 == LExtOp0.getOperand(1)) {
- // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
- // (rotr x, y)
- // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
- // (rotl x, (sub 32, y))
- if (ConstantSDNode *SUBC =
+ if (SUBC->getAPIntValue() == OpSizeInBits)
+ return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
+ } else if (LExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0 == LExtOp0.getOperand(1)) {
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotr x, y)
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotl x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0)))
- if (SUBC->getAPIntValue() == OpSizeInBits)
- return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
- LHSShiftArg,
- HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
- }
+ if (SUBC->getAPIntValue() == OpSizeInBits)
+ return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
+ HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
}
return 0;
@@ -3620,6 +3635,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
// fold (shl c1, c2) -> c1<<c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
@@ -3697,6 +3718,27 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
}
+ // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
+ // Only fold this if the inner zext has no other uses to avoid increasing
+ // the total number of instructions.
+ if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ if (c1 < VT.getSizeInBits()) {
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 == c2) {
+ SDValue NewOp0 = N0.getOperand(0);
+ EVT CountVT = NewOp0.getOperand(1).getValueType();
+ SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
+ NewOp0, DAG.getConstant(c2, CountVT));
+ AddToWorkList(NewSHL.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
+ }
+ }
+ }
+
// fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
// (and (srl x, (sub c1, c2), MASK)
// Only fold this if the inner shift has no other uses -- if it does, folding
@@ -3750,6 +3792,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
// fold (sra c1, c2) -> (sra c1, c2)
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
@@ -3895,6 +3943,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
// fold (srl c1, c2) -> c1 >>u c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
@@ -4217,6 +4271,23 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return SDValue();
}
+static
+std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ EVT LoVT, HiVT;
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ // Split the inputs.
+ SDValue Lo, Hi, LL, LH, RL, RH;
+ llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+ llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+
+ return std::make_pair(Lo, Hi);
+}
+
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4254,6 +4325,34 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
}
}
+ // If the VSELECT result requires splitting and the mask is provided by a
+ // SETCC, then split both nodes and their operands before legalization. This
+ // prevents the type legalizer from unrolling SETCC into scalar comparisons
+ // and enables future optimizations (e.g. min/max pattern matching on X86).
+ if (N0.getOpcode() == ISD::SETCC) {
+ EVT VT = N->getValueType(0);
+
+ // Check if any splitting is required.
+ if (TLI.getTypeAction(*DAG.getContext(), VT) !=
+ TargetLowering::TypeSplitVector)
+ return SDValue();
+
+ SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
+ llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
+ llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
+ llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
+
+ Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
+
+ // Add the new VSELECT nodes to the work list in case they need to be split
+ // again.
+ AddToWorkList(Lo.getNode());
+ AddToWorkList(Hi.getNode());
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+ }
+
return SDValue();
}
@@ -4469,10 +4568,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- N0.getValueType(),
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
@@ -4493,10 +4590,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- MemVT,
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getBasePtr(), MemVT,
+ LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
@@ -4524,11 +4619,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (DoXform) {
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(),
LN0->getMemoryVT(),
- LN0->isVolatile(),
- LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.sext(VT.getSizeInBits());
SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
@@ -4593,9 +4685,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) {
return DAG.getSelect(SDLoc(N), VT,
DAG.getSetCC(SDLoc(N),
- getSetCCResultType(VT),
- N0.getOperand(0), N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+ getSetCCResultType(VT),
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get()),
NegOne, DAG.getConstant(0, VT));
}
}
@@ -4762,10 +4854,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- N0.getValueType(),
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
@@ -4795,11 +4885,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (DoXform) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(),
LN0->getMemoryVT(),
- LN0->isVolatile(),
- LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
@@ -4826,10 +4913,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- MemVT,
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getBasePtr(), MemVT,
+ LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
@@ -4992,10 +5077,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- N0.getValueType(),
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
@@ -5016,9 +5099,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(N),
VT, LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(), MemVT,
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ MemVT, LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
@@ -5250,12 +5331,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->isInvariant(), NewAlign);
+ LN0->isInvariant(), NewAlign, LN0->getTBAAInfo());
else
Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
- NewAlign);
+ NewAlign, LN0->getTBAAInfo());
// Replace the old load's chain with the new load's chain.
WorkListRemover DeadNodes(*this);
@@ -5353,10 +5434,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- EVT,
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getBasePtr(), EVT,
+ LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
AddToWorkList(ExtLoad.getNode());
@@ -5371,10 +5450,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- EVT,
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getBasePtr(), EVT,
+ LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -5657,7 +5734,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile() &&
- (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+ TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
unsigned Align = TLI.getDataLayout()->
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
@@ -5667,7 +5745,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(),
LN0->getBasePtr(), LN0->getPointerInfo(),
LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->isInvariant(), OrigAlign);
+ LN0->isInvariant(), OrigAlign,
+ LN0->getTBAAInfo());
AddToWorkList(N);
CombineTo(N0.getNode(),
DAG.getNode(ISD::BITCAST, SDLoc(N0),
@@ -6652,16 +6731,14 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
}
// fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
- if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() &&
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- N0.getValueType(),
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
@@ -7451,13 +7528,16 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
LD->getValueType(0),
Chain, Ptr, LD->getPointerInfo(),
LD->getMemoryVT(),
- LD->isVolatile(), LD->isNonTemporal(), Align);
+ LD->isVolatile(), LD->isNonTemporal(), Align,
+ LD->getTBAAInfo());
return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
}
}
}
- if (CombinerAA) {
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
+ TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+ if (UseAA) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
@@ -7468,17 +7548,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// Replace the chain to void dependency.
if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
- BetterChain, Ptr, LD->getPointerInfo(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment());
+ BetterChain, Ptr, LD->getMemOperand());
} else {
ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
LD->getValueType(0),
- BetterChain, Ptr, LD->getPointerInfo(),
- LD->getMemoryVT(),
- LD->isVolatile(),
- LD->isNonTemporal(),
- LD->getAlignment());
+ BetterChain, Ptr, LD->getMemoryVT(),
+ LD->getMemOperand());
}
// Create token factor to keep old chain connected.
@@ -7498,9 +7573,562 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
+ // Try to slice up N to more direct loads if the slices are mapped to
+ // different register banks or pairing can take place.
+ if (SliceUpLoad(N))
+ return SDValue(N, 0);
+
return SDValue();
}
+namespace {
+/// \brief Helper structure used to slice a load into smaller loads.
+/// Basically a slice is obtained from the following sequence:
+/// Origin = load Ty1, Base
+/// Shift = srl Ty1 Origin, CstTy Amount
+/// Inst = trunc Shift to Ty2
+///
+/// Then, it will be rewritten into:
+/// Slice = load SliceTy, Base + SliceOffset
+/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
+///
+/// SliceTy is deduced from the number of bits that are actually used to
+/// build Inst.
+struct LoadedSlice {
+ /// \brief Helper structure used to compute the cost of a slice.
+ struct Cost {
+ /// Are we optimizing for code size?
+ bool ForCodeSize;
+ /// The individual cost counters.
+ unsigned Loads;
+ unsigned Truncates;
+ unsigned CrossRegisterBanksCopies;
+ unsigned ZExts;
+ unsigned Shift;
+
+ Cost(bool ForCodeSize = false)
+ : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
+ CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
+
+ /// \brief Get the cost of one isolated slice: one load, plus one zext
+ /// when the loaded type differs from the truncated type and the target
+ /// does not report that zext as free.
+ Cost(const LoadedSlice &LS, bool ForCodeSize = false)
+ : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
+ CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
+ EVT TruncType = LS.Inst->getValueType(0);
+ EVT LoadedType = LS.getLoadedType();
+ if (TruncType != LoadedType &&
+ !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
+ ZExts = 1;
+ }
+
+ /// \brief Account for slicing gain in the current cost.
+ /// Slicing provides a few gains like removing a shift or a
+ /// truncate. This method grows the cost of the original
+ /// load by the gain obtained from this slice.
+ void addSliceGain(const LoadedSlice &LS) {
+ // Each slice saves a truncate, unless the target says it is free.
+ const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
+ if (!TLI.isTruncateFree(LS.Inst->getValueType(0),
+ LS.Inst->getOperand(0).getValueType()))
+ ++Truncates;
+ // If there is a shift amount, this slice gets rid of it.
+ if (LS.Shift)
+ ++Shift;
+ // If this slice can merge a cross register bank copy, account for it.
+ if (LS.canMergeExpensiveCrossRegisterBankCopy())
+ ++CrossRegisterBanksCopies;
+ }
+
+ Cost &operator+=(const Cost &RHS) {
+ Loads += RHS.Loads;
+ Truncates += RHS.Truncates;
+ CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
+ ZExts += RHS.ZExts;
+ Shift += RHS.Shift;
+ return *this;
+ }
+
+ bool operator==(const Cost &RHS) const {
+ return Loads == RHS.Loads && Truncates == RHS.Truncates &&
+ CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
+ ZExts == RHS.ZExts && Shift == RHS.Shift;
+ }
+
+ bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
+
+ bool operator<(const Cost &RHS) const {
+ // Assume cross register banks copies are as expensive as loads.
+ // FIXME: Do we want some more target hooks?
+ unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
+ unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
+ // Unless we are optimizing for code size, consider the
+ // expensive operations first.
+ if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
+ return ExpensiveOpsLHS < ExpensiveOpsRHS;
+ return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
+ (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
+ }
+
+ bool operator>(const Cost &RHS) const { return RHS < *this; }
+
+ bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
+
+ bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
+ };
+ // The last instruction that represents the slice. This should be a
+ // truncate instruction.
+ SDNode *Inst;
+ // The original load instruction.
+ LoadSDNode *Origin;
+ // The right shift amount in bits from the original load.
+ unsigned Shift;
+ // The DAG from which Origin came.
+ // This is used to get some contextual information about legal types, etc.
+ SelectionDAG *DAG;
+
+ LoadedSlice(SDNode *Inst = NULL, LoadSDNode *Origin = NULL,
+ unsigned Shift = 0, SelectionDAG *DAG = NULL)
+ : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
+
+ LoadedSlice(const LoadedSlice &LS)
+ : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {}
+
+ /// \brief Get the bits used in a chunk of bits \p BitWidth large.
+ /// \return Result is \p BitWidth and has used bits set to 1 and
+ /// not used bits set to 0.
+ APInt getUsedBits() const {
+ // Reproduce the trunc(lshr) sequence:
+ // - Start from the truncated value.
+ // - Zero extend to the desired bit width.
+ // - Shift left.
+ assert(Origin && "No original load to compare against.");
+ unsigned BitWidth = Origin->getValueSizeInBits(0);
+ assert(Inst && "This slice is not bound to an instruction");
+ assert(Inst->getValueSizeInBits(0) <= BitWidth &&
+ "Extracted slice is bigger than the whole type!");
+ APInt UsedBits(Inst->getValueSizeInBits(0), 0);
+ UsedBits.setAllBits();
+ UsedBits = UsedBits.zext(BitWidth);
+ UsedBits <<= Shift;
+ return UsedBits;
+ }
+
+ /// \brief Get the size of the slice to be loaded in bytes.
+ unsigned getLoadedSize() const {
+ unsigned SliceSize = getUsedBits().countPopulation();
+ assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
+ return SliceSize / 8;
+ }
+
+ /// \brief Get the type that will be loaded for this slice.
+ /// Note: This may not be the final type for the slice.
+ EVT getLoadedType() const {
+ assert(DAG && "Missing context");
+ LLVMContext &Ctxt = *DAG->getContext();
+ return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
+ }
+
+ /// \brief Get the alignment of the load used for this slice.
+ unsigned getAlignment() const {
+ unsigned Alignment = Origin->getAlignment();
+ unsigned Offset = getOffsetFromBase();
+ if (Offset != 0)
+ Alignment = MinAlign(Alignment, Alignment + Offset);
+ return Alignment;
+ }
+
+ /// \brief Check if this slice can be rewritten with legal operations.
+ bool isLegal() const {
+ // An invalid slice is not legal.
+ if (!Origin || !Inst || !DAG)
+ return false;
+
+ // Offsets are for indexed loads only; we do not handle that.
+ if (Origin->getOffset().getOpcode() != ISD::UNDEF)
+ return false;
+
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+
+ // Check that the type is legal.
+ EVT SliceType = getLoadedType();
+ if (!TLI.isTypeLegal(SliceType))
+ return false;
+
+ // Check that the load is legal for this type.
+ if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
+ return false;
+
+ // Check that the offset can be computed.
+ // 1. Check its type.
+ EVT PtrType = Origin->getBasePtr().getValueType();
+ if (PtrType == MVT::Untyped || PtrType.isExtended())
+ return false;
+
+ // 2. Check that it fits in the immediate.
+ if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
+ return false;
+
+ // 3. Check that the computation is legal.
+ if (!TLI.isOperationLegal(ISD::ADD, PtrType))
+ return false;
+
+ // Check that the zext is legal if it needs one.
+ EVT TruncateType = Inst->getValueType(0);
+ if (TruncateType != SliceType &&
+ !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
+ return false;
+
+ return true;
+ }
+
+ /// \brief Get the offset in bytes of this slice in the original chunk of
+ /// bits.
+ /// \pre DAG != NULL.
+ uint64_t getOffsetFromBase() const {
+ assert(DAG && "Missing context.");
+ bool IsBigEndian =
+ DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian();
+ assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
+ uint64_t Offset = Shift / 8;
+ unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
+ assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
+ "The size of the original loaded type is not a multiple of a"
+ " byte.");
+ // If Offset is not smaller than TySizeInBytes, it means we are loading all
+ // zeros. This should have been optimized before in the process.
+ assert(TySizeInBytes > Offset &&
+ "Invalid shift amount for given loaded size");
+ if (IsBigEndian)
+ Offset = TySizeInBytes - Offset - getLoadedSize();
+ return Offset;
+ }
+
+ /// \brief Generate the sequence of instructions to load the slice
+ /// represented by this object and redirect the uses of this slice to
+ /// this new sequence of instructions.
+ /// \pre this->Inst && this->Origin are valid Instructions and this
+ /// object passed the legal check: LoadedSlice::isLegal returned true.
+ /// \return The last instruction of the sequence used to load the slice.
+ SDValue loadSlice() const {
+ assert(Inst && Origin && "Unable to replace a non-existing slice.");
+ const SDValue &OldBaseAddr = Origin->getBasePtr();
+ SDValue BaseAddr = OldBaseAddr;
+ // Get the offset in that chunk of bytes w.r.t. the endianness.
+ int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
+ assert(Offset >= 0 && "Offset too big to fit in int64_t!");
+ if (Offset) {
+ // BaseAddr = BaseAddr + Offset.
+ EVT ArithType = BaseAddr.getValueType();
+ BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr,
+ DAG->getConstant(Offset, ArithType));
+ }
+
+ // Create the type of the loaded slice according to its size.
+ EVT SliceType = getLoadedType();
+
+ // Create the load for the slice.
+ SDValue LastInst = DAG->getLoad(
+ SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
+ Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
+ Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
+ // If the final type is not the same as the loaded type, this means that
+ // we have to pad with zeros. Create a zero extend for that.
+ EVT FinalType = Inst->getValueType(0);
+ if (SliceType != FinalType)
+ LastInst =
+ DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
+ return LastInst;
+ }
+
+ /// \brief Check if this slice can be merged with an expensive cross register
+ /// bank copy. E.g.,
+ /// i = load i32
+ /// f = bitcast i32 i to float
+ bool canMergeExpensiveCrossRegisterBankCopy() const {
+ if (!Inst || !Inst->hasOneUse())
+ return false;
+ SDNode *Use = *Inst->use_begin();
+ if (Use->getOpcode() != ISD::BITCAST)
+ return false;
+ assert(DAG && "Missing context");
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ EVT ResVT = Use->getValueType(0);
+ const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
+ const TargetRegisterClass *ArgRC =
+ TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
+ if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
+ return false;
+
+ // At this point, we know that we perform a cross-register-bank copy.
+ // Check if it is expensive.
+ const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo();
+ // Assume bitcasts are cheap, unless both register classes do not
+ // explicitly share a common sub class.
+ if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
+ return false;
+
+ // Check if it will be merged with the load.
+ // 1. Check the alignment constraint.
+ unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment(
+ ResVT.getTypeForEVT(*DAG->getContext()));
+
+ if (RequiredAlignment > getAlignment())
+ return false;
+
+ // 2. Check that the load is a legal operation for that type.
+ if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
+ return false;
+
+ // 3. Check that we do not have a zext in the way.
+ if (Inst->getValueType(0) != getLoadedType())
+ return false;
+
+ return true;
+ }
+};
+}
+
+namespace {
+/// \brief Sorts LoadedSlice according to their offset.
+///
+/// Comparator returning true when \p LHS starts at a strictly smaller byte
+/// offset (LoadedSlice::getOffsetFromBase) than \p RHS. Both slices must come
+/// from the same original load.
+///
+/// Kept in an anonymous namespace because it only makes sense for the
+/// file-local LoadedSlice type; the call operator is const since it does not
+/// touch any state.
+struct LoadedSliceSorter {
+  bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) const {
+    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
+    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
+  }
+};
+} // end anonymous namespace
+
+/// \brief Tell whether the bits set in \p UsedBits make up one contiguous
+/// run, i.e., whether \p UsedBits has the shape 0..0 1..1 0..0.
+///
+/// An all-zero mask gets no special handling here.
+static bool areUsedBitsDense(const APInt &UsedBits) {
+  // A mask with every bit set is trivially one contiguous run.
+  if (UsedBits.isAllOnesValue())
+    return true;
+
+  // Shift the lowest set bit down to position 0, dropping the low zeros.
+  APInt Region = UsedBits.lshr(UsedBits.countTrailingZeros());
+  // Drop the zeros above the highest set bit by narrowing the value.
+  if (Region.countLeadingZeros())
+    Region = Region.trunc(Region.getActiveBits());
+  // What is left must consist of ones only for the region to be dense.
+  return Region.isAllOnesValue();
+}
+
+/// \brief Tell whether \p First and \p Second are adjacent in memory, that
+/// is, whether the bits loaded by \p First together with the bits loaded by
+/// \p Second leave no hole between them.
+static bool areSlicesNextToEachOther(const LoadedSlice &First,
+                                     const LoadedSlice &Second) {
+  assert(First.Origin == Second.Origin && First.Origin &&
+         "Unable to match different memory origins.");
+  const APInt FirstBits = First.getUsedBits();
+  const APInt SecondBits = Second.getUsedBits();
+  assert((FirstBits & SecondBits) == 0 &&
+         "Slices are not supposed to overlap.");
+  // The two slices are adjacent iff their combined footprint is dense.
+  return areUsedBitsDense(FirstBits | SecondBits);
+}
+
+/// \brief Adjust the \p GlobalLSCost according to the target
+/// pairing capabilities and the layout of the slices.
+/// \pre \p GlobalLSCost should account for at least as many loads as
+/// there are slices in \p LoadedSlices.
+static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
+ LoadedSlice::Cost &GlobalLSCost) {
+ unsigned NumberOfSlices = LoadedSlices.size();
+ // If there are fewer than 2 elements, no pairing is possible.
+ if (NumberOfSlices < 2)
+ return;
+
+ // Sort the slices so that elements that are likely to be next to each
+ // other in memory are next to each other in the list.
+ std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter());
+ const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
+ // First (resp. Second) is the first (resp. second) potential candidate
+ // to be placed in a paired load.
+ const LoadedSlice *First = NULL;
+ const LoadedSlice *Second = NULL;
+ for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
+ // Set the beginning of the pair; this also runs after every 'continue'.
+ First = Second) {
+
+ Second = &LoadedSlices[CurrSlice];
+
+ // If First is NULL, it means we start a new pair.
+ // Get to the next slice.
+ if (!First)
+ continue;
+
+ EVT LoadedType = First->getLoadedType();
+
+ // If the types of the slices are different, we cannot pair them.
+ if (LoadedType != Second->getLoadedType())
+ continue;
+
+ // Check if the target supplies paired loads for this type.
+ unsigned RequiredAlignment = 0;
+ if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
+ // Move to the next pair, this type is hopeless.
+ Second = NULL;
+ continue;
+ }
+ // Check if we meet the alignment requirement.
+ if (RequiredAlignment > First->getAlignment())
+ continue;
+
+ // Check that both loads are next to each other in memory.
+ if (!areSlicesNextToEachOther(*First, *Second))
+ continue;
+
+ assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
+ --GlobalLSCost.Loads;
+ // Move to the next pair: clearing Second makes First NULL on increment.
+ Second = NULL;
+ }
+}
+
+/// \brief Decide whether rewriting the load as the given slices pays off.
+/// Slicing is currently accepted only when (1) there are exactly two
+/// slices, (2) the bits they use are contiguous in memory, and (3) their
+/// accumulated cost (\see LoadedSlice::Cost) is smaller than the cost of the
+/// original load. When StressLoadSlicing is set, any set of more than one
+/// slice is accepted without looking at (1)-(3).
+///
+/// Note: The order of the elements in \p LoadedSlices may be modified, but not
+/// the elements themselves.
+///
+/// FIXME: When the cost model will be mature enough, we can relax
+/// constraints (1) and (2).
+static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
+                                const APInt &UsedBits, bool ForCodeSize) {
+  const unsigned SliceCount = LoadedSlices.size();
+  if (StressLoadSlicing)
+    return SliceCount > 1;
+
+  // Constraints (1) and (2): exactly two slices, with no hole between them.
+  if (SliceCount != 2 || !areUsedBitsDense(UsedBits))
+    return false;
+
+  // Constraint (3): compare the sliced cost against the unsliced one.
+  LoadedSlice::Cost UnslicedCost(ForCodeSize);
+  LoadedSlice::Cost SlicedCost(ForCodeSize);
+  // Without slicing there is a single big load.
+  UnslicedCost.Loads = 1;
+  for (SmallVectorImpl<LoadedSlice>::const_iterator It = LoadedSlices.begin(),
+                                                    End = LoadedSlices.end();
+       It != End; ++It) {
+    // Sum up what every individual slice costs.
+    SlicedCost += LoadedSlice::Cost(*It, ForCodeSize);
+
+    // Whatever this slice saves is charged to the original configuration.
+    UnslicedCost.addSliceGain(*It);
+  }
+
+  // Let the target's paired-load support lower the sliced cost.
+  adjustCostForPairing(LoadedSlices, SlicedCost);
+  return SlicedCost < UnslicedCost;
+}
+
+/// \brief If the given load, \p N, is used only by trunc or trunc(lshr)
+/// operations, split it in the various pieces being extracted.
+///
+/// This sort of thing is introduced by SROA.
+/// This slicing takes care not to insert overlapping loads.
+///
+/// The transformation is only attempted once Level has reached
+/// AfterLegalizeDAG. Loads that are volatile, that are not "normal" according
+/// to ISD::isNormalLoad, or that do not produce an integer are rejected here.
+///
+/// \param N the load to slice; it must be a LoadSDNode.
+/// \return true if the load has been sliced: every truncate has then been
+/// replaced by its own narrower load (plus a zext when needed) and the
+/// users of the chain of \p N now use a TokenFactor of the new loads.
+/// false if nothing has been changed.
+bool DAGCombiner::SliceUpLoad(SDNode *N) {
+  if (Level < AfterLegalizeDAG)
+    return false;
+
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
+      !LD->getValueType(0).isInteger())
+    return false;
+
+  // Keep track of already used bits to detect overlapping values.
+  // In that case, we will just abort the transformation.
+  APInt UsedBits(LD->getValueSizeInBits(0), 0);
+
+  SmallVector<LoadedSlice, 4> LoadedSlices;
+
+  // Check if this load is used as several smaller chunks of bits.
+  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
+  // of computation for each trunc.
+  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
+       UI != UIEnd; ++UI) {
+    // Skip the uses of the chain.
+    if (UI.getUse().getResNo() != 0)
+      continue;
+
+    SDNode *User = *UI;
+    unsigned Shift = 0;
+
+    // Check if this is a trunc(lshr).
+    if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
+        isa<ConstantSDNode>(User->getOperand(1))) {
+      Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
+      User = *User->use_begin();
+    }
+
+    // At this point, User is a Truncate, iff we encountered, trunc or
+    // trunc(lshr).
+    if (User->getOpcode() != ISD::TRUNCATE)
+      return false;
+
+    // The width of the type must be a power of 2 and at least 8 bits.
+    // Otherwise the load cannot be represented in LLVM IR.
+    // Moreover, if we shifted with a non 8-bits multiple, the slice
+    // will be across several bytes. We do not support that.
+    unsigned Width = User->getValueSizeInBits(0);
+    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
+      return false;
+
+    // Build the slice for this chain of computations.
+    LoadedSlice LS(User, LD, Shift, &DAG);
+    APInt CurrentUsedBits = LS.getUsedBits();
+
+    // Check if this slice overlaps with another.
+    if ((CurrentUsedBits & UsedBits) != 0)
+      return false;
+    // Update the bits used globally.
+    UsedBits |= CurrentUsedBits;
+
+    // Check if the new slice would be legal.
+    if (!LS.isLegal())
+      return false;
+
+    // Record the slice.
+    LoadedSlices.push_back(LS);
+  }
+
+  // Abort slicing if it does not seem to be profitable.
+  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
+    return false;
+
+  ++SlicedLoads;
+
+  // Rewrite each chain to use an independent load.
+  // By construction, each chain can be represented by a unique load.
+
+  // Prepare the argument for the new token factor for all the slices.
+  SmallVector<SDValue, 8> ArgChains;
+  for (SmallVectorImpl<LoadedSlice>::const_iterator
+           LSIt = LoadedSlices.begin(),
+           LSItEnd = LoadedSlices.end();
+       LSIt != LSItEnd; ++LSIt) {
+    SDValue SliceInst = LSIt->loadSlice();
+    CombineTo(LSIt->Inst, SliceInst, true);
+    // loadSlice may have wrapped the load in a zext; step back to the load
+    // itself so that its chain result can be collected.
+    if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
+      SliceInst = SliceInst.getOperand(0);
+    assert(SliceInst->getOpcode() == ISD::LOAD &&
+           "It takes more than a zext to get to the loaded slice!!");
+    ArgChains.push_back(SliceInst.getValue(1));
+  }
+
+  // Tie the chains of all the new loads together and make the users of the
+  // original load's chain depend on that instead.
+  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
+                              &ArgChains[0], ArgChains.size());
+  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+  return true;
+}
+
/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
/// load is having specific bytes cleared out. If so, return the byte size
/// being masked out and the shift amount.
@@ -7735,7 +8363,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff),
LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), NewAlign);
+ LD->isInvariant(), NewAlign,
+ LD->getTBAAInfo());
SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
DAG.getConstant(NewImm, NewVT));
SDValue NewST = DAG.getStore(Chain, SDLoc(N),
@@ -7846,17 +8475,28 @@ struct BaseIndexOffset {
static BaseIndexOffset match(SDValue Ptr) {
bool IsIndexSignExt = false;
- // Just Base or possibly anything else.
+ // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
+ // instruction, then it could be just the BASE or everything else we don't
+ // know how to handle. Just use Ptr as BASE and give up.
if (Ptr->getOpcode() != ISD::ADD)
return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
- // Base + offset.
+ // We know that we have at least an ADD instruction. Try to pattern match
+ // the simple case of BASE + OFFSET.
if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
IsIndexSignExt);
}
+ // Inside a loop the current BASE pointer is calculated using an ADD and a
+ // MUL instruction. In this case Ptr is the actual BASE pointer.
+ // (i64 add (i64 %array_ptr)
+ // (i64 mul (i64 %induction_var)
+ // (i64 %element_size)))
+ if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
// Look at Base + Index + Offset cases.
SDValue Base = Ptr->getOperand(0);
SDValue IndexOffset = Ptr->getOperand(1);
@@ -8007,6 +8647,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
Index = STn;
break;
} else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
+ if (Ldn->isVolatile()) {
+ Index = NULL;
+ break;
+ }
+
// Save the load node for later. Continue the scan.
AliasLoadNodes.push_back(Ldn);
NextInChain = Ldn->getChain().getNode();
@@ -8384,7 +9029,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
Ptr, ST->getPointerInfo(), ST->isVolatile(),
- ST->isNonTemporal(), OrigAlign);
+ ST->isNonTemporal(), OrigAlign,
+ ST->getTBAAInfo());
}
// Turn 'store undef, Ptr' -> nothing.
@@ -8399,7 +9045,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// transform should not be done in this case.
if (Value.getOpcode() != ISD::TargetConstantFP) {
SDValue Tmp;
- switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
+ switch (CFP->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unknown FP type");
case MVT::f16: // We don't do this for these yet.
case MVT::f80:
@@ -8412,8 +9058,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
bitcastToAPInt().getZExtValue(), MVT::i32);
return DAG.getStore(Chain, SDLoc(N), Tmp,
- Ptr, ST->getPointerInfo(), ST->isVolatile(),
- ST->isNonTemporal(), ST->getAlignment());
+ Ptr, ST->getMemOperand());
}
break;
case MVT::f64:
@@ -8423,8 +9068,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
getZExtValue(), MVT::i64);
return DAG.getStore(Chain, SDLoc(N), Tmp,
- Ptr, ST->getPointerInfo(), ST->isVolatile(),
- ST->isNonTemporal(), ST->getAlignment());
+ Ptr, ST->getMemOperand());
}
if (!ST->isVolatile() &&
@@ -8440,18 +9084,19 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
+ const MDNode *TBAAInfo = ST->getTBAAInfo();
SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal,
- ST->getAlignment());
+ ST->getAlignment(), TBAAInfo);
Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
DAG.getConstant(4, Ptr.getValueType()));
Alignment = MinAlign(Alignment, 4U);
SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
Ptr, ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal,
- Alignment);
+ Alignment, TBAAInfo);
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
St0, St1);
}
@@ -8467,7 +9112,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (Align > ST->getAlignment())
return DAG.getTruncStore(Chain, SDLoc(N), Value,
Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
- ST->isVolatile(), ST->isNonTemporal(), Align);
+ ST->isVolatile(), ST->isNonTemporal(), Align,
+ ST->getTBAAInfo());
}
}
@@ -8477,7 +9123,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (NewST.getNode())
return NewST;
- if (CombinerAA) {
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
+ TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+ if (UseAA) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
@@ -8488,14 +9136,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Replace the chain to avoid dependency.
if (ST->isTruncatingStore()) {
ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr,
- ST->getPointerInfo(),
- ST->getMemoryVT(), ST->isVolatile(),
- ST->isNonTemporal(), ST->getAlignment());
+ ST->getMemoryVT(), ST->getMemOperand());
} else {
ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr,
- ST->getPointerInfo(),
- ST->isVolatile(), ST->isNonTemporal(),
- ST->getAlignment());
+ ST->getMemOperand());
}
// Create token to keep both nodes around.
@@ -8528,9 +9172,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
AddToWorkList(Value.getNode());
if (Shorter.getNode())
return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
- Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
- ST->isVolatile(), ST->isNonTemporal(),
- ST->getAlignment());
+ Ptr, ST->getMemoryVT(), ST->getMemOperand());
// Otherwise, see if we can simplify the operation with
// SimplifyDemandedBits, which only works if the value has a single use.
@@ -8561,9 +9203,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
ST->getMemoryVT())) {
return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
- Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
- ST->isVolatile(), ST->isNonTemporal(),
- ST->getAlignment());
+ Ptr, ST->getMemoryVT(), ST->getMemOperand());
}
// Only perform this optimization before the types are legal, because we
@@ -8821,13 +9461,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
? ISD::ZEXTLOAD : ISD::EXTLOAD;
Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(),
NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
- LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align);
+ LVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ Align, LN0->getTBAAInfo());
Chain = Load.getValue(1);
} else {
Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->isInvariant(), Align);
+ LN0->isInvariant(), Align, LN0->getTBAAInfo());
Chain = Load.getValue(1);
if (NVT.bitsLT(LVT))
Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load);
@@ -9165,8 +9806,35 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return N->getOperand(0);
// Check if all of the operands are undefs.
+ EVT VT = N->getValueType(0);
if (ISD::allOperandsUndef(N))
- return DAG.getUNDEF(N->getValueType(0));
+ return DAG.getUNDEF(VT);
+
+ // Optimize concat_vectors where one of the vectors is undef.
+ if (N->getNumOperands() == 2 &&
+ N->getOperand(1)->getOpcode() == ISD::UNDEF) {
+ SDValue In = N->getOperand(0);
+ assert(In.getValueType().isVector() && "Must concat vectors");
+
+ // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
+ if (In->getOpcode() == ISD::BITCAST &&
+ !In->getOperand(0)->getValueType(0).isVector()) {
+ SDValue Scalar = In->getOperand(0);
+ EVT SclTy = Scalar->getValueType(0);
+
+ if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
+ return SDValue();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
+ VT.getSizeInBits() / SclTy.getSizeInBits());
+ if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
+ return SDValue();
+
+ SDLoc dl = SDLoc(N);
+ SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Res);
+ }
+ }
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes.
@@ -9225,7 +9893,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// (extract_subvec (concat V1, V2, ...), i)
// Into:
// Vi if possible
- // Only operand 0 is checked as 'concat' assumes all inputs of the same type.
+ // Only operand 0 is checked as 'concat' assumes all inputs of the same
+ // type.
if (V->getOperand(0).getValueType() != NVT)
return SDValue();
unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
@@ -9358,10 +10027,10 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
if (Idx >= 0) {
- if (Idx < (int)NumElts)
- Idx += NumElts;
- else
+ if (Idx >= (int)NumElts)
Idx -= NumElts;
+ else
+ Idx = -1; // remove reference to lhs
}
NewMask.push_back(Idx);
}
@@ -9738,7 +10407,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
Load = DAG.getLoad(TheSelect->getValueType(0),
SDLoc(TheSelect),
- // FIXME: Discards pointer info.
+ // FIXME: Discards pointer and TBAA info.
LLD->getChain(), Addr, MachinePointerInfo(),
LLD->isVolatile(), LLD->isNonTemporal(),
LLD->isInvariant(), LLD->getAlignment());
@@ -9747,7 +10416,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
RLD->getExtensionType() : LLD->getExtensionType(),
SDLoc(TheSelect),
TheSelect->getValueType(0),
- // FIXME: Discards pointer info.
+ // FIXME: Discards pointer and TBAA info.
LLD->getChain(), Addr, MachinePointerInfo(),
LLD->getMemoryVT(), LLD->isVolatile(),
LLD->isNonTemporal(), LLD->getAlignment());
@@ -9852,7 +10521,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
Cond, One, Zero);
AddToWorkList(CstOffset.getNode());
- CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
+ CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
CstOffset);
AddToWorkList(CPIdx.getNode());
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
@@ -9974,9 +10643,10 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
return Temp;
// shl setcc result by log2 n2c
- return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
- DAG.getConstant(N2C->getAPIntValue().logBase2(),
- getShiftAmountTy(Temp.getValueType())));
+ return DAG.getNode(
+ ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ getShiftAmountTy(Temp.getValueType())));
}
}
@@ -10132,17 +10802,20 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
/// isAlias - Return true if there is any possibility that the two addresses
/// overlap.
-bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
+bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
const Value *SrcValue1, int SrcValueOffset1,
unsigned SrcValueAlign1,
const MDNode *TBAAInfo1,
- SDValue Ptr2, int64_t Size2,
+ SDValue Ptr2, int64_t Size2, bool IsVolatile2,
const Value *SrcValue2, int SrcValueOffset2,
unsigned SrcValueAlign2,
const MDNode *TBAAInfo2) const {
// If they are the same then they must be aliases.
if (Ptr1 == Ptr2) return true;
+ // If they are both volatile then they cannot be reordered.
+ if (IsVolatile1 && IsVolatile2) return true;
+
// Gather base node and offset information.
SDValue Base1, Base2;
int64_t Offset1, Offset2;
@@ -10187,7 +10860,9 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
return false;
}
- if (CombinerGlobalAA) {
+ bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA :
+ TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+ if (UseAA && SrcValue1 && SrcValue2) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
@@ -10206,24 +10881,25 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) {
SDValue Ptr0, Ptr1;
int64_t Size0, Size1;
+ bool IsVolatile0, IsVolatile1;
const Value *SrcValue0, *SrcValue1;
int SrcValueOffset0, SrcValueOffset1;
unsigned SrcValueAlign0, SrcValueAlign1;
const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1;
- FindAliasInfo(Op0, Ptr0, Size0, SrcValue0, SrcValueOffset0,
+ FindAliasInfo(Op0, Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0,
SrcValueAlign0, SrcTBAAInfo0);
- FindAliasInfo(Op1, Ptr1, Size1, SrcValue1, SrcValueOffset1,
+ FindAliasInfo(Op1, Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1,
SrcValueAlign1, SrcTBAAInfo1);
- return isAlias(Ptr0, Size0, SrcValue0, SrcValueOffset0,
+ return isAlias(Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0,
SrcValueAlign0, SrcTBAAInfo0,
- Ptr1, Size1, SrcValue1, SrcValueOffset1,
+ Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1,
SrcValueAlign1, SrcTBAAInfo1);
}
/// FindAliasInfo - Extracts the relevant alias information from the memory
-/// node. Returns true if the operand was a load.
+/// node. Returns true if the operand was a nonvolatile load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
- SDValue &Ptr, int64_t &Size,
+ SDValue &Ptr, int64_t &Size, bool &IsVolatile,
const Value *&SrcValue,
int &SrcValueOffset,
unsigned &SrcValueAlign,
@@ -10232,11 +10908,12 @@ bool DAGCombiner::FindAliasInfo(SDNode *N,
Ptr = LS->getBasePtr();
Size = LS->getMemoryVT().getSizeInBits() >> 3;
+ IsVolatile = LS->isVolatile();
SrcValue = LS->getSrcValue();
SrcValueOffset = LS->getSrcValueOffset();
SrcValueAlign = LS->getOriginalAlignment();
TBAAInfo = LS->getTBAAInfo();
- return isa<LoadSDNode>(LS);
+ return isa<LoadSDNode>(LS) && !IsVolatile;
}
/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
@@ -10249,12 +10926,13 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
// Get alias information for node.
SDValue Ptr;
int64_t Size;
+ bool IsVolatile;
const Value *SrcValue;
int SrcValueOffset;
unsigned SrcValueAlign;
const MDNode *SrcTBAAInfo;
- bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
- SrcValueAlign, SrcTBAAInfo);
+ bool IsLoad = FindAliasInfo(N, Ptr, Size, IsVolatile, SrcValue,
+ SrcValueOffset, SrcValueAlign, SrcTBAAInfo);
// Starting off.
Chains.push_back(OriginalChain);
@@ -10295,20 +10973,21 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
// Get alias information for Chain.
SDValue OpPtr;
int64_t OpSize;
+ bool OpIsVolatile;
const Value *OpSrcValue;
int OpSrcValueOffset;
unsigned OpSrcValueAlign;
const MDNode *OpSrcTBAAInfo;
bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
- OpSrcValue, OpSrcValueOffset,
+ OpIsVolatile, OpSrcValue, OpSrcValueOffset,
OpSrcValueAlign,
OpSrcTBAAInfo);
// If chain is alias then stop here.
if (!(IsLoad && IsOpLoad) &&
- isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
- SrcTBAAInfo,
- OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
+ isAlias(Ptr, Size, IsVolatile, SrcValue, SrcValueOffset,
+ SrcValueAlign, SrcTBAAInfo,
+ OpPtr, OpSize, OpIsVolatile, OpSrcValue, OpSrcValueOffset,
OpSrcValueAlign, OpSrcTBAAInfo)) {
Aliases.push_back(Chain);
} else {
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index b4ac948f..a6f7461 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -638,29 +638,25 @@ bool FastISel::SelectCall(const User *I) {
(!isa<AllocaInst>(Address) ||
!FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address))))
Op = MachineOperand::CreateReg(FuncInfo.InitializeRegForValue(Address),
- false);
+ false);
- if (Op)
+ if (Op) {
if (Op->isReg()) {
- // Set the indirect flag if the type and the DIVariable's
- // indirect field are in disagreement: Indirectly-addressed
- // variables that are nonpointer types should be marked as
- // indirect, and VLAs should be marked as indirect eventhough
- // they are a pointer type.
- bool IsIndirect = DI->getAddress()->getType()->isPointerTy()
- ^ DIVar.isIndirect();
Op->setIsDebug(true);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(TargetOpcode::DBG_VALUE),
- IsIndirect, Op->getReg(), Offset, DI->getVariable());
+ TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0,
+ DI->getVariable());
} else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(TargetOpcode::DBG_VALUE)).addOperand(*Op).addImm(0)
- .addMetadata(DI->getVariable());
- else
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addOperand(*Op)
+ .addImm(0)
+ .addMetadata(DI->getVariable());
+ } else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ }
return true;
}
case Intrinsic::dbg_value: {
@@ -688,6 +684,7 @@ bool FastISel::SelectCall(const User *I) {
.addFPImm(CF).addImm(DI->getOffset())
.addMetadata(DI->getVariable());
} else if (unsigned Reg = lookUpRegForValue(V)) {
+ // FIXME: This does not handle register-indirect values at offset 0.
bool IsIndirect = DI->getOffset() != 0;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, IsIndirect,
Reg, DI->getOffset(), DI->getVariable());
@@ -1574,4 +1571,19 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI);
}
+bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) {
+ // Must be an add.
+ if (!isa<AddOperator>(Add))
+ return false;
+ // Type size needs to match.
+ if (TD.getTypeSizeInBits(GEP->getType()) !=
+ TD.getTypeSizeInBits(Add->getType()))
+ return false;
+ // Must be in the same basic block.
+ if (isa<Instruction>(Add) &&
+ FuncInfo.MBBMap[cast<Instruction>(Add)->getParent()] != FuncInfo.MBB)
+ return false;
+ // Must have a constant operand.
+ return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1));
+}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index e107276..3a8fb85 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -211,6 +212,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
"IMPLICIT_DEF should have been handled as a special case elsewhere!");
+ unsigned NumResults = CountResults(Node);
for (unsigned i = 0; i < II.getNumDefs(); ++i) {
// If the specific node value is only used by a CopyToReg and the dest reg
// is a vreg in the same register class, use the CopyToReg'd destination
@@ -218,6 +220,10 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
unsigned VRBase = 0;
const TargetRegisterClass *RC =
TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF));
+ // If the register class is unknown for the given definition, then try to
+ // infer one from the value type.
+ if (!RC && i < NumResults)
+ RC = TLI->getRegClassFor(Node->getSimpleValueType(i));
if (II.OpInfo[i].isOptionalDef()) {
// Optional def must be a physical register.
unsigned NumResults = CountResults(Node);
@@ -722,10 +728,20 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
const MCInstrDesc &II = TII->get(Opc);
unsigned NumResults = CountResults(Node);
+ unsigned NumDefs = II.getNumDefs();
+ const uint16_t *ScratchRegs = NULL;
+
+ // Handle PATCHPOINT specially and then use the generic code.
+ if (Opc == TargetOpcode::PATCHPOINT) {
+ unsigned CC = Node->getConstantOperandVal(PatchPointOpers::CCPos);
+ NumDefs = NumResults;
+ ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC);
+ }
+
unsigned NumImpUses = 0;
unsigned NodeOperands =
- countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses);
- bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
+ countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses);
+ bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=0;
#ifndef NDEBUG
unsigned NumMIOperands = NodeOperands + NumResults;
if (II.isVariadic())
@@ -748,14 +764,20 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Emit all of the actual operands of this instruction, adding them to the
// instruction as appropriate.
- bool HasOptPRefs = II.getNumDefs() > NumResults;
+ bool HasOptPRefs = NumDefs > NumResults;
assert((!HasOptPRefs || !HasPhysRegOuts) &&
"Unable to cope with optional defs and phys regs defs!");
- unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
+ unsigned NumSkip = HasOptPRefs ? NumDefs - NumResults : 0;
for (unsigned i = NumSkip; i != NodeOperands; ++i)
- AddOperand(MIB, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
+ AddOperand(MIB, Node->getOperand(i), i-NumSkip+NumDefs, &II,
VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);
+ // Add scratch registers as implicit def and early clobber
+ if (ScratchRegs)
+ for (unsigned i = 0; ScratchRegs[i]; ++i)
+ MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine |
+ RegState::EarlyClobber);
+
// Transfer all of the memory reference descriptions of this instruction.
MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
cast<MachineSDNode>(Node)->memoperands_end());
@@ -784,8 +806,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Additional results must be physical register defs.
if (HasPhysRegOuts) {
- for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
- unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
+ for (unsigned i = NumDefs; i < NumResults; ++i) {
+ unsigned Reg = II.getImplicitDefs()[i - NumDefs];
if (!Node->hasAnyUseOfValue(i))
continue;
// This implicitly defined physreg has a use.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bd844e5..9061ae9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -95,8 +95,8 @@ private:
SDValue N1, SDValue N2,
ArrayRef<int> Mask) const;
- void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
- SDLoc dl);
+ bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+ bool &NeedInvert, SDLoc dl);
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
@@ -311,6 +311,8 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
SDValue Val = ST->getValue();
EVT VT = Val.getValueType();
int Alignment = ST->getAlignment();
+ unsigned AS = ST->getAddressSpace();
+
SDLoc dl(ST);
if (ST->getMemoryVT().isFloatingPoint() ||
ST->getMemoryVT().isVector()) {
@@ -343,7 +345,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
SDValue Store = DAG.getTruncStore(Chain, dl,
Val, StackPtr, MachinePointerInfo(),
StoredVT, false, false, 0);
- SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy(AS));
SmallVector<SDValue, 8> Stores;
unsigned Offset = 0;
@@ -381,7 +383,8 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
.getWithOffset(Offset),
MemVT, ST->isVolatile(),
ST->isNonTemporal(),
- MinAlign(ST->getAlignment(), Offset)));
+ MinAlign(ST->getAlignment(), Offset),
+ ST->getTBAAInfo()));
// The order of the stores doesn't matter - say it with a TokenFactor.
SDValue Result =
DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
@@ -408,13 +411,14 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
ST->getPointerInfo(), NewStoredVT,
ST->isVolatile(), ST->isNonTemporal(), Alignment);
+
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ DAG.getConstant(IncrementSize, TLI.getPointerTy(AS)));
Alignment = MinAlign(Alignment, IncrementSize);
Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize),
NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
- Alignment);
+ Alignment, ST->getTBAAInfo());
SDValue Result =
DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
@@ -438,10 +442,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) {
// Expand to a (misaligned) integer load of the same size,
// then bitconvert to floating point or vector.
- SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
- LD->isVolatile(),
- LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment());
+ SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
+ LD->getMemOperand());
SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
if (LoadedVT != VT)
Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
@@ -474,7 +476,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
LD->getPointerInfo().getWithOffset(Offset),
LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(),
- MinAlign(LD->getAlignment(), Offset));
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getTBAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
MachinePointerInfo(), false, false, 0));
@@ -492,7 +495,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
LD->getPointerInfo().getWithOffset(Offset),
MemVT, LD->isVolatile(),
LD->isNonTemporal(),
- MinAlign(LD->getAlignment(), Offset));
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getTBAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
// On big-endian machines this requires a truncating store to ensure
// that the bits end up in the right place.
@@ -536,23 +540,25 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
if (TLI.isLittleEndian()) {
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), Alignment);
+ LD->isNonTemporal(), Alignment, LD->getTBAAInfo());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
+ LD->isNonTemporal(), MinAlign(Alignment, IncrementSize),
+ LD->getTBAAInfo());
} else {
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), Alignment);
+ LD->isNonTemporal(), Alignment, LD->getTBAAInfo());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
+ LD->isNonTemporal(), MinAlign(Alignment, IncrementSize),
+ LD->getTBAAInfo());
}
// aggregate the two parts
@@ -655,6 +661,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
+ const MDNode *TBAAInfo = ST->getTBAAInfo();
SDLoc dl(ST);
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
if (CFP->getValueType(0) == MVT::f32 &&
@@ -663,7 +670,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
bitcastToAPInt().zextOrTrunc(32),
MVT::i32);
return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
}
if (CFP->getValueType(0) == MVT::f64) {
@@ -672,7 +679,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
zextOrTrunc(64), MVT::i64);
return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
}
if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
@@ -685,12 +692,13 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
if (TLI.isBigEndian()) std::swap(Lo, Hi);
Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment);
+ isNonTemporal, Alignment, TBAAInfo);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(4));
+ DAG.getConstant(4, Ptr.getValueType()));
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
- isVolatile, isNonTemporal, MinAlign(Alignment, 4U));
+ isVolatile, isNonTemporal, MinAlign(Alignment, 4U),
+ TBAAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -708,6 +716,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
+ const MDNode *TBAAInfo = ST->getTBAAInfo();
if (!ST->isTruncatingStore()) {
if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
@@ -745,7 +754,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
SDValue Result =
DAG.getStore(Chain, dl, Value, Ptr,
ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment);
+ isNonTemporal, Alignment, TBAAInfo);
ReplaceNode(SDValue(Node, 0), Result);
break;
}
@@ -767,7 +776,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Value = DAG.getZeroExtendInReg(Value, dl, StVT);
SDValue Result =
DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment);
+ NVT, isVolatile, isNonTemporal, Alignment,
+ TBAAInfo);
ReplaceNode(SDValue(Node, 0), Result);
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
@@ -788,19 +798,20 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// Store the bottom RoundWidth bits.
Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
RoundVT,
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, Alignment,
+ TBAAInfo);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
DAG.getConstant(RoundWidth,
TLI.getShiftAmountTy(Value.getValueType())));
Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
} else {
// Big endian - avoid unaligned stores.
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
@@ -809,16 +820,17 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
DAG.getConstant(ExtraWidth,
TLI.getShiftAmountTy(Value.getValueType())));
Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
- RoundVT, isVolatile, isNonTemporal, Alignment);
+ RoundVT, isVolatile, isNonTemporal, Alignment,
+ TBAAInfo);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Lo = DAG.getTruncStore(Chain, dl, Value, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
}
// The order of the stores doesn't matter.
@@ -854,7 +866,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
SDValue Result =
DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
ReplaceNode(SDValue(Node, 0), Result);
break;
}
@@ -902,9 +914,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
"Can only promote loads to same size type");
- SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment());
+ SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getMemOperand());
RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res);
RChain = Res.getValue(1);
break;
@@ -924,6 +934,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
unsigned Alignment = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
+ const MDNode *TBAAInfo = LD->getTBAAInfo();
if (SrcWidth != SrcVT.getStoreSizeInBits() &&
// Some targets pretend to have an i1 loading operation, and actually
@@ -950,7 +961,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
SDValue Result =
DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
Chain, Ptr, LD->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment);
+ NVT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
Ch = Result.getValue(1); // The chain.
@@ -987,16 +998,16 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
Chain, Ptr,
LD->getPointerInfo(), RoundVT, isVolatile,
- isNonTemporal, Alignment);
+ isNonTemporal, Alignment, TBAAInfo);
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
// Build a factor node to remember that this load is independent of
// the other one.
@@ -1016,17 +1027,17 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the top RoundWidth bits.
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo(), RoundVT, isVolatile,
- isNonTemporal, Alignment);
+ isNonTemporal, Alignment, TBAAInfo);
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
// Build a factor node to remember that this load is independent of
// the other one.
@@ -1079,9 +1090,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
case TargetLowering::Expand:
if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr,
- LD->getPointerInfo(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment());
+ LD->getMemOperand());
unsigned ExtendOp;
switch (ExtType) {
case ISD::EXTLOAD:
@@ -1109,9 +1118,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Turn the unsupported load into an EXTLOAD followed by an explicit
// zero/sign extend inreg.
SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
- Chain, Ptr, LD->getPointerInfo(), SrcVT,
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
+ Chain, Ptr, SrcVT,
+ LD->getMemOperand());
SDValue ValRes;
if (ExtType == ISD::SEXTLOAD)
ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
@@ -1386,11 +1394,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
DAG.getConstant(EltSize, Idx.getValueType()));
- if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
- Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
- else
- Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
-
+ Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy());
StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
if (Op.getValueType().isVector())
@@ -1428,11 +1432,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
DAG.getConstant(EltSize, Idx.getValueType()));
-
- if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
- Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
- else
- Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+ Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy());
SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
StackPtr);
@@ -1531,7 +1531,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits();
unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8;
LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(),
- LoadPtr, DAG.getIntPtrConstant(ByteOffset));
+ LoadPtr,
+ DAG.getConstant(ByteOffset, LoadPtr.getValueType()));
// Load a legal integer containing the sign bit.
SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
false, false, false, 0);
@@ -1580,10 +1581,10 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
- if (Align > StackAlign)
- SP = DAG.getNode(ISD::AND, dl, VT, SP,
- DAG.getConstant(-(uint64_t)Align, VT));
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+ if (Align > StackAlign)
+ Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
+ DAG.getConstant(-(uint64_t)Align, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
@@ -1595,22 +1596,44 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
}
/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
-/// condition code CC on the current target. This routine expands SETCC with
-/// illegal condition code into AND / OR of multiple SETCC values.
-void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
+/// condition code CC on the current target.
+///
+/// If the SETCC has been legalized using AND / OR, then the legalized node
+/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
+/// will be set to false.
+///
+/// If the SETCC has been legalized by using getSetCCSwappedOperands(),
+/// then the values of LHS and RHS will be swapped, CC will be set to the
+/// new condition, and NeedInvert will be set to false.
+///
+/// If the SETCC has been legalized using the inverse condcode, then LHS and
+/// RHS will be unchanged, CC will be set to the inverted condcode, and
+/// NeedInvert will be set to true. The caller must invert the result of the
+/// SETCC with SelectionDAG::getNOT() or take equivalent action to swap the
+/// effect of a true/false result.
+///
+/// \returns true if the SetCC has been legalized, false if it hasn't.
+bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
SDValue &LHS, SDValue &RHS,
SDValue &CC,
+ bool &NeedInvert,
SDLoc dl) {
MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+ NeedInvert = false;
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
default: llvm_unreachable("Unknown condition code action!");
case TargetLowering::Legal:
// Nothing to do.
break;
case TargetLowering::Expand: {
+ ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
+ if (TLI.isCondCodeLegal(InvCC, OpVT)) {
+ std::swap(LHS, RHS);
+ CC = DAG.getCondCode(InvCC);
+ return true;
+ }
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
- ISD::CondCode InvCC = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
default: llvm_unreachable("Don't know how to expand this condition!");
@@ -1650,18 +1673,21 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
case ISD::SETGT:
case ISD::SETGE:
case ISD::SETLT:
+ // We only support using the inverted operation, which is computed above
+ // and not a different manner of supporting expanding these cases.
+ llvm_unreachable("Don't know how to expand this condition!");
case ISD::SETNE:
case ISD::SETEQ:
- InvCC = ISD::getSetCCSwappedOperands(CCCode);
- if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) {
- // We only support using the inverted operation and not a
- // different manner of supporting expanding these cases.
- llvm_unreachable("Don't know how to expand this condition!");
+ // Try inverting the result of the inverse condition.
+ InvCC = CCCode == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ;
+ if (TLI.isCondCodeLegal(InvCC, OpVT)) {
+ CC = DAG.getCondCode(InvCC);
+ NeedInvert = true;
+ return true;
}
- LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC);
- RHS = SDValue();
- CC = SDValue();
- return;
+ // If inverting the condition didn't work then we have no means to expand
+ // the condition.
+ llvm_unreachable("Don't know how to expand this condition!");
}
SDValue SetCC1, SetCC2;
@@ -1678,9 +1704,10 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
RHS = SDValue();
CC = SDValue();
- break;
+ return true;
}
}
+ return false;
}
/// EmitStackConvert - Emit a store/load combination to the stack. This stores
@@ -1969,7 +1996,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128) {
RTLIB::Libcall LC;
- switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ switch (Node->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::f32: LC = Call_F32; break;
case MVT::f64: LC = Call_F64; break;
@@ -1987,7 +2014,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128) {
RTLIB::Libcall LC;
- switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ switch (Node->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC = Call_I8; break;
case MVT::i16: LC = Call_I16; break;
@@ -2002,7 +2029,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
const TargetLowering &TLI) {
RTLIB::Libcall LC;
- switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ switch (Node->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
@@ -2049,7 +2076,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
bool isSigned = Opcode == ISD::SDIVREM;
RTLIB::Libcall LC;
- switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ switch (Node->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
@@ -2106,7 +2133,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
/// isSinCosLibcallAvailable - Return true if sincos libcall is available.
static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
RTLIB::Libcall LC;
- switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ switch (Node->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::f32: LC = RTLIB::SINCOS_F32; break;
case MVT::f64: LC = RTLIB::SINCOS_F64; break;
@@ -2156,7 +2183,7 @@ void
SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
RTLIB::Libcall LC;
- switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ switch (Node->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::f32: LC = RTLIB::SINCOS_F32; break;
case MVT::f64: LC = RTLIB::SINCOS_F64; break;
@@ -2232,11 +2259,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
// word offset constant for Hi/Lo address computation
- SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy());
+ SDValue WordOff = DAG.getConstant(sizeof(int), StackSlot.getValueType());
// set up Hi and Lo (into buffer) address based on endian
SDValue Hi = StackSlot;
- SDValue Lo = DAG.getNode(ISD::ADD, dl,
- TLI.getPointerTy(), StackSlot, WordOff);
+ SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(),
+ StackSlot, WordOff);
if (TLI.isLittleEndian())
std::swap(Hi, Lo);
@@ -2382,7 +2409,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
// as a negative number. To counteract this, the dynamic code adds an
// offset depending on the data type.
uint64_t FF;
- switch (Op0.getValueType().getSimpleVT().SimpleTy) {
+ switch (Op0.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unsupported integer type!");
case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
@@ -2395,7 +2422,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset);
+ CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset);
Alignment = std::min(Alignment, 4u);
SDValue FudgeInReg;
if (DestVT == MVT::f32)
@@ -2656,6 +2683,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break;
}
break;
case ISD::ATOMIC_CMP_SWAP:
@@ -2665,6 +2693,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break;
}
break;
case ISD::ATOMIC_LOAD_ADD:
@@ -2674,6 +2703,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break;
}
break;
case ISD::ATOMIC_LOAD_SUB:
@@ -2683,6 +2713,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break;
}
break;
case ISD::ATOMIC_LOAD_AND:
@@ -2692,6 +2723,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break;
}
break;
case ISD::ATOMIC_LOAD_OR:
@@ -2701,6 +2733,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break;
}
break;
case ISD::ATOMIC_LOAD_XOR:
@@ -2710,6 +2743,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break;
}
break;
case ISD::ATOMIC_LOAD_NAND:
@@ -2719,6 +2753,47 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_MAX:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MAX_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MAX_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MAX_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MAX_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MAX_16;break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_UMAX:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMAX_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMAX_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMAX_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMAX_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMAX_16;break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_MIN:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MIN_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MIN_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MIN_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MIN_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MIN_16;break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_UMIN:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMIN_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMIN_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMIN_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMIN_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMIN_16;break;
}
break;
}
@@ -2730,6 +2805,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SmallVector<SDValue, 8> Results;
SDLoc dl(Node);
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ bool NeedInvert;
switch (Node->getOpcode()) {
case ISD::CTPOP:
case ISD::CTLZ:
@@ -2947,20 +3023,20 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (Align > TLI.getMinStackArgumentAlignment()) {
assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
- VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+ VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
DAG.getConstant(Align - 1,
- TLI.getPointerTy()));
+ VAList.getValueType()));
- VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList,
+ VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList,
DAG.getConstant(-(int64_t)Align,
- TLI.getPointerTy()));
+ VAList.getValueType()));
}
// Increment the pointer, VAList, to the next vaarg
- Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+ Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
DAG.getConstant(TLI.getDataLayout()->
getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
- TLI.getPointerTy()));
+ VAList.getValueType()));
// Store the incremented VAList to the legalized pointer
Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
MachinePointerInfo(V), false, false, 0);
@@ -3231,6 +3307,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
RTLIB::NEARBYINT_F128,
RTLIB::NEARBYINT_PPCF128));
break;
+ case ISD::FROUND:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128));
+ break;
case ISD::FPOWI:
Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
RTLIB::POWI_F80, RTLIB::POWI_F128,
@@ -3565,9 +3648,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
unsigned EntrySize =
DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
- Index = DAG.getNode(ISD::MUL, dl, PTy,
- Index, DAG.getConstant(EntrySize, PTy));
- SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+ Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(),
+ Index, DAG.getConstant(EntrySize, Index.getValueType()));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(),
+ Index, Table);
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
@@ -3611,10 +3695,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp1 = Node->getOperand(0);
Tmp2 = Node->getOperand(1);
Tmp3 = Node->getOperand(2);
- LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl);
+ bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2,
+ Tmp3, NeedInvert, dl);
+
+ if (Legalized) {
+ // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+ // condition code, create a new SETCC node.
+ if (Tmp3.getNode())
+ Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
+ Tmp1, Tmp2, Tmp3);
+
+ // If we expanded the SETCC by inverting the condition code, then wrap
+ // the existing SETCC in a NOT to restore the intended condition.
+ if (NeedInvert)
+ Tmp1 = DAG.getNOT(dl, Tmp1, Tmp1->getValueType(0));
- // If we expanded the SETCC into an AND/OR, return the new node
- if (Tmp2.getNode() == 0) {
Results.push_back(Tmp1);
break;
}
@@ -3645,14 +3740,52 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp4 = Node->getOperand(3); // False
SDValue CC = Node->getOperand(4);
- LegalizeSetCCCondCode(getSetCCResultType(Tmp1.getValueType()),
- Tmp1, Tmp2, CC, dl);
+ bool Legalized = false;
+ // Try to legalize by inverting the condition. This is for targets that
+ // might support an ordered version of a condition, but not the unordered
+ // version (or vice versa).
+ ISD::CondCode InvCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+ Tmp1.getValueType().isInteger());
+ if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) {
+ // Use the new condition code and swap true and false
+ Legalized = true;
+ Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC);
+ } else {
+ // If the inverse is not legal, then try to swap the arguments using
+ // the inverse condition code.
+ ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC);
+ if (TLI.isCondCodeLegal(SwapInvCC, Tmp1.getSimpleValueType())) {
+ // The swapped inverse condition is legal, so swap true and false,
+ // lhs and rhs.
+ Legalized = true;
+ Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC);
+ }
+ }
+
+ if (!Legalized) {
+ Legalized = LegalizeSetCCCondCode(
+ getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert,
+ dl);
+
+ assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
+
+ // If we expanded the SETCC by inverting the condition code, then swap
+ // the True/False operands to match.
+ if (NeedInvert)
+ std::swap(Tmp3, Tmp4);
- assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!");
- Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
- CC = DAG.getCondCode(ISD::SETNE);
- Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
- Tmp3, Tmp4, CC);
+ // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+ // condition code, create a new SELECT_CC node.
+ if (CC.getNode()) {
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0),
+ Tmp1, Tmp2, Tmp3, Tmp4, CC);
+ } else {
+ Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+ CC = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
+ Tmp3, Tmp4, CC);
+ }
+ }
Results.push_back(Tmp1);
break;
}
@@ -3662,14 +3795,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp3 = Node->getOperand(3); // RHS
Tmp4 = Node->getOperand(1); // CC
- LegalizeSetCCCondCode(getSetCCResultType(Tmp2.getValueType()),
- Tmp2, Tmp3, Tmp4, dl);
-
- assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
- Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
- Tmp4 = DAG.getCondCode(ISD::SETNE);
- Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
- Tmp3, Node->getOperand(4));
+ bool Legalized = LegalizeSetCCCondCode(getSetCCResultType(
+ Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl);
+ (void)Legalized;
+ assert(Legalized && "Can't legalize BR_CC with legal condition!");
+
+ // If the condition code was inverted, the result must be negated again.
+ // NOTE(review): Tmp4 is the CC operand, not a SETCC value -- confirm getNOT.
+ if (NeedInvert)
+ Tmp4 = DAG.getNOT(dl, Tmp4, Tmp4->getValueType(0));
+
+ // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC
+ // node.
+ if (Tmp4.getNode()) {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1,
+ Tmp4, Tmp2, Tmp3, Node->getOperand(4));
+ } else {
+ Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
+ Tmp4 = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
+ Tmp3, Node->getOperand(4));
+ }
Results.push_back(Tmp1);
break;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index cea0b02..ecf4c5d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -88,6 +88,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break;
+ case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break;
case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;
case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
@@ -160,7 +161,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
RTLIB::ADD_F80,
RTLIB::ADD_F128,
RTLIB::ADD_PPCF128),
- NVT, Ops, 2, false, SDLoc(N));
+ NVT, Ops, 2, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
@@ -172,7 +173,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
RTLIB::CEIL_F80,
RTLIB::CEIL_F128,
RTLIB::CEIL_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
@@ -226,7 +227,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
RTLIB::COS_F80,
RTLIB::COS_F128,
RTLIB::COS_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
@@ -239,7 +240,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
RTLIB::DIV_F80,
RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
- NVT, Ops, 2, false, SDLoc(N));
+ NVT, Ops, 2, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
@@ -251,7 +252,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
RTLIB::EXP_F80,
RTLIB::EXP_F128,
RTLIB::EXP_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
@@ -263,7 +264,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
RTLIB::EXP2_F80,
RTLIB::EXP2_F128,
RTLIB::EXP2_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
@@ -275,7 +276,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
RTLIB::FLOOR_F80,
RTLIB::FLOOR_F128,
RTLIB::FLOOR_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
@@ -287,7 +288,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
RTLIB::LOG_F80,
RTLIB::LOG_F128,
RTLIB::LOG_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
@@ -299,7 +300,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
RTLIB::LOG2_F80,
RTLIB::LOG2_F128,
RTLIB::LOG2_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
@@ -311,7 +312,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
RTLIB::LOG10_F80,
RTLIB::LOG10_F128,
RTLIB::LOG10_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
@@ -325,7 +326,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
RTLIB::FMA_F80,
RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
- NVT, Ops, 3, false, SDLoc(N));
+ NVT, Ops, 3, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
@@ -338,7 +339,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
RTLIB::MUL_F80,
RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
- NVT, Ops, 2, false, SDLoc(N));
+ NVT, Ops, 2, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
@@ -350,7 +351,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
RTLIB::NEARBYINT_F80,
RTLIB::NEARBYINT_F128,
RTLIB::NEARBYINT_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
@@ -364,7 +365,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- NVT, Ops, 2, false, SDLoc(N));
+ NVT, Ops, 2, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
@@ -372,7 +373,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
- return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N));
+ return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
}
// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
@@ -381,7 +382,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = N->getOperand(0);
return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false,
- SDLoc(N));
+ SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
@@ -389,7 +390,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
- return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N));
+ return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
@@ -402,7 +403,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
RTLIB::POW_F80,
RTLIB::POW_F128,
RTLIB::POW_PPCF128),
- NVT, Ops, 2, false, SDLoc(N));
+ NVT, Ops, 2, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
@@ -416,7 +417,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
RTLIB::POWI_F80,
RTLIB::POWI_F128,
RTLIB::POWI_PPCF128),
- NVT, Ops, 2, false, SDLoc(N));
+ NVT, Ops, 2, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
@@ -429,7 +430,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
RTLIB::REM_F80,
RTLIB::REM_F128,
RTLIB::REM_PPCF128),
- NVT, Ops, 2, false, SDLoc(N));
+ NVT, Ops, 2, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
@@ -441,7 +442,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
RTLIB::RINT_F80,
RTLIB::RINT_F128,
RTLIB::RINT_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128),
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
@@ -453,7 +466,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
RTLIB::SIN_F80,
RTLIB::SIN_F128,
RTLIB::SIN_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
@@ -465,7 +478,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
RTLIB::SQRT_F80,
RTLIB::SQRT_F128,
RTLIB::SQRT_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
@@ -478,7 +491,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- NVT, Ops, 2, false, SDLoc(N));
+ NVT, Ops, 2, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
@@ -490,7 +503,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
RTLIB::TRUNC_F80,
RTLIB::TRUNC_F128,
RTLIB::TRUNC_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
@@ -504,7 +517,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
L->getPointerInfo(), NVT, L->isVolatile(),
- L->isNonTemporal(), false, L->getAlignment());
+ L->isNonTemporal(), false, L->getAlignment(),
+ L->getTBAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -516,7 +530,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
L->getMemoryVT(), dl, L->getChain(),
L->getBasePtr(), L->getOffset(), L->getPointerInfo(),
L->getMemoryVT(), L->isVolatile(),
- L->isNonTemporal(), false, L->getAlignment());
+ L->isNonTemporal(), false, L->getAlignment(),
+ L->getTBAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -585,7 +600,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
NVT, N->getOperand(0));
return TLI.makeLibCall(DAG, LC,
TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
- &Op, 1, false, dl);
+ &Op, 1, false, dl).first;
}
@@ -645,7 +660,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N));
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
@@ -676,7 +691,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N));
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
@@ -684,14 +699,14 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N));
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) {
EVT RVT = N->getValueType(0);
RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16;
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N));
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
@@ -754,9 +769,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
Val = GetSoftenedFloat(Val);
return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
- ST->getPointerInfo(),
- ST->isVolatile(), ST->isNonTemporal(),
- ST->getAlignment());
+ ST->getMemOperand());
}
@@ -817,6 +830,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
+ case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break;
case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break;
case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break;
case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
@@ -912,7 +926,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
N->getValueType(0), Ops, 2, false,
- SDLoc(N));
+ SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -986,7 +1000,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
N->getValueType(0), Ops, 3, false,
- SDLoc(N));
+ SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1000,7 +1014,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
N->getValueType(0), Ops, 2, false,
- SDLoc(N));
+ SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1072,6 +1086,18 @@ void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
GetPairElements(Call, Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
@@ -1102,7 +1128,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
N->getValueType(0), Ops, 2, false,
- SDLoc(N));
+ SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1134,8 +1160,7 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
- LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ LD->getMemoryVT(), LD->getMemOperand());
// Remember the chain.
Chain = Hi.getValue(1);
@@ -1181,7 +1206,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
}
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
- Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl);
+ Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl).first;
GetPairElements(Hi, Lo, Hi);
}
@@ -1251,6 +1276,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
+ case ISD::FCOPYSIGN: Res = ExpandFloatOp_FCOPYSIGN(N); break;
case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
@@ -1325,6 +1351,17 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
N->getOperand(4)), 0);
}
+SDValue DAGTypeLegalizer::ExpandFloatOp_FCOPYSIGN(SDNode *N) {
+ assert(N->getOperand(1).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Lo, Hi;
+ GetExpandedFloat(N->getOperand(1), Lo, Hi);
+ // The ppcf128 value is providing only the sign; take it from the
+ // higher-order double (which must have the larger magnitude).
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N),
+ N->getValueType(0), N->getOperand(0), Hi);
+}
+
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
"Logic only correct for ppcf128!");
@@ -1353,7 +1390,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
- return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl);
+ return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
@@ -1386,7 +1423,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1,
- false, dl);
+ false, dl).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
@@ -1445,7 +1482,5 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
GetExpandedOp(ST->getValue(), Lo, Hi);
return DAG.getTruncStore(Chain, SDLoc(N), Hi, Ptr,
- ST->getPointerInfo(),
- ST->getMemoryVT(), ST->isVolatile(),
- ST->isNonTemporal(), ST->getAlignment());
+ ST->getMemoryVT(), ST->getMemOperand());
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index ff8f1f9..4255948 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -417,9 +417,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
SDLoc dl(N);
SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
- N->getPointerInfo(),
- N->getMemoryVT(), N->isVolatile(),
- N->isNonTemporal(), N->getAlignment());
+ N->getMemoryVT(), N->getMemOperand());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -919,7 +917,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
// type does not have a strange size (eg: it is not i1).
EVT VecVT = N->getValueType(0);
unsigned NumElts = VecVT.getVectorNumElements();
- assert(!(NumElts & 1) && "Legal vector of one illegal element?");
+ assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) &&
+ "Legal vector of one illegal element?");
// Promote the inserted value. The type does not need to match the
// vector element type. Check that any extra bits introduced will be
@@ -1037,17 +1036,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
- unsigned Alignment = N->getAlignment();
- bool isVolatile = N->isVolatile();
- bool isNonTemporal = N->isNonTemporal();
SDLoc dl(N);
SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
// Truncate the value and store the result.
- return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(),
- N->getMemoryVT(),
- isVolatile, isNonTemporal, Alignment);
+ return DAG.getTruncStore(Ch, dl, Val, Ptr,
+ N->getMemoryVT(), N->getMemOperand());
}
SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
@@ -1193,6 +1188,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break;
}
break;
case ISD::ATOMIC_CMP_SWAP:
@@ -1202,6 +1198,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break;
}
break;
case ISD::ATOMIC_LOAD_ADD:
@@ -1211,6 +1208,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break;
}
break;
case ISD::ATOMIC_LOAD_SUB:
@@ -1220,6 +1218,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break;
}
break;
case ISD::ATOMIC_LOAD_AND:
@@ -1229,6 +1228,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break;
}
break;
case ISD::ATOMIC_LOAD_OR:
@@ -1238,6 +1238,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break;
}
break;
case ISD::ATOMIC_LOAD_XOR:
@@ -1247,6 +1248,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break;
}
break;
case ISD::ATOMIC_LOAD_NAND:
@@ -1256,6 +1258,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break;
}
break;
}
@@ -1770,7 +1773,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, dl),
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/,
+ dl).first,
Lo, Hi);
}
@@ -1781,7 +1785,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, dl),
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/,
+ dl).first,
Lo, Hi);
}
@@ -1803,6 +1808,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
bool isInvariant = N->isInvariant();
+ const MDNode *TBAAInfo = N->getTBAAInfo();
SDLoc dl(N);
assert(NVT.isByteSized() && "Expanded type not byte sized!");
@@ -1811,7 +1817,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
EVT MemVT = N->getMemoryVT();
Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
- MemVT, isVolatile, isNonTemporal, Alignment);
+ MemVT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
// Remember the chain.
Ch = Lo.getValue(1);
@@ -1833,7 +1839,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
} else if (TLI.isLittleEndian()) {
// Little-endian - low bits are at low addresses.
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
- isVolatile, isNonTemporal, isInvariant, Alignment);
+ isVolatile, isNonTemporal, isInvariant, Alignment,
+ TBAAInfo);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -1842,11 +1849,11 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1864,17 +1871,17 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
EVT::getIntegerVT(*DAG.getContext(),
MemVT.getSizeInBits() - ExcessBits),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
// Load the rest of the low bits.
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1997,7 +2004,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, dl),
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/,
+ dl).first,
Lo, Hi);
}
@@ -2060,7 +2068,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
@@ -2155,7 +2163,8 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl).first, Lo,
+ Hi);
return;
}
@@ -2238,7 +2247,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
@@ -2378,7 +2387,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
@@ -2398,7 +2407,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
@@ -2685,7 +2694,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this SINT_TO_FP!");
- return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, SDLoc(N));
+ return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -2702,6 +2711,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
+ const MDNode *TBAAInfo = N->getTBAAInfo();
SDLoc dl(N);
SDValue Lo, Hi;
@@ -2711,7 +2721,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
GetExpandedInteger(N->getValue(), Lo, Hi);
return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
N->getMemoryVT(), isVolatile, isNonTemporal,
- Alignment);
+ Alignment, TBAAInfo);
}
if (TLI.isLittleEndian()) {
@@ -2719,7 +2729,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
GetExpandedInteger(N->getValue(), Lo, Hi);
Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -2728,11 +2738,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
NEVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -2760,17 +2770,17 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Store both the high bits and maybe some of the low bits.
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(),
- HiVT, isVolatile, isNonTemporal, Alignment);
+ HiVT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
// Store the lowest ExcessBits bits in the second half.
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -2835,7 +2845,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet,
Zero, Four);
unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
- FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset);
+ FudgePtr = DAG.getNode(ISD::ADD, dl, FudgePtr.getValueType(),
+ FudgePtr, Offset);
Alignment = std::min(Alignment, 4u);
// Load the value out, extending it from f32 to the destination float type.
@@ -2852,7 +2863,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this UINT_TO_FP!");
- return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl);
+ return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl).first;
}
SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index fd770d1..eb13230 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -958,20 +958,6 @@ SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) {
return SDValue(N->getOperand(ResNo));
}
-/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
-/// which is split into two not necessarily identical pieces.
-void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) {
- // Currently all types are split in half.
- if (!InVT.isVector()) {
- LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
- } else {
- unsigned NumElements = InVT.getVectorNumElements();
- assert(!(NumElements & 1) && "Splitting vector, but not in half!");
- LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
- InVT.getVectorElementType(), NumElements/2);
- }
-}
-
/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
/// high parts of the given value.
void DAGTypeLegalizer::GetPairElements(SDValue Pair,
@@ -988,10 +974,7 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
SDValue Index) {
SDLoc dl(Index);
// Make sure the index type is big enough to compute in.
- if (Index.getValueType().bitsGT(TLI.getPointerTy()))
- Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index);
- else
- Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index);
+ Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy());
// Calculate the element offset and add it to the pointer.
unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
@@ -1024,20 +1007,23 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
unsigned NumOps = N->getNumOperands();
SDLoc dl(N);
if (NumOps == 0) {
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, dl);
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned,
+ dl).first;
} else if (NumOps == 1) {
SDValue Op = N->getOperand(0);
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, dl);
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned,
+ dl).first;
} else if (NumOps == 2) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, dl);
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned,
+ dl).first;
}
SmallVector<SDValue, 8> Ops(NumOps);
for (unsigned i = 0; i < NumOps; ++i)
Ops[i] = N->getOperand(i);
return TLI.makeLibCall(DAG, LC, N->getValueType(0),
- &Ops[0], NumOps, isSigned, dl);
+ &Ops[0], NumOps, isSigned, dl).first;
}
// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 63e9af3..13bb08f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -410,6 +410,7 @@ private:
SDValue SoftenFloatRes_FPOWI(SDNode *N);
SDValue SoftenFloatRes_FREM(SDNode *N);
SDValue SoftenFloatRes_FRINT(SDNode *N);
+ SDValue SoftenFloatRes_FROUND(SDNode *N);
SDValue SoftenFloatRes_FSIN(SDNode *N);
SDValue SoftenFloatRes_FSQRT(SDNode *N);
SDValue SoftenFloatRes_FSUB(SDNode *N);
@@ -470,6 +471,7 @@ private:
void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -480,6 +482,7 @@ private:
// Float Operand Expansion.
bool ExpandFloatOperand(SDNode *N, unsigned OperandNo);
SDValue ExpandFloatOp_BR_CC(SDNode *N);
+ SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N);
SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
@@ -534,7 +537,7 @@ private:
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_BITCAST(SDNode *N);
- SDValue ScalarizeVecOp_EXTEND(SDNode *N);
+ SDValue ScalarizeVecOp_UnaryOp(SDNode *N);
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
@@ -558,6 +561,7 @@ private:
void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -628,6 +632,7 @@ private:
SDValue WidenVecRes_Ternary(SDNode *N);
SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
SDValue WidenVecRes_Convert(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
SDValue WidenVecRes_Shift(SDNode *N);
@@ -699,10 +704,6 @@ private:
GetExpandedFloat(Op, Lo, Hi);
}
- /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
- /// which is split (or expanded) into two not necessarily identical pieces.
- void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT);
-
/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
/// high parts of the given value.
void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
@@ -730,6 +731,12 @@ private:
GetExpandedFloat(Op, Lo, Hi);
}
+
+ /// This function will split the integer \p Op into \p NumElements
+ /// operations of type \p EltVT and store them in \p Ops.
+ void IntegerToVector(SDValue Op, unsigned NumElements,
+ SmallVectorImpl<SDValue> &Ops, EVT EltVT);
+
// Generic Result Expansion.
void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 96f6143..c749fde 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -77,13 +77,9 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
case TargetLowering::TypeWidenVector: {
assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
InOp = GetWidenedVector(InOp);
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
- InVT.getVectorNumElements()/2);
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getConstant(InNVT.getVectorNumElements(),
- TLI.getVectorIdxTy()));
+ EVT LoVT, HiVT;
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT);
+ llvm::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
@@ -169,7 +165,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize,
+ StackPtr.getValueType()));
// Load the second half from the stack slot.
Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
@@ -253,20 +250,22 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
bool isInvariant = LD->isInvariant();
+ const MDNode *TBAAInfo = LD->getTBAAInfo();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
- isVolatile, isNonTemporal, isInvariant, Alignment);
+ isVolatile, isNonTemporal, isInvariant, Alignment,
+ TBAAInfo);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
isVolatile, isNonTemporal, isInvariant,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -307,6 +306,25 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Generic Operand Expansion.
//===--------------------------------------------------------------------===//
+void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements,
+ SmallVectorImpl<SDValue> &Ops,
+ EVT EltVT) {
+ assert(Op.getValueType().isInteger());
+ SDLoc DL(Op);
+ SDValue Parts[2];
+
+ if (NumElements > 1) {
+ NumElements >>= 1;
+ SplitInteger(Op, Parts[0], Parts[1]);
+ if (TLI.isBigEndian())
+ std::swap(Parts[0], Parts[1]);
+ IntegerToVector(Parts[0], NumElements, Ops, EltVT);
+ IntegerToVector(Parts[1], NumElements, Ops, EltVT);
+ } else {
+ Ops.push_back(DAG.getNode(ISD::BITCAST, DL, EltVT, Op));
+ }
+}
+
SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
SDLoc dl(N);
if (N->getValueType(0).isVector()) {
@@ -315,21 +333,27 @@ SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
// instead, but only if the new vector type is legal (otherwise there
// is no point, and it might create expansion loops). For example, on
// x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32.
+ //
+ // FIXME: I'm not sure why we are first trying to split the input into
+ // a 2 element vector, so I'm leaving it here to maintain the current
+ // behavior.
+ unsigned NumElts = 2;
EVT OVT = N->getOperand(0).getValueType();
EVT NVT = EVT::getVectorVT(*DAG.getContext(),
TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
- 2);
-
- if (isTypeLegal(NVT)) {
- SDValue Parts[2];
- GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]);
+ NumElts);
+ if (!isTypeLegal(NVT)) {
+ // If we can't find a legal type by splitting the integer in half,
+ // then we can use the node's value type.
+ NumElts = N->getValueType(0).getVectorNumElements();
+ NVT = N->getValueType(0);
+ }
- if (TLI.isBigEndian())
- std::swap(Parts[0], Parts[1]);
+ SmallVector<SDValue, 8> Ops;
+ IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType());
- SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2);
- return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
- }
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], NumElts);
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
}
// Otherwise, store to a temporary and load out again as the new type.
@@ -439,6 +463,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
unsigned Alignment = St->getAlignment();
bool isVolatile = St->isVolatile();
bool isNonTemporal = St->isNonTemporal();
+ const MDNode *TBAAInfo = St->getTBAAInfo();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
unsigned IncrementSize = NVT.getSizeInBits() / 8;
@@ -450,15 +475,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
std::swap(Lo, Hi);
Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
- assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
St->getPointerInfo().getWithOffset(IncrementSize),
isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -489,14 +513,12 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
SDValue Cond = N->getOperand(0);
CL = CH = Cond;
if (Cond.getValueType().isVector()) {
- assert(Cond.getValueType().getVectorElementType() == MVT::i1 &&
- "Condition legalized before result?");
- unsigned NumElements = Cond.getValueType().getVectorNumElements();
- EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2);
- CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
- CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
- DAG.getConstant(NumElements / 2, TLI.getVectorIdxTy()));
+ // Check if there are already splitted versions of the vector available and
+ // use those instead of splitting the mask operand again.
+ if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Cond, CL, CH);
+ else
+ llvm::tie(CL, CH) = DAG.SplitVector(Cond, dl);
}
Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL);
@@ -518,7 +540,7 @@ void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
EVT LoVT, HiVT;
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
Lo = DAG.getUNDEF(LoVT);
Hi = DAG.getUNDEF(HiVT);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index bbe11b8..2c3cdcc 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -171,7 +171,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return TranslateLegalizeResults(Op, Result);
case TargetLowering::Custom:
Changed = true;
- return LegalizeOp(TLI.LowerOperation(Result, DAG));
+ return TranslateLegalizeResults(Op, TLI.LowerOperation(Result, DAG));
case TargetLowering::Expand:
Changed = true;
return LegalizeOp(ExpandStore(Op));
@@ -227,6 +227,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FP_TO_UINT:
case ISD::FNEG:
case ISD::FABS:
+ case ISD::FCOPYSIGN:
case ISD::FSQRT:
case ISD::FSIN:
case ISD::FCOS:
@@ -241,6 +242,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FTRUNC:
case ISD::FRINT:
case ISD::FNEARBYINT:
+ case ISD::FROUND:
case ISD::FFLOOR:
case ISD::FP_ROUND:
case ISD::FP_EXTEND:
@@ -416,7 +418,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR,
LD->getPointerInfo().getWithOffset(Offset),
LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment());
+ LD->isInvariant(), LD->getAlignment(),
+ LD->getTBAAInfo());
} else {
EVT LoadVT = WideVT;
while (RemainingBytes < LoadBytes) {
@@ -426,13 +429,14 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
LD->getPointerInfo().getWithOffset(Offset),
LoadVT, LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ LD->isNonTemporal(), LD->getAlignment(),
+ LD->getTBAAInfo());
}
RemainingBytes -= LoadBytes;
Offset += LoadBytes;
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
- DAG.getIntPtrConstant(LoadBytes));
+ DAG.getConstant(LoadBytes, BasePTR.getValueType()));
LoadVals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
@@ -497,10 +501,10 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
SrcVT.getScalarType(),
LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
+ LD->getAlignment(), LD->getTBAAInfo());
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
- DAG.getIntPtrConstant(Stride));
+ DAG.getConstant(Stride, BasePTR.getValueType()));
Vals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
@@ -529,6 +533,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
+ const MDNode *TBAAInfo = ST->getTBAAInfo();
unsigned NumElem = StVT.getVectorNumElements();
// The type of the data we want to save
@@ -556,10 +561,10 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
// This scalar TruncStore may be illegal, but we legalize it later.
SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
- DAG.getIntPtrConstant(Stride));
+ DAG.getConstant(Stride, BasePTR.getValueType()));
Stores.push_back(Store);
}
@@ -597,10 +602,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
return DAG.UnrollVectorOp(Op.getNode());
// Generate a mask operand.
- EVT MaskTy = TLI.getSetCCResultType(*DAG.getContext(), VT);
- assert(MaskTy.isVector() && "Invalid CC type");
- assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits()
- && "Invalid mask size");
+ EVT MaskTy = VT.changeVectorElementTypeToInteger();
// What is the size of each element in the vector mask.
EVT BitTy = MaskTy.getScalarType();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 54380ec..f7a3e3d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -83,6 +83,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FRINT:
+ case ISD::FROUND:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
@@ -97,6 +98,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ADD:
case ISD::AND:
case ISD::FADD:
+ case ISD::FCOPYSIGN:
case ISD::FDIV:
case ISD::FMUL:
case ISD::FPOW:
@@ -215,7 +217,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
- N->isInvariant(), N->getOriginalAlignment());
+ N->isInvariant(), N->getOriginalAlignment(),
+ N->getTBAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -369,7 +372,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
- Res = ScalarizeVecOp_EXTEND(N);
+ case ISD::TRUNCATE:
+ Res = ScalarizeVecOp_UnaryOp(N);
break;
case ISD::CONCAT_VECTORS:
Res = ScalarizeVecOp_CONCAT_VECTORS(N);
@@ -408,7 +412,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
/// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs
/// to be scalarized, it must be <1 x ty>. Extend the element instead.
-SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTEND(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
assert(N->getValueType(0).getVectorNumElements() == 1 &&
"Unexected vector type!");
SDValue Elt = GetScalarizedVector(N->getOperand(0));
@@ -455,12 +459,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
N->getBasePtr(), N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
- N->getAlignment());
+ N->getAlignment(), N->getTBAAInfo());
return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
N->getBasePtr(), N->getPointerInfo(),
N->isVolatile(), N->isNonTemporal(),
- N->getOriginalAlignment());
+ N->getOriginalAlignment(), N->getTBAAInfo());
}
@@ -517,7 +521,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
break;
- case ISD::ANY_EXTEND:
case ISD::CONVERT_RNDSAT:
case ISD::CTLZ:
case ISD::CTTZ:
@@ -540,21 +543,27 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FRINT:
+ case ISD::FROUND:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
- case ISD::SIGN_EXTEND:
case ISD::SINT_TO_FP:
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
- case ISD::ZERO_EXTEND:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ SplitVecRes_ExtendOp(N, Lo, Hi);
+ break;
+
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::FADD:
+ case ISD::FCOPYSIGN:
case ISD::FSUB:
case ISD::FMUL:
case ISD::SDIV:
@@ -615,7 +624,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
// We know the result is a vector. The input may be either a vector or a
// scalar value.
EVT LoVT, HiVT;
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
SDLoc dl(N);
SDValue InOp = N->getOperand(0);
@@ -670,7 +679,7 @@ void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc dl(N);
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
unsigned LoNumElts = LoVT.getVectorNumElements();
SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size());
@@ -691,7 +700,7 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
}
EVT LoVT, HiVT;
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size());
@@ -707,7 +716,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
SDLoc dl(N);
EVT LoVT, HiVT;
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
@@ -731,7 +740,8 @@ void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
SDLoc dl(N);
EVT LoVT, HiVT;
- GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) =
+ DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT());
Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
DAG.getValueType(LoVT));
@@ -783,7 +793,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, StackPtr.getValueType()));
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
@@ -794,7 +804,7 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc dl(N);
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
Hi = DAG.getUNDEF(HiVT);
}
@@ -804,7 +814,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
EVT LoVT, HiVT;
SDLoc dl(LD);
- GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
ISD::LoadExtType ExtType = LD->getExtensionType();
SDValue Ch = LD->getChain();
@@ -815,20 +825,22 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
bool isInvariant = LD->isInvariant();
+ const MDNode *TBAAInfo = LD->getTBAAInfo();
EVT LoMemVT, HiMemVT;
- GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+ llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
- isInvariant, Alignment);
+ isInvariant, Alignment, TBAAInfo);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo().getWithOffset(IncrementSize),
- HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment);
+ HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment,
+ TBAAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -847,24 +859,12 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc DL(N);
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
// Split the input.
- EVT InVT = N->getOperand(0).getValueType();
SDValue LL, LH, RL, RH;
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
- LoVT.getVectorNumElements());
- LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
- DAG.getConstant(0, TLI.getVectorIdxTy()));
- LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
- DAG.getConstant(InNVT.getVectorNumElements(),
- TLI.getVectorIdxTy()));
-
- RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
- DAG.getConstant(0, TLI.getVectorIdxTy()));
- RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
- DAG.getConstant(InNVT.getVectorNumElements(),
- TLI.getVectorIdxTy()));
+ llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+ llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
@@ -875,22 +875,15 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
// Get the dest types - they may not match the input types, e.g. int_to_fp.
EVT LoVT, HiVT;
SDLoc dl(N);
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
// If the input also splits, handle it directly for a compile time speedup.
// Otherwise split it by hand.
EVT InVT = N->getOperand(0).getValueType();
- if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
GetSplitVector(N->getOperand(0), Lo, Hi);
- } else {
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
- LoVT.getVectorNumElements());
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
- DAG.getConstant(0, TLI.getVectorIdxTy()));
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
- DAG.getConstant(InNVT.getVectorNumElements(),
- TLI.getVectorIdxTy()));
- }
+ else
+ llvm::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
if (N->getOpcode() == ISD::FP_ROUND) {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
@@ -913,6 +906,58 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
}
}
+void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT SrcVT = N->getOperand(0).getValueType();
+ EVT DestVT = N->getValueType(0);
+ EVT LoVT, HiVT;
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT);
+
+ // Split the result of an ANY/SIGN/ZERO_EXTEND node into Lo and Hi. We can
+ // do better than a generic split operation if the extend is doing more
+ // than just doubling the width of the elements and the following are true:
+ // - The number of vector elements is even,
+ // - the source type is legal,
+ // - the type of a split source is illegal,
+ // - the type of an extended (by doubling element size) source is legal, and
+ // - the type of that extended source when split is legal (only the low
+ //   half's type is actually checked below).
+ // This won't necessarily completely legalize the operation, but it will
+ // more effectively move in the right direction and prevent falling down
+ // to scalarization in many cases due to the input vector being split too
+ // far.
+ unsigned NumElements = SrcVT.getVectorNumElements();
+ if ((NumElements & 1) == 0 &&
+ SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ EVT NewSrcVT = EVT::getVectorVT(
+ Ctx, EVT::getIntegerVT(
+ Ctx, SrcVT.getVectorElementType().getSizeInBits() * 2),
+ NumElements);
+ EVT SplitSrcVT =
+ EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2);
+ EVT SplitLoVT, SplitHiVT;
+ llvm::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
+ if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&
+ TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) {
+ DEBUG(dbgs() << "Split vector extend via incremental extend:";
+ N->dump(&DAG); dbgs() << "\n");
+ // Extend the source vector by one step (doubling the element size).
+ SDValue NewSrc =
+ DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
+ // Split the one-step-extended vector into its low and high halves.
+ llvm::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
+ // Extend those vector halves the rest of the way, to LoVT and HiVT.
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ return;
+ }
+ }
+ // Otherwise fall back to the generic unary operator splitting.
+ SplitVecRes_UnaryOp(N, Lo, Hi);
+}
+
void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
SDValue &Lo, SDValue &Hi) {
// The low and high parts of the original input give four input vectors.
@@ -1105,41 +1150,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
SDValue Mask = N->getOperand(0);
SDValue Src0 = N->getOperand(1);
SDValue Src1 = N->getOperand(2);
+ EVT Src0VT = Src0.getValueType();
SDLoc DL(N);
- EVT MaskVT = Mask.getValueType();
- assert(MaskVT.isVector() && "VSELECT without a vector mask?");
+ assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?");
SDValue Lo, Hi;
GetSplitVector(N->getOperand(0), Lo, Hi);
assert(Lo.getValueType() == Hi.getValueType() &&
"Lo and Hi have differing types");
- unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
- unsigned HiNumElts = Hi.getValueType().getVectorNumElements();
- assert(LoNumElts == HiNumElts && "Asymmetric vector split?");
-
- LLVMContext &Ctx = *DAG.getContext();
- SDValue Zero = DAG.getConstant(0, TLI.getVectorIdxTy());
- SDValue LoElts = DAG.getConstant(LoNumElts, TLI.getVectorIdxTy());
- EVT Src0VT = Src0.getValueType();
- EVT Src0EltTy = Src0VT.getVectorElementType();
- EVT MaskEltTy = MaskVT.getVectorElementType();
-
- EVT LoOpVT = EVT::getVectorVT(Ctx, Src0EltTy, LoNumElts);
- EVT LoMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, LoNumElts);
- EVT HiOpVT = EVT::getVectorVT(Ctx, Src0EltTy, HiNumElts);
- EVT HiMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, HiNumElts);
+ EVT LoOpVT, HiOpVT;
+ llvm::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT);
+ assert(LoOpVT == HiOpVT && "Asymmetric vector split?");
- SDValue LoOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src0, Zero);
- SDValue LoOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src1, Zero);
-
- SDValue HiOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src0, LoElts);
- SDValue HiOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src1, LoElts);
-
- SDValue LoMask =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoMaskVT, Mask, Zero);
- SDValue HiMask =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiMaskVT, Mask, LoElts);
+ SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask;
+ llvm::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL);
+ llvm::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL);
+ llvm::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
SDValue LoSelect =
DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1);
@@ -1249,33 +1276,34 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned Alignment = N->getOriginalAlignment();
bool isVol = N->isVolatile();
bool isNT = N->isNonTemporal();
+ const MDNode *TBAAInfo = N->getTBAAInfo();
SDValue Lo, Hi;
GetSplitVector(N->getOperand(1), Lo, Hi);
EVT LoMemVT, HiMemVT;
- GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+ llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
if (isTruncating)
Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
- LoMemVT, isVol, isNT, Alignment);
+ LoMemVT, isVol, isNT, Alignment, TBAAInfo);
else
Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
- isVol, isNT, Alignment);
+ isVol, isNT, Alignment, TBAAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
if (isTruncating)
Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
- HiMemVT, isVol, isNT, Alignment);
+ HiMemVT, isVol, isNT, Alignment, TBAAInfo);
else
Hi = DAG.getStore(Ch, DL, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
- isVol, isNT, Alignment);
+ isVol, isNT, Alignment, TBAAInfo);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
@@ -1341,13 +1369,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
SDLoc DL(N);
// Extract the halves of the input via extract_subvector.
- EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
- InVT.getVectorElementType(), NumElements/2);
- SDValue InLoVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
- SDValue InHiVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec,
- DAG.getConstant(NumElements/2,
- TLI.getVectorIdxTy()));
+ SDValue InLoVec, InHiVec;
+ llvm::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL);
// Truncate them to 1/2 the element size.
EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
@@ -1446,27 +1469,31 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VECTOR_SHUFFLE:
Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
break;
+
case ISD::ADD:
case ISD::AND:
case ISD::BSWAP:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::OR:
+ case ISD::SUB:
+ case ISD::XOR:
+ Res = WidenVecRes_Binary(N);
+ break;
+
case ISD::FADD:
case ISD::FCOPYSIGN:
- case ISD::FDIV:
case ISD::FMUL:
case ISD::FPOW:
- case ISD::FREM:
case ISD::FSUB:
- case ISD::MUL:
- case ISD::MULHS:
- case ISD::MULHU:
- case ISD::OR:
+ case ISD::FDIV:
+ case ISD::FREM:
case ISD::SDIV:
- case ISD::SREM:
case ISD::UDIV:
+ case ISD::SREM:
case ISD::UREM:
- case ISD::SUB:
- case ISD::XOR:
- Res = WidenVecRes_Binary(N);
+ Res = WidenVecRes_BinaryCanTrap(N);
break;
case ISD::FPOWI:
@@ -1507,6 +1534,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FNEARBYINT:
case ISD::FNEG:
case ISD::FRINT:
+ case ISD::FROUND:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
@@ -1534,6 +1562,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
// Binary op widening.
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
+ // Binary op widening for operations that can trap.
unsigned Opcode = N->getOpcode();
SDLoc dl(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -2532,6 +2569,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
bool isInvariant = LD->isInvariant();
+ const MDNode *TBAAInfo = LD->getTBAAInfo();
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth; // Difference
@@ -2541,7 +2579,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
int NewVTWidth = NewVT.getSizeInBits();
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
- isVolatile, isNonTemporal, isInvariant, Align);
+ isVolatile, isNonTemporal, isInvariant, Align,
+ TBAAInfo);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction
@@ -2577,7 +2616,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
unsigned Increment = NewVTWidth / 8;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getIntPtrConstant(Increment));
+ DAG.getConstant(Increment, BasePtr.getValueType()));
SDValue L;
if (LdWidth < NewVTWidth) {
@@ -2586,7 +2625,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
NewVTWidth = NewVT.getSizeInBits();
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset), isVolatile,
- isNonTemporal, isInvariant, MinAlign(Align, Increment));
+ isNonTemporal, isInvariant, MinAlign(Align, Increment),
+ TBAAInfo);
LdChain.push_back(L.getValue(1));
if (L->getValueType(0).isVector()) {
SmallVector<SDValue, 16> Loads;
@@ -2602,7 +2642,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
} else {
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset), isVolatile,
- isNonTemporal, isInvariant, MinAlign(Align, Increment));
+ isNonTemporal, isInvariant, MinAlign(Align, Increment),
+ TBAAInfo);
LdChain.push_back(L.getValue(1));
}
@@ -2682,6 +2723,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
unsigned Align = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
+ const MDNode *TBAAInfo = LD->getTBAAInfo();
EVT EltVT = WidenVT.getVectorElementType();
EVT LdEltVT = LdVT.getVectorElementType();
@@ -2693,15 +2735,17 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
unsigned Increment = LdEltVT.getSizeInBits() / 8;
Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr,
LD->getPointerInfo(),
- LdEltVT, isVolatile, isNonTemporal, Align);
+ LdEltVT, isVolatile, isNonTemporal, Align, TBAAInfo);
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
- BasePtr, DAG.getIntPtrConstant(Offset));
+ BasePtr,
+ DAG.getConstant(Offset,
+ BasePtr.getValueType()));
Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
- isVolatile, isNonTemporal, Align);
+ isVolatile, isNonTemporal, Align, TBAAInfo);
LdChain.push_back(Ops[i].getValue(1));
}
@@ -2724,6 +2768,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
unsigned Align = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
+ const MDNode *TBAAInfo = ST->getTBAAInfo();
SDValue ValOp = GetWidenedVector(ST->getValue());
SDLoc dl(ST);
@@ -2750,12 +2795,12 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo().getWithOffset(Offset),
isVolatile, isNonTemporal,
- MinAlign(Align, Offset)));
+ MinAlign(Align, Offset), TBAAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
Idx += NumVTElts;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getIntPtrConstant(Increment));
+ DAG.getConstant(Increment, BasePtr.getValueType()));
} while (StWidth != 0 && StWidth >= NewVTWidth);
} else {
// Cast the vector to the scalar type we can store
@@ -2770,11 +2815,11 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo().getWithOffset(Offset),
isVolatile, isNonTemporal,
- MinAlign(Align, Offset)));
+ MinAlign(Align, Offset), TBAAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getIntPtrConstant(Increment));
+ DAG.getConstant(Increment, BasePtr.getValueType()));
} while (StWidth != 0 && StWidth >= NewVTWidth);
// Restore index back to be relative to the original widen element type
Idx = Idx * NewVTWidth / ValEltWidth;
@@ -2792,6 +2837,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
unsigned Align = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
+ const MDNode *TBAAInfo = ST->getTBAAInfo();
SDValue ValOp = GetWidenedVector(ST->getValue());
SDLoc dl(ST);
@@ -2814,17 +2860,19 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
DAG.getConstant(0, TLI.getVectorIdxTy()));
StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo(), StEltVT,
- isVolatile, isNonTemporal, Align));
+ isVolatile, isNonTemporal, Align,
+ TBAAInfo));
unsigned Offset = Increment;
for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
- BasePtr, DAG.getIntPtrConstant(Offset));
+ BasePtr, DAG.getConstant(Offset,
+ BasePtr.getValueType()));
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
DAG.getConstant(0, TLI.getVectorIdxTy()));
StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
ST->getPointerInfo().getWithOffset(Offset),
StEltVT, isVolatile, isNonTemporal,
- MinAlign(Align, Offset)));
+ MinAlign(Align, Offset), TBAAInfo));
}
}
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index d684164..1dd2128 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -389,10 +389,9 @@ signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
// Constants used to denote relative importance of
// heuristic components for cost computation.
static const unsigned PriorityOne = 200;
-static const unsigned PriorityTwo = 100;
-static const unsigned PriorityThree = 50;
-static const unsigned PriorityFour = 15;
-static const unsigned PriorityFive = 5;
+static const unsigned PriorityTwo = 50;
+static const unsigned PriorityThree = 15;
+static const unsigned PriorityFour = 5;
static const unsigned ScaleOne = 20;
static const unsigned ScaleTwo = 10;
static const unsigned ScaleThree = 5;
@@ -449,7 +448,7 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
if (N->isMachineOpcode()) {
const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
if (TID.isCall())
- ResCount += (PriorityThree + (ScaleThree*N->getNumValues()));
+ ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
}
else
switch (N->getOpcode()) {
@@ -457,11 +456,11 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
case ISD::TokenFactor:
case ISD::CopyFromReg:
case ISD::CopyToReg:
- ResCount += PriorityFive;
+ ResCount += PriorityFour;
break;
case ISD::INLINEASM:
- ResCount += PriorityFour;
+ ResCount += PriorityThree;
break;
}
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index f5fe168..1a562d7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -718,7 +718,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
// indicate the scheduled cycle.
SU->setHeightToAtLeast(CurCycle);
- // Reserve resources for the scheduled intruction.
+ // Reserve resources for the scheduled instruction.
EmitNode(SU);
Sequence.push_back(SU);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 982dcc9..054e3dd 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -690,15 +690,6 @@ void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
}
#endif // NDEBUG
-namespace {
- struct OrderSorter {
- bool operator()(const std::pair<unsigned, MachineInstr*> &A,
- const std::pair<unsigned, MachineInstr*> &B) {
- return A.first < B.first;
- }
- };
-}
-
/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
static void
ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
@@ -744,7 +735,10 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
}
MachineBasicBlock *BB = Emitter.getBlock();
- if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
+ if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() ||
+ // Fast-isel may have inserted some instructions, in which case the
+ // BB->back().isPHI() test will not fire when we want it to.
+ prior(Emitter.getInsertPos())->isPHI()) {
// Did not insert any instruction.
Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
return;
@@ -857,7 +851,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// Sort the source order instructions and use the order to insert debug
// values.
- std::sort(Orders.begin(), Orders.end(), OrderSorter());
+ std::sort(Orders.begin(), Orders.end(), less_first());
SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index bc6063c..45d5a4f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -869,16 +869,19 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
// EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
- : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), OptLevel(OL),
+ : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), TLI(0), OptLevel(OL),
EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
- Root(getEntryNode()), UpdateListeners(0) {
+ Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
+ UpdateListeners(0) {
AllNodes.push_back(&EntryNode);
DbgInfo = new SDDbgInfo();
}
-void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti) {
+void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti,
+ const TargetLowering *tli) {
MF = &mf;
TTI = tti;
+ TLI = tli;
Context = &mf.getFunction()->getContext();
}
@@ -983,6 +986,54 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits());
Elt = ConstantInt::get(*getContext(), NewVal);
}
+ // In other cases the element type is illegal and needs to be expanded, for
+ // example v2i64 on MIPS32. In this case, find the nearest legal type, split
+ // the value into n parts and use a vector type with n-times the elements.
+ // Then bitcast to the type requested.
+ // Legalizing constants too early makes the DAGCombiner's job harder so we
+ // only legalize if the DAG tells us we must produce legal types.
+ else if (NewNodesMustHaveLegalTypes && VT.isVector() &&
+ TLI->getTypeAction(*getContext(), EltVT) ==
+ TargetLowering::TypeExpandInteger) {
+ APInt NewVal = Elt->getValue();
+ EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
+ unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
+ unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits;
+ EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts);
+
+ // Check the temporary vector is the correct size. If this fails then
+ // getTypeToTransformTo() probably returned a type whose size (in bits)
+ // isn't a power-of-2 factor of the requested type size.
+ assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ SmallVector<SDValue, 2> EltParts;
+ for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) {
+ EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits)
+ .trunc(ViaEltSizeInBits),
+ ViaEltVT, isT));
+ }
+
+ // EltParts is currently in little endian order. If we actually want
+ // big-endian order then reverse it now.
+ if (TLI->isBigEndian())
+ std::reverse(EltParts.begin(), EltParts.end());
+
+ // The elements must be reversed when the element order is different
+ // to the endianness of the elements (because the BITCAST is itself a
+ // vector shuffle in this situation). However, we do not need any code to
+ // perform this reversal because getConstant() is producing a vector
+ // splat.
+ // This situation occurs in MIPS MSA.
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i)
+ Ops.insert(Ops.end(), EltParts.begin(), EltParts.end());
+
+ SDValue Result = getNode(ISD::BITCAST, SDLoc(), VT,
+ getNode(ISD::BUILD_VECTOR, SDLoc(), ViaVecVT,
+ &Ops[0], Ops.size()));
+ return Result;
+ }
assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
"APInt size does not match type size!");
@@ -1077,9 +1128,10 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,
unsigned char TargetFlags) {
assert((TargetFlags == 0 || isTargetGA) &&
"Cannot set target flags on target-independent globals");
+ const TargetLowering *TLI = TM.getTargetLowering();
// Truncate (with sign-extension) the offset value to the pointer size.
- unsigned BitWidth = TM.getTargetLowering()->getPointerTy().getSizeInBits();
+ unsigned BitWidth = TLI->getPointerTypeSizeInBits(GV->getType());
if (BitWidth < 64)
Offset = SignExtend64(Offset, BitWidth);
@@ -1298,11 +1350,8 @@ static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
SDValue N2, const int *Mask) {
- assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE");
- assert(VT.isVector() && N1.getValueType().isVector() &&
- "Vector Shuffle VTs must be a vectors");
- assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType()
- && "Vector Shuffle VTs must have same element type");
+ assert(VT == N1.getValueType() && VT == N2.getValueType() &&
+ "Invalid VECTOR_SHUFFLE");
// Canonicalize shuffle undef, undef -> undef
if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
@@ -1351,17 +1400,13 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
commuteShuffle(N1, N2, MaskVec);
}
- // If Identity shuffle, or all shuffle in to undef, return that node.
- bool AllUndef = true;
+ // If Identity shuffle return that node.
bool Identity = true;
for (unsigned i = 0; i != NElts; ++i) {
if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
- if (MaskVec[i] >= 0) AllUndef = false;
}
- if (Identity && NElts == N1.getValueType().getVectorNumElements())
+ if (Identity && NElts)
return N1;
- if (AllUndef)
- return getUNDEF(VT);
FoldingSetNodeID ID;
SDValue Ops[2] = { N1, N2 };
@@ -1380,7 +1425,9 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));
ShuffleVectorSDNode *N =
- new (NodeAllocator) ShuffleVectorSDNode(VT, dl.getIROrder(), dl.getDebugLoc(), N1, N2, MaskAlloc);
+ new (NodeAllocator) ShuffleVectorSDNode(VT, dl.getIROrder(),
+ dl.getDebugLoc(), N1, N2,
+ MaskAlloc);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -1403,8 +1450,9 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl,
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(), dl.getDebugLoc(), Ops, 5,
- Code);
+ CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(),
+ dl.getDebugLoc(),
+ Ops, 5, Code);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -1447,7 +1495,8 @@ SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) {
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), dl.getDebugLoc(), Root, Label);
+ SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(),
+ dl.getDebugLoc(), Root, Label);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -1510,6 +1559,26 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
return SDValue(N, 0);
}
+/// getAddrSpaceCast - Return an AddrSpaceCastSDNode of type VT whose single operand is Ptr, tagged with SrcAS/DestAS; an already-registered equivalent node is returned instead of creating a new one.
+SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr,
+ unsigned SrcAS, unsigned DestAS) {
+ SDValue Ops[] = {Ptr};
+ FoldingSetNodeID ID; // CSE key: opcode, VT list, the operand, then both address spaces
+ AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), &Ops[0], 1);
+ ID.AddInteger(SrcAS); // casts that differ only in address space must not be merged
+ ID.AddInteger(DestAS);
+
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0); // equivalent node already in CSEMap: reuse it
+
+ SDNode *N = new (NodeAllocator) AddrSpaceCastSDNode(dl.getIROrder(),
+ dl.getDebugLoc(),
+ VT, Ptr, SrcAS, DestAS);
+ CSEMap.InsertNode(N, IP); // IP is the insert position reported by the failed lookup above
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
/// getShiftAmountOperand - Return the specified value casted to
/// the target's desired shift amount type.
@@ -1561,7 +1630,12 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
case ISD::SETFALSE:
case ISD::SETFALSE2: return getConstant(0, VT);
case ISD::SETTRUE:
- case ISD::SETTRUE2: return getConstant(1, VT);
+ case ISD::SETTRUE2: {
+ const TargetLowering *TLI = TM.getTargetLowering();
+ TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(VT.isVector());
+ return getConstant(
+ Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT);
+ }
case ISD::SETOEQ:
case ISD::SETOGT:
@@ -1643,7 +1717,12 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
}
} else {
// Ensure that the constant occurs on the RHS.
- return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond));
+ ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
+ MVT CompVT = N1.getValueType().getSimpleVT();
+ if (!TM.getTargetLowering()->isCondCodeLegal(SwappedCond, CompVT))
+ return SDValue();
+
+ return getSetCC(dl, VT, N2, N1, SwappedCond);
}
}
@@ -1942,7 +2021,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
case ISD::SIGN_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt InSignBit = APInt::getSignBit(InBits);
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
KnownZero = KnownZero.trunc(InBits);
@@ -2054,7 +2132,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
const APInt &RA = Rem->getAPIntValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1);
// The low bits of the first operand are unchanged by the srem.
@@ -2150,7 +2227,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
}
case ISD::SIGN_EXTEND:
- Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ Tmp =
+ VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
case ISD::SIGN_EXTEND_INREG:
@@ -2411,7 +2489,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), getVTList(VT));
+ SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), getVTList(VT));
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
@@ -2672,10 +2751,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, Operand);
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTs, Operand);
CSEMap.InsertNode(N, IP);
} else {
- N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, Operand);
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTs, Operand);
}
AllNodes.push_back(N);
@@ -3073,9 +3154,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
if (VT.isSimple() && N1.getValueType().isSimple()) {
assert(VT.isVector() && N1.getValueType().isVector() &&
"Extract subvector VTs must be a vectors!");
- assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() &&
+ assert(VT.getVectorElementType() ==
+ N1.getValueType().getVectorElementType() &&
"Extract subvector VTs must have the same element type!");
- assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ assert(VT.getSimpleVT() <= N1.getSimpleValueType() &&
"Extract subvector must be from larger vector to smaller vector!");
if (isa<ConstantSDNode>(Index.getNode())) {
@@ -3086,7 +3168,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
}
// Trivial extraction.
- if (VT.getSimpleVT() == N1.getValueType().getSimpleVT())
+ if (VT.getSimpleVT() == N1.getSimpleValueType())
return N1;
}
break;
@@ -3244,10 +3326,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2);
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTs, N1, N2);
CSEMap.InsertNode(N, IP);
} else {
- N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2);
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTs, N1, N2);
}
AllNodes.push_back(N);
@@ -3316,7 +3400,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
"Insert subvector VTs must be a vectors");
assert(VT == N1.getValueType() &&
"Dest and insert subvector source types must match!");
- assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ assert(N2.getSimpleValueType() <= N1.getSimpleValueType() &&
"Insert subvector must be from smaller vector to larger vector!");
if (isa<ConstantSDNode>(Index.getNode())) {
assert((N2.getValueType().getVectorNumElements() +
@@ -3326,7 +3410,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
}
// Trivial insertion.
- if (VT.getSimpleVT() == N2.getValueType().getSimpleVT())
+ if (VT.getSimpleVT() == N2.getSimpleValueType())
return N2;
}
break;
@@ -3349,10 +3433,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2, N3);
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTs, N1, N2, N3);
CSEMap.InsertNode(N, IP);
} else {
- N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2, N3);
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTs, N1, N2, N3);
}
AllNodes.push_back(N);
@@ -3771,7 +3857,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
- SDValue Value, Store;
+ SDValue Value;
Value = DAG.getLoad(VT, dl, Chain,
getMemBasePlusOffset(Src, SrcOff, dl, DAG),
@@ -3787,7 +3873,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
- SDValue Value, Store;
+ SDValue Store;
Store = DAG.getStore(Chain, dl, LoadValues[i],
getMemBasePlusOffset(Dst, DstOff, dl, DAG),
@@ -3800,6 +3886,24 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
&OutChains[0], OutChains.size());
}
+/// \brief Lower the call to 'memset' intrinsic function into a series of store
+/// operations.
+///
+/// \param DAG Selection DAG where lowered code is placed.
+/// \param dl Link to corresponding IR location.
+/// \param Chain Control flow dependency.
+/// \param Dst Pointer to destination memory location.
+/// \param Src Value of byte to write into the memory.
+/// \param Size Number of bytes to write.
+/// \param Align Alignment of the destination in bytes.
+/// \param isVol True if destination is volatile.
+/// \param DstPtrInfo IR information on the memory pointer.
+/// \returns New head in the control flow, if lowering was successful, empty
+/// SDValue otherwise.
+///
+/// The function tries to replace 'llvm.memset' intrinsic with several store
+/// operations and value calculation code. This is usually profitable for small
+/// memory size.
static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
SDValue Chain, SDValue Dst,
SDValue Src, uint64_t Size,
@@ -4078,6 +4182,37 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
+ SDVTList VTList, SDValue* Ops, unsigned NumOps,
+ MachineMemOperand *MMO,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) { // Operand-array form: builds (or reuses via CSEMap) an AtomicSDNode; the fixed-arity getAtomic overloads changed in this diff forward here with 2, 3 or 4 operands.
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits()); // CSE key: memory VT, then opcode/VT list/operands, then the MMO's address space
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); // NOTE(review): Ordering and SynchScope are not added to ID in this function
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO); // existing node found: hand it the new MMO via refineAlignment, then reuse it
+ return SDValue(E, 0);
+ }
+
+ // Allocate the operands array for the node out of the BumpPtrAllocator, since
+ // SDNode doesn't have access to it. This memory will be "leaked" when
+ // the node is deallocated, but recovered when the allocator is released.
+ // If the number of operands is less than 5 we use AtomicSDNode's internal
+ // storage.
+ SDUse *DynOps = NumOps > 4 ? OperandAllocator.Allocate<SDUse>(NumOps) : 0; // null when NumOps <= 4
+
+ SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(),
+ dl.getDebugLoc(), VTList, MemVT,
+ Ops, DynOps, NumOps, MMO,
+ Ordering, SynchScope);
+ CSEMap.InsertNode(N, IP); // IP is the insert position reported by the failed lookup above
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
SDValue Chain, SDValue Ptr, SDValue Cmp,
SDValue Swp, MachinePointerInfo PtrInfo,
unsigned Alignment,
@@ -4117,22 +4252,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
EVT VT = Cmp.getValueType();
SDVTList VTs = getVTList(VT, MVT::Other);
- FoldingSetNodeID ID;
- ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
- AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
- ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
- void* IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
- cast<AtomicSDNode>(E)->refineAlignment(MMO);
- return SDValue(E, 0);
- }
- SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, MemVT, Chain,
- Ptr, Cmp, Swp, MMO, Ordering,
- SynchScope);
- CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
- return SDValue(N, 0);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, Ordering, SynchScope);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
@@ -4190,22 +4311,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) :
getVTList(VT, MVT::Other);
- FoldingSetNodeID ID;
- ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr, Val};
- AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
- ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
- void* IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
- cast<AtomicSDNode>(E)->refineAlignment(MMO);
- return SDValue(E, 0);
- }
- SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, MemVT, Chain,
- Ptr, Val, MMO,
- Ordering, SynchScope);
- CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
- return SDValue(N, 0);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, 3, MMO, Ordering, SynchScope);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
@@ -4248,21 +4355,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op");
SDVTList VTs = getVTList(VT, MVT::Other);
- FoldingSetNodeID ID;
- ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr};
- AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
- ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
- void* IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
- cast<AtomicSDNode>(E)->refineAlignment(MMO);
- return SDValue(E, 0);
- }
- SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, MemVT, Chain,
- Ptr, MMO, Ordering, SynchScope);
- CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
- return SDValue(N, 0);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, 2, MMO, Ordering, SynchScope);
}
/// getMergeValues - Create a MERGE_VALUES node from the given operands.
@@ -4339,12 +4433,14 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
return SDValue(E, 0);
}
- N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTList, Ops, NumOps,
- MemVT, MMO);
+ N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(),
+ dl.getDebugLoc(), VTList, Ops,
+ NumOps, MemVT, MMO);
CSEMap.InsertNode(N, IP);
} else {
- N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTList, Ops, NumOps,
- MemVT, MMO);
+ N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(),
+ dl.getDebugLoc(), VTList, Ops,
+ NumOps, MemVT, MMO);
}
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -4458,7 +4554,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
cast<LoadSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl.getIROrder(), dl.getDebugLoc(), VTs, AM, ExtType,
+ SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl.getIROrder(),
+ dl.getDebugLoc(), VTs, AM, ExtType,
MemVT, MMO);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
@@ -4478,6 +4575,14 @@ SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl,
TBAAInfo, Ranges);
}
+SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl,
+ SDValue Chain, SDValue Ptr,
+ MachineMemOperand *MMO) { // Convenience overload: forwards to the general getLoad as ISD::UNINDEXED / ISD::NON_EXTLOAD with a caller-supplied MMO.
+ SDValue Undef = getUNDEF(Ptr.getValueType()); // undef of the pointer's value type; presumably fills the offset operand of an unindexed load -- TODO confirm against the full getLoad signature
+ return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ VT, MMO); // VT is passed twice: as the result type and in the slot where the getExtLoad overload passes MemVT
+}
+
SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT,
SDValue Chain, SDValue Ptr,
MachinePointerInfo PtrInfo, EVT MemVT,
@@ -4490,6 +4595,14 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT,
}
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT,
+ SDValue Chain, SDValue Ptr, EVT MemVT,
+ MachineMemOperand *MMO) { // Convenience overload: forwards to the general getLoad as ISD::UNINDEXED with the given ExtType, MemVT and a caller-supplied MMO.
+ SDValue Undef = getUNDEF(Ptr.getValueType()); // undef of the pointer's value type; presumably fills the offset operand of an unindexed load -- TODO confirm against the full getLoad signature
+ return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
+ MemVT, MMO);
+}
+
SDValue
SelectionDAG::getIndexedLoad(SDValue OrigLoad, SDLoc dl, SDValue Base,
SDValue Offset, ISD::MemIndexedMode AM) {
@@ -4548,8 +4661,9 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
cast<StoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), dl.getDebugLoc(), VTs, ISD::UNINDEXED,
- false, VT, MMO);
+ SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(),
+ dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, false, VT, MMO);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -4616,8 +4730,9 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
cast<StoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), dl.getDebugLoc(), VTs, ISD::UNINDEXED,
- true, SVT, MMO);
+ SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(),
+ dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, true, SVT, MMO);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -4640,7 +4755,8 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(),
+ dl.getDebugLoc(), VTs, AM,
ST->isTruncatingStore(),
ST->getMemoryVT(),
ST->getMemOperand());
@@ -4715,10 +4831,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, Ops, NumOps);
+ N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(),
+ VTs, Ops, NumOps);
CSEMap.InsertNode(N, IP);
} else {
- N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, Ops, NumOps);
+ N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(),
+ VTs, Ops, NumOps);
}
AllNodes.push_back(N);
@@ -4781,26 +4899,36 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
return SDValue(E, 0);
if (NumOps == 1) {
- N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0]);
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTList, Ops[0]);
} else if (NumOps == 2) {
- N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0], Ops[1]);
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTList, Ops[0],
+ Ops[1]);
} else if (NumOps == 3) {
- N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0], Ops[1],
- Ops[2]);
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTList, Ops[0],
+ Ops[1], Ops[2]);
} else {
- N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops, NumOps);
+ N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(),
+ VTList, Ops, NumOps);
}
CSEMap.InsertNode(N, IP);
} else {
if (NumOps == 1) {
- N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0]);
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTList, Ops[0]);
} else if (NumOps == 2) {
- N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0], Ops[1]);
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTList, Ops[0],
+ Ops[1]);
} else if (NumOps == 3) {
- N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0], Ops[1],
- Ops[2]);
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTList, Ops[0],
+ Ops[1], Ops[2]);
} else {
- N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops, NumOps);
+ N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(),
+ VTList, Ops, NumOps);
}
}
AllNodes.push_back(N);
@@ -4851,76 +4979,81 @@ SDVTList SelectionDAG::getVTList(EVT VT) {
}
SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) {
- for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
- E = VTList.rend(); I != E; ++I)
- if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2)
- return *I;
-
- EVT *Array = Allocator.Allocate<EVT>(2);
- Array[0] = VT1;
- Array[1] = VT2;
- SDVTList Result = makeVTList(Array, 2);
- VTList.push_back(Result);
- return Result;
+ FoldingSetNodeID ID; // Return the cached SDVTList for {VT1, VT2}, creating it on first use; lookup key is the element count followed by each VT's raw bits.
+ ID.AddInteger(2U);
+ ID.AddInteger(VT1.getRawBits());
+ ID.AddInteger(VT2.getRawBits());
+
+ void *IP = 0;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); // keyed lookup in VTListMap replaces the removed reverse linear scan of VTList
+ if (Result == NULL) { // first request for this pair: allocate the EVT array and the node from Allocator
+ EVT *Array = Allocator.Allocate<EVT>(2);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 2);
+ VTListMap.InsertNode(Result, IP); // IP is the insert position reported by the failed lookup
+ }
+ return Result->getSDVTList();
}
SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) {
- for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
- E = VTList.rend(); I != E; ++I)
- if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
- I->VTs[2] == VT3)
- return *I;
-
- EVT *Array = Allocator.Allocate<EVT>(3);
- Array[0] = VT1;
- Array[1] = VT2;
- Array[2] = VT3;
- SDVTList Result = makeVTList(Array, 3);
- VTList.push_back(Result);
- return Result;
+ FoldingSetNodeID ID; // Return the cached SDVTList for {VT1, VT2, VT3}, creating it on first use; lookup key is the element count followed by each VT's raw bits.
+ ID.AddInteger(3U);
+ ID.AddInteger(VT1.getRawBits());
+ ID.AddInteger(VT2.getRawBits());
+ ID.AddInteger(VT3.getRawBits());
+
+ void *IP = 0;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); // keyed lookup in VTListMap replaces the removed reverse linear scan of VTList
+ if (Result == NULL) { // first request for this triple: allocate the EVT array and the node from Allocator
+ EVT *Array = Allocator.Allocate<EVT>(3);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 3);
+ VTListMap.InsertNode(Result, IP); // IP is the insert position reported by the failed lookup
+ }
+ return Result->getSDVTList();
}
SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
- for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
- E = VTList.rend(); I != E; ++I)
- if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
- I->VTs[2] == VT3 && I->VTs[3] == VT4)
- return *I;
-
- EVT *Array = Allocator.Allocate<EVT>(4);
- Array[0] = VT1;
- Array[1] = VT2;
- Array[2] = VT3;
- Array[3] = VT4;
- SDVTList Result = makeVTList(Array, 4);
- VTList.push_back(Result);
- return Result;
+ FoldingSetNodeID ID; // Return the cached SDVTList for {VT1, VT2, VT3, VT4}, creating it on first use; lookup key is the element count followed by each VT's raw bits.
+ ID.AddInteger(4U);
+ ID.AddInteger(VT1.getRawBits());
+ ID.AddInteger(VT2.getRawBits());
+ ID.AddInteger(VT3.getRawBits());
+ ID.AddInteger(VT4.getRawBits());
+
+ void *IP = 0;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); // keyed lookup in VTListMap replaces the removed reverse linear scan of VTList
+ if (Result == NULL) { // first request for this 4-tuple: allocate the EVT array and the node from Allocator
+ EVT *Array = Allocator.Allocate<EVT>(4);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Array[3] = VT4;
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 4);
+ VTListMap.InsertNode(Result, IP); // IP is the insert position reported by the failed lookup
+ }
+ return Result->getSDVTList();
}
SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) {
- switch (NumVTs) {
- case 0: llvm_unreachable("Cannot have nodes without results!");
- case 1: return getVTList(VTs[0]);
- case 2: return getVTList(VTs[0], VTs[1]);
- case 3: return getVTList(VTs[0], VTs[1], VTs[2]);
- case 4: return getVTList(VTs[0], VTs[1], VTs[2], VTs[3]);
- default: break;
+ FoldingSetNodeID ID; // Return the cached SDVTList for the NumVTs types in VTs, creating it on first use.
+ ID.AddInteger(NumVTs); // NOTE(review): the removed switch trapped NumVTs == 0 via llvm_unreachable and dispatched 1..4 to the fixed-arity overloads; the added code has no such check or dispatch
+ for (unsigned index = 0; index < NumVTs; index++) { // same key layout as the 2/3/4-argument overloads in this diff: count first, then each VT's raw bits
+ ID.AddInteger(VTs[index].getRawBits());
}
- for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
- E = VTList.rend(); I != E; ++I) {
- if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1])
- continue;
-
- if (std::equal(&VTs[2], &VTs[NumVTs], &I->VTs[2]))
- return *I;
+ void *IP = 0;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); // keyed lookup in VTListMap replaces the removed reverse linear scan of VTList
+ if (Result == NULL) { // first request for this sequence: copy the caller's VTs into an array allocated from Allocator
+ EVT *Array = Allocator.Allocate<EVT>(NumVTs);
+ std::copy(VTs, VTs + NumVTs, Array);
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs);
+ VTListMap.InsertNode(Result, IP); // IP is the insert position reported by the failed lookup
}
-
- EVT *Array = Allocator.Allocate<EVT>(NumVTs);
- std::copy(VTs, VTs+NumVTs, Array);
- SDVTList Result = makeVTList(Array, NumVTs);
- VTList.push_back(Result);
- return Result;
+ return Result->getSDVTList();
}
@@ -5410,7 +5543,8 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs,
}
// Allocate a new MachineSDNode.
- N = new (NodeAllocator) MachineSDNode(~Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ N = new (NodeAllocator) MachineSDNode(~Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTs);
// Initialize the operands list.
if (NumOps > array_lengthof(N->LocalOperands))
@@ -5916,6 +6050,12 @@ GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order,
TheGlobal = GA;
}
+AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, DebugLoc dl, EVT VT,
+ SDValue X, unsigned SrcAS,
+ unsigned DestAS)
+ : UnarySDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT), X), // base is a unary node with opcode ADDRSPACECAST, operand X and a VT list built from VT
+ SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {} // stores the source and destination address spaces; the constructor body is empty
+
MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
EVT memvt, MachineMemOperand *mmo)
: SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) {
@@ -6162,8 +6302,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
case ISD::ROTL:
case ISD::ROTR:
Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
- getShiftAmountOperand(Operands[0].getValueType(),
- Operands[1])));
+ getShiftAmountOperand(Operands[0].getValueType(),
+ Operands[1])));
break;
case ISD::SIGN_EXTEND_INREG:
case ISD::FP_ROUND_INREG: {
@@ -6235,7 +6375,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
int64_t GVOffset = 0;
const TargetLowering *TLI = TM.getTargetLowering();
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
- unsigned PtrWidth = TLI->getPointerTy().getSizeInBits();
+ unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType());
APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne,
TLI->getDataLayout());
@@ -6268,6 +6408,38 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
return 0;
}
+/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+/// which is split (or expanded) into two not necessarily identical pieces. Returns the pair (LoVT, HiVT); as written below, both members are always assigned the same type.
+std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const {
+ // Currently all types are split in half.
+ EVT LoVT, HiVT;
+ if (!VT.isVector()) {
+ LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT); // non-vector: both parts use the type the target lowering reports for VT
+ } else {
+ unsigned NumElements = VT.getVectorNumElements();
+ assert(!(NumElements & 1) && "Splitting vector, but not in half!"); // odd element counts are rejected in assert-enabled builds
+ LoVT = HiVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(),
+ NumElements/2); // vector: same element type, half the element count
+ }
+ return std::make_pair(LoVT, HiVT);
+}
+
+/// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the
+/// low/high part. Lo has type LoVT and is extracted at index 0; Hi has type HiVT and is extracted at index LoVT.getVectorNumElements().
+std::pair<SDValue, SDValue>
+SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
+ const EVT &HiVT) {
+ assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <=
+ N.getValueType().getVectorNumElements() &&
+ "More vector elements requested than available!"); // "<=": the two parts need not cover every element of N
+ SDValue Lo, Hi;
+ Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N,
+ getConstant(0, TLI->getVectorIdxTy())); // index constants use the target's vector index type
+ Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N,
+ getConstant(LoVT.getVectorNumElements(), TLI->getVectorIdxTy())); // Hi starts immediately after the Lo elements
+ return std::make_pair(Lo, Hi);
+}
+
// getAddressSpace - Return the address space this GlobalAddress belongs to.
unsigned GlobalAddressSDNode::getAddressSpace() const {
return getGlobal()->getType()->getAddressSpace();
@@ -6389,7 +6561,7 @@ static void checkForCyclesHelper(const SDNode *N,
void llvm::checkForCycles(const llvm::SDNode *N) {
#ifdef XDEBUG
- assert(N && "Checking nonexistant SDNode");
+ assert(N && "Checking nonexistent SDNode");
SmallPtrSet<const SDNode*, 32> visited;
SmallPtrSet<const SDNode*, 32> checked;
checkForCyclesHelper(N, visited, checked);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index b9f4381..2b2713d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/DebugInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
@@ -49,7 +50,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/IntegersSubsetMapping.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -58,6 +58,7 @@
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
#include <algorithm>
using namespace llvm;
@@ -1063,8 +1064,10 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
- if (isa<ConstantPointerNull>(C))
- return DAG.getConstant(0, TLI->getPointerTy());
+ if (isa<ConstantPointerNull>(C)) {
+ unsigned AS = V->getType()->getPointerAddressSpace();
+ return DAG.getConstant(0, TLI->getPointerTy(AS));
+ }
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return DAG.getConstantFP(*CFP, VT);
@@ -1268,7 +1271,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
for (unsigned i = 0; i < NumParts; ++i) {
Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
- /*isfixed=*/true, 0, 0));
+ VT, /*isfixed=*/true, 0, 0));
OutVals.push_back(Parts[i]);
}
}
@@ -1617,8 +1620,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
} else
Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
} else {
- assert(CB.CC == ISD::SETCC_INVALID &&
- "Condition is undefined for to-the-range belonging check.");
+ assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
@@ -1626,9 +1628,9 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
SDValue CmpOp = getValue(CB.CmpMHS);
EVT VT = CmpOp.getValueType();
- if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) {
+ if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
- ISD::SETULE);
+ ISD::SETLE);
} else {
SDValue SUB = DAG.getNode(ISD::SUB, dl,
VT, CmpOp, DAG.getConstant(Low, VT));
@@ -1741,6 +1743,77 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
DAG.setRoot(BrCond);
}
+/// Codegen a new tail for a stack protector check parent block (ParentBB)
+/// which has had its tail spliced into a stack protector check success bb.
+///
+/// For a high level explanation of how this fits into the stack protector
+/// generation see the comment on the declaration of class
+/// StackProtectorDescriptor.
+void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *ParentBB) {
+
+ // First create the loads to the guard/stack slot for the comparison.
+ const TargetLowering *TLI = TM.getTargetLowering();
+ EVT PtrTy = TLI->getPointerTy();
+
+ MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo();
+ int FI = MFI->getStackProtectorIndex();
+
+ const Value *IRGuard = SPD.getGuard();
+ SDValue GuardPtr = getValue(IRGuard);
+ SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
+ // Both loads below use the preferred alignment of IRGuard's IR type.
+ unsigned Align =
+ TLI->getDataLayout()->getPrefTypeAlignment(IRGuard->getType());
+ SDValue Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(),
+ GuardPtr, MachinePointerInfo(IRGuard, 0),
+ true, false, false, Align);
+
+ SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(),
+ StackSlotPtr,
+ MachinePointerInfo::getFixedStack(FI),
+ true, false, false, Align);
+
+ // Perform the comparison via a subtract/getsetcc.
+ EVT VT = Guard.getValueType();
+ SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot);
+
+ SDValue Cmp = DAG.getSetCC(getCurSDLoc(),
+ TLI->getSetCCResultType(*DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(0, VT),
+ ISD::SETNE);
+
+ // If the difference is not 0, the guard and the stack slot are not equal,
+ // so branch to the failure MBB.
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(),
+ MVT::Other, StackSlot.getOperand(0),
+ Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
+ // Otherwise branch to success MBB.
+ SDValue Br = DAG.getNode(ISD::BR, getCurSDLoc(),
+ MVT::Other, BrCond,
+ DAG.getBasicBlock(SPD.getSuccessMBB()));
+
+ DAG.setRoot(Br);
+}
+
+/// Codegen the failure basic block for a stack protector check.
+///
+/// A failure stack protector machine basic block consists simply of a call to
+/// __stack_chk_fail().
+/// The chain returned by makeLibCall (its .second) becomes the new DAG root.
+/// For a high level explanation of how this fits into the stack protector
+/// generation see the comment on the declaration of class
+/// StackProtectorDescriptor.
+void
+SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
+ const TargetLowering *TLI = TM.getTargetLowering();
+ SDValue Chain = TLI->makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL,
+ MVT::isVoid, 0, 0, false, getCurSDLoc(),
+ false, false).second;
+ DAG.setRoot(Chain);
+}
+
/// visitBitTestHeader - This function emits necessary code to produce value
/// suitable for "bit tests"
void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
@@ -2073,7 +2146,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
CC = ISD::SETEQ;
LHS = SV; RHS = I->High; MHS = NULL;
} else {
- CC = ISD::SETCC_INVALID;
+ CC = ISD::SETLE;
LHS = I->Low; MHS = SV; RHS = I->High;
}
@@ -2107,7 +2180,7 @@ static inline bool areJTsAllowed(const TargetLowering &TLI) {
static APInt ComputeRange(const APInt &First, const APInt &Last) {
uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
- APInt LastExt = Last.zext(BitWidth), FirstExt = First.zext(BitWidth);
+ APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
return (LastExt - FirstExt + 1ULL);
}
@@ -2174,7 +2247,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
const APInt &High = cast<ConstantInt>(I->High)->getValue();
- if (Low.ule(TEI) && TEI.ule(High)) {
+ if (Low.sle(TEI) && TEI.sle(High)) {
DestBBs.push_back(I->BB);
if (TEI==High)
++I;
@@ -2348,7 +2421,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
// Create a CaseBlock record representing a conditional branch to
// the LHS node if the value being switched on SV is less than C.
// Otherwise, branch to LHS.
- CaseBlock CB(ISD::SETULT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+ CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
if (CR.CaseBB == SwitchBB)
visitSwitchCase(CB, SwitchBB);
@@ -2378,7 +2451,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
MachineFunction *CurMF = FuncInfo.MF;
// If target does not have legal shift left, do not emit bit tests at all.
- if (!TLI->isOperationLegal(ISD::SHL, TLI->getPointerTy()))
+ if (!TLI->isOperationLegal(ISD::SHL, PTy))
return false;
size_t numCmps = 0;
@@ -2421,7 +2494,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
// Optimize the case where all the case values fit in a
// word without having to subtract minValue. In this case,
// we can optimize away the subtraction.
- if (maxValue.ult(IntPtrBits)) {
+ if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
cmpRange = maxValue;
} else {
lowBound = minValue;
@@ -2496,12 +2569,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
/// Clusterify - Transform simple list of Cases into list of CaseRange's
size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
const SwitchInst& SI) {
-
- /// Use a shorter form of declaration, and also
- /// show the we want to use CRSBuilder as Clusterifier.
- typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier;
-
- Clusterifier TheClusterifier;
+ size_t numCmps = 0;
BranchProbabilityInfo *BPI = FuncInfo.BPI;
// Start with "simple" cases
@@ -2510,27 +2578,40 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
const BasicBlock *SuccBB = i.getCaseSuccessor();
MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
- TheClusterifier.add(i.getCaseValueEx(), SMBB,
- BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0);
- }
-
- TheClusterifier.optimize();
-
- size_t numCmps = 0;
- for (Clusterifier::RangeIterator i = TheClusterifier.begin(),
- e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
- Clusterifier::Cluster &C = *i;
- // Update edge weight for the cluster.
- unsigned W = C.first.Weight;
-
- // FIXME: Currently work with ConstantInt based numbers.
- // Changing it to APInt based is a pretty heavy for this commit.
- Cases.push_back(Case(C.first.getLow().toConstantInt(),
- C.first.getHigh().toConstantInt(), C.second, W));
+ uint32_t ExtraWeight =
+ BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0;
+
+ Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
+ SMBB, ExtraWeight));
+ }
+ std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+ // Merge case into clusters
+ if (Cases.size() >= 2)
+ // Must recompute end() each iteration because it may be
+ // invalidated by erase if we hold on to it
+ for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
+ J != Cases.end(); ) {
+ const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
+ const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
+ MachineBasicBlock* nextBB = J->BB;
+ MachineBasicBlock* currentBB = I->BB;
+
+ // If the two neighboring cases go to the same destination, merge them
+ // into a single case.
+ if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
+ I->High = J->High;
+ I->ExtraWeight += J->ExtraWeight;
+ J = Cases.erase(J);
+ } else {
+ I = J++;
+ }
+ }
- if (C.first.getLow() != C.first.getHigh())
- // A range counts double, since it requires two compares.
- ++numCmps;
+ for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+ if (I->Low != I->High)
+ // A range counts double, since it requires two compares.
+ ++numCmps;
}
return numCmps;
@@ -2859,6 +2940,21 @@ void SelectionDAGBuilder::visitBitCast(const User &I) {
setValue(&I, N); // noop cast.
}
+void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const Value *SV = I.getOperand(0);
+ SDValue N = getValue(SV);
+ EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ // Address spaces of the source operand and of the cast's result type.
+ unsigned SrcAS = SV->getType()->getPointerAddressSpace();
+ unsigned DestAS = I.getType()->getPointerAddressSpace();
+ // Only build a cast node when the target does not report the cast as a no-op.
+ if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
+ N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
+ // N is either the new cast node or the unchanged operand value.
+ setValue(&I, N);
+}
+
void SelectionDAGBuilder::visitInsertElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
@@ -3151,10 +3247,12 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
}
void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
- SDValue N = getValue(I.getOperand(0));
+ Value *Op0 = I.getOperand(0);
// Note that the pointer operand may be a vector of pointers. Take the scalar
// element which holds a pointer.
- Type *Ty = I.getOperand(0)->getType()->getScalarType();
+ Type *Ty = Op0->getType()->getScalarType();
+ unsigned AS = Ty->getPointerAddressSpace();
+ SDValue N = getValue(Op0);
for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
OI != E; ++OI) {
@@ -3179,14 +3277,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
uint64_t Offs =
TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
SDValue OffsVal;
- EVT PTy = TLI->getPointerTy();
+ EVT PTy = TLI->getPointerTy(AS);
unsigned PtrBits = PTy.getSizeInBits();
if (PtrBits < 64)
- OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(),
- TLI->getPointerTy(),
+ OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy,
DAG.getConstant(Offs, MVT::i64));
else
- OffsVal = DAG.getIntPtrConstant(Offs);
+ OffsVal = DAG.getConstant(Offs, PTy);
N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
OffsVal);
@@ -3194,7 +3291,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
}
// N = N + Idx * ElementSize;
- APInt ElementSize = APInt(TLI->getPointerTy().getSizeInBits(),
+ APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS),
TD->getTypeAllocSize(Ty));
SDValue IdxN = getValue(Idx);
@@ -3451,7 +3548,7 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDValue L =
DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
- getValue(I.getCompareOperand()).getValueType().getSimpleVT(),
+ getValue(I.getCompareOperand()).getSimpleValueType(),
InChain,
getValue(I.getPointerOperand()),
getValue(I.getCompareOperand()),
@@ -3499,7 +3596,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
SDValue L =
DAG.getAtomic(NT, dl,
- getValue(I.getValOperand()).getValueType().getSimpleVT(),
+ getValue(I.getValOperand()).getSimpleValueType(),
InChain,
getValue(I.getPointerOperand()),
getValue(I.getValOperand()),
@@ -4193,7 +4290,7 @@ static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS,
SelectionDAG &DAG, const TargetLowering &TLI) {
bool IsExp10 = false;
- if (LHS.getValueType() == MVT::f32 && LHS.getValueType() == MVT::f32 &&
+ if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
APFloat Ten(10.0f);
@@ -4705,14 +4802,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl,
TLI->getPointerTy());
SDValue Offset = DAG.getNode(ISD::ADD, sdl,
- TLI->getPointerTy(),
+ CfaArg.getValueType(),
DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl,
- TLI->getPointerTy()),
+ CfaArg.getValueType()),
CfaArg);
SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl,
TLI->getPointerTy(),
DAG.getConstant(0, TLI->getPointerTy()));
- setValue(&I, DAG.getNode(ISD::ADD, sdl, TLI->getPointerTy(),
+ setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(),
FA, Offset));
return 0;
}
@@ -4902,7 +4999,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::ceil:
case Intrinsic::trunc:
case Intrinsic::rint:
- case Intrinsic::nearbyint: {
+ case Intrinsic::nearbyint:
+ case Intrinsic::round: {
unsigned Opcode;
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
@@ -4915,6 +5013,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
case Intrinsic::rint: Opcode = ISD::FRINT; break;
case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ case Intrinsic::round: Opcode = ISD::FROUND; break;
}
setValue(&I, DAG.getNode(Opcode, sdl,
@@ -4922,6 +5021,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0))));
return 0;
}
+ case Intrinsic::copysign:
+ setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return 0;
case Intrinsic::fma:
setValue(&I, DAG.getNode(ISD::FMA, sdl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -5207,9 +5312,30 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::invariant_end:
// Discard region information.
return 0;
+ case Intrinsic::stackprotectorcheck: {
+ // Do not actually emit anything for this basic block. Instead we initialize
+ // the stack protector descriptor and export the guard variable so we can
+ // access it in FinishBasicBlock.
+ const BasicBlock *BB = I.getParent();
+ SPDescriptor.initialize(BB, FuncInfo.MBBMap[BB], I);
+ ExportFromCurrentBlock(SPDescriptor.getGuard());
+
+ // Flush our exports since we are going to process a terminator.
+ (void)getControlRoot();
+ return 0;
+ }
case Intrinsic::donothing:
// ignore
return 0;
+ case Intrinsic::experimental_stackmap: {
+ visitStackmap(I);
+ return 0;
+ }
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64: {
+ visitPatchpoint(I);
+ return 0;
+ }
}
}
@@ -5274,15 +5400,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue ArgNode = getValue(V);
Entry.Node = ArgNode; Entry.Ty = V->getType();
- unsigned attrInd = i - CS.arg_begin() + 1;
- Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
- Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt);
- Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
- Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet);
- Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest);
- Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
- Entry.isReturned = CS.paramHasAttr(attrInd, Attribute::Returned);
- Entry.Alignment = CS.getParamAlignment(attrInd);
+ // Skip the first return-type Attribute to get to params.
+ Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
Args.push_back(Entry);
}
@@ -5364,8 +5483,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
}
if (!Result.second.getNode()) {
- // As a special case, a null chain means that a tail call has been emitted and
- // the DAG root is already updated.
+ // As a special case, a null chain means that a tail call has been emitted
+ // and the DAG root is already updated.
HasTailCall = true;
// Since there's no actual continuation from this block, nothing can be
@@ -5445,6 +5564,18 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
return LoadVal;
}
+/// processIntegerCallValue - Record Value as the result of I after sign-
+/// (IsSigned) or zero-extending/truncating it to the value type of I's type.
+void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
+ SDValue Value,
+ bool IsSigned) {
+ EVT VT = TM.getTargetLowering()->getValueType(I.getType(), true);
+ if (IsSigned)
+ Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
+ else
+ Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT);
+ setValue(&I, Value);
+}
/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
/// If so, return true and lower it, otherwise return false and it will be
@@ -5460,15 +5591,33 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
!I.getType()->isIntegerTy())
return false;
- const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2));
+ const Value *Size = I.getArgOperand(2);
+ const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
+ if (CSize && CSize->getZExtValue() == 0) {
+ EVT CallVT = TM.getTargetLowering()->getValueType(I.getType(), true);
+ setValue(&I, DAG.getConstant(0, CallVT));
+ return true;
+ }
+
+ const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(LHS), getValue(RHS), getValue(Size),
+ MachinePointerInfo(LHS),
+ MachinePointerInfo(RHS));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, true);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
- if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
+ if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) {
bool ActuallyDoIt = true;
MVT LoadVT;
Type *LoadTy;
- switch (Size->getZExtValue()) {
+ switch (CSize->getZExtValue()) {
default:
LoadVT = MVT::Other;
LoadTy = 0;
@@ -5476,20 +5625,20 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
break;
case 2:
LoadVT = MVT::i16;
- LoadTy = Type::getInt16Ty(Size->getContext());
+ LoadTy = Type::getInt16Ty(CSize->getContext());
break;
case 4:
LoadVT = MVT::i32;
- LoadTy = Type::getInt32Ty(Size->getContext());
+ LoadTy = Type::getInt32Ty(CSize->getContext());
break;
case 8:
LoadVT = MVT::i64;
- LoadTy = Type::getInt64Ty(Size->getContext());
+ LoadTy = Type::getInt64Ty(CSize->getContext());
break;
/*
case 16:
LoadVT = MVT::v4i32;
- LoadTy = Type::getInt32Ty(Size->getContext());
+ LoadTy = Type::getInt32Ty(CSize->getContext());
LoadTy = VectorType::get(LoadTy, 4);
break;
*/
@@ -5503,7 +5652,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
// supports unaligned loads of that type. Expanding into byte loads would
// bloat the code.
const TargetLowering *TLI = TM.getTargetLowering();
- if (ActuallyDoIt && Size->getZExtValue() > 4) {
+ if (ActuallyDoIt && CSize->getZExtValue() > 4) {
// TODO: Handle 5 byte compare as 4-byte + 1 byte.
// TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT))
@@ -5516,8 +5665,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal,
ISD::SETNE);
- EVT CallVT = TLI->getValueType(I.getType(), true);
- setValue(&I, DAG.getZExtOrTrunc(Res, getCurSDLoc(), CallVT));
+ processIntegerCallValue(I, Res, false);
return true;
}
}
@@ -5526,6 +5674,148 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
return false;
}
+/// visitMemChrCall -- See if we can lower a memchr call into an optimized
+/// form. If so, return true and lower it, otherwise return false and it
+/// will be lowered like a normal call.
+bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
+ // Verify that the prototype makes sense. void *memchr(void *, int, size_t)
+ if (I.getNumArgOperands() != 3)
+ return false;
+
+ const Value *Src = I.getArgOperand(0);
+ const Value *Char = I.getArgOperand(1);
+ const Value *Length = I.getArgOperand(2);
+ if (!Src->getType()->isPointerTy() ||
+ !Char->getType()->isIntegerTy() ||
+ !Length->getType()->isIntegerTy() ||
+ !I.getType()->isPointerTy())
+ return false;
+ // A null Res.first from the target hook means: fall back to a normal call.
+ const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Src), getValue(Char), getValue(Length),
+ MachinePointerInfo(Src));
+ if (Res.first.getNode()) {
+ setValue(&I, Res.first);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call (selected by
+/// isStpcpy) into an optimized form. If so, return true and lower it, otherwise
+/// return false and it will be lowered like a normal call.
+bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
+ // Verify that the prototype makes sense. char *strcpy(char *, char *)
+ if (I.getNumArgOperands() != 2)
+ return false;
+
+ const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
+ if (!Arg0->getType()->isPointerTy() ||
+ !Arg1->getType()->isPointerTy() ||
+ !I.getType()->isPointerTy())
+ return false;
+ // On success the chain returned by the target hook becomes the new DAG root.
+ const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(),
+ getValue(Arg0), getValue(Arg1),
+ MachinePointerInfo(Arg0),
+ MachinePointerInfo(Arg1), isStpcpy);
+ if (Res.first.getNode()) {
+ setValue(&I, Res.first);
+ DAG.setRoot(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form.
+/// If so, return true and lower it, otherwise return false and it will be
+/// lowered like a normal call.
+bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
+ // Verify that the prototype makes sense. int strcmp(void*,void*)
+ if (I.getNumArgOperands() != 2)
+ return false;
+
+ const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
+ if (!Arg0->getType()->isPointerTy() ||
+ !Arg1->getType()->isPointerTy() ||
+ !I.getType()->isIntegerTy())
+ return false;
+ // A non-null Res.first is recorded as a signed value of the call's type.
+ const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Arg0), getValue(Arg1),
+ MachinePointerInfo(Arg0),
+ MachinePointerInfo(Arg1));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, true);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// visitStrLenCall -- See if we can lower a strlen call into an optimized
+/// form. If so, return true and lower it, otherwise return false and it
+/// will be lowered like a normal call.
+bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
+ // Verify that the prototype makes sense. size_t strlen(char *)
+ if (I.getNumArgOperands() != 1)
+ return false;
+
+ const Value *Arg0 = I.getArgOperand(0);
+ if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy())
+ return false;
+ // A non-null Res.first is recorded as an unsigned value of the call's type.
+ const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Arg0), MachinePointerInfo(Arg0));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, false);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized
+/// form. If so, return true and lower it, otherwise return false and it
+/// will be lowered like a normal call.
+bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
+ // Verify that the prototype makes sense. size_t strnlen(char *, size_t)
+ if (I.getNumArgOperands() != 2)
+ return false;
+
+ const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
+ if (!Arg0->getType()->isPointerTy() ||
+ !Arg1->getType()->isIntegerTy() ||
+ !I.getType()->isIntegerTy())
+ return false;
+ // A non-null Res.first is recorded as an unsigned value of the call's type.
+ const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Arg0), getValue(Arg1),
+ MachinePointerInfo(Arg0));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, false);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
/// visitUnaryFloatCall - If a call instruction is a unary floating-point
/// operation (as expected), translate it to an SDNode with the specified opcode
/// and return true.
@@ -5644,6 +5934,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitUnaryFloatCall(I, ISD::FRINT))
return;
break;
+ case LibFunc::round:
+ case LibFunc::roundf:
+ case LibFunc::roundl:
+ if (visitUnaryFloatCall(I, ISD::FROUND))
+ return;
+ break;
case LibFunc::trunc:
case LibFunc::truncf:
case LibFunc::truncl:
@@ -5666,6 +5962,30 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitMemCmpCall(I))
return;
break;
+ case LibFunc::memchr:
+ if (visitMemChrCall(I))
+ return;
+ break;
+ case LibFunc::strcpy:
+ if (visitStrCpyCall(I, false))
+ return;
+ break;
+ case LibFunc::stpcpy:
+ if (visitStrCpyCall(I, true))
+ return;
+ break;
+ case LibFunc::strcmp:
+ if (visitStrCmpCall(I))
+ return;
+ break;
+ case LibFunc::strlen:
+ if (visitStrLenCall(I))
+ return;
+ break;
+ case LibFunc::strnlen:
+ if (visitStrNLenCall(I))
+ return;
+ break;
}
}
}
@@ -6421,6 +6741,248 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
DAG.getSrcValue(I.getArgOperand(1))));
}
+/// \brief Lower an argument list according to the target calling convention.
+///
+/// Helper for intrinsics that follow a target calling convention or require
+/// stack pointer adjustment: only operands [ArgIdx, ArgIdx + NumArgs) of \p CI
+/// take part in the call. \p useVoidTy lowers the call with a void result.
+///
+/// \return A tuple of <return-value, token-chain>
+std::pair<SDValue, SDValue>
+SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx,
+ unsigned NumArgs, SDValue Callee,
+ bool useVoidTy) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumArgs);
+
+ // Populate the argument list. Attributes for args start at offset 1, after
+ // the return attribute, so AttrI must advance in step with ArgI.
+ ImmutableCallSite CS(&CI);
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
+ ArgI != ArgE; ++ArgI, ++AttrI) {
+ const Value *V = CI.getOperand(ArgI);
+
+ assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = getValue(V);
+ Entry.Ty = V->getType();
+ Entry.setAttributes(&CS, AttrI);
+ Args.push_back(Entry);
+ }
+ // The return value counts as used only when the call has at least one use.
+ Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType();
+ TargetLowering::CallLoweringInfo CLI(getRoot(), retTy, /*retSExt*/ false,
+ /*retZExt*/ false, /*isVarArg*/ false, /*isInReg*/ false, NumArgs,
+ CI.getCallingConv(), /*isTailCall*/ false, /*doesNotReturn*/ false,
+ /*isReturnValueUsed*/ !CI.use_empty(), Callee, Args, DAG, getCurSDLoc());
+
+ const TargetLowering *TLI = TM.getTargetLowering();
+ return TLI->LowerCallTo(CLI);
+}
+
+/// \brief Lower llvm.experimental.stackmap directly to its target opcode.
+void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
+ // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>,
+ // [live variables...])
+
+ assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
+
+ SDValue Callee = getValue(CI.getCalledValue());
+
+ // Lower into a call sequence with no args and no return value.
+ std::pair<SDValue, SDValue> Result = LowerCallOperands(CI, 0, 0, Callee);
+ // Set the root to the target-lowered call chain.
+ SDValue Chain = Result.second;
+ DAG.setRoot(Chain);
+
+ // Get the call node from the call sequence chain: it is operand 0 of the
+ // CALLSEQ_END node that ends the chain. Tail calls are not allowed.
+ SDNode *CallEnd = Chain.getNode();
+ assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
+ "Expected a callseq node.");
+ SDNode *Call = CallEnd->getOperand(0).getNode();
+ bool hasGlue = Call->getGluedNode();
+
+ // Collect the operands of the STACKMAP node that will replace the call node.
+ SmallVector<SDValue, 8> Ops;
+
+ // Add the <id> and <numShadowBytes> constants.
+ for (unsigned i = 0; i < 2; ++i) {
+ SDValue tmp = getValue(CI.getOperand(i));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32));
+ }
+ // Push live variables for the stack map.
+ for (unsigned i = 2, e = CI.getNumArgOperands(); i != e; ++i)
+ Ops.push_back(getValue(CI.getArgOperand(i)));
+
+ // Push the chain (this is originally the first operand of the call, but
+ // now becomes the last or second to last operand).
+ Ops.push_back(*(Call->op_begin()));
+
+ // Push the glue flag (last operand).
+ if (hasGlue)
+ Ops.push_back(*(Call->op_end()-1));
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ // Replace the target specific call node with a STACKMAP node.
+ MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::STACKMAP, getCurSDLoc(),
+ NodeTys, Ops);
+
+ // StackMap generates no value, so nothing goes in the NodeMap.
+
+ // Fixup the consumers of the intrinsic. The chain and glue may be used in the
+ // call sequence.
+ DAG.ReplaceAllUsesWith(Call, MN);
+
+ DAG.DeleteNode(Call);
+}
+
+/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
+void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
+ // void|i64 @llvm.experimental.patchpoint.void|i64(i32 <id>,
+ // i32 <numNopBytes>,
+ // i8* <target>,
+ // i32 <numArgs>,
+ // [Args...],
+ // [live variables...])
+
+ CallingConv::ID CC = CI.getCallingConv();
+ bool isAnyRegCC = CC == CallingConv::AnyReg;
+ bool hasDef = !CI.getType()->isVoidTy();
+ SDValue Callee = getValue(CI.getOperand(2)); // <target>
+
+ // Get the real number of arguments participating in the call <numArgs>
+ unsigned NumArgs =
+ cast<ConstantSDNode>(getValue(CI.getArgOperand(3)))->getZExtValue();
+
+ // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
+ assert(CI.getNumArgOperands() >= NumArgs + 4 &&
+ "Not enough arguments provided to the patchpoint intrinsic");
+
+ // For AnyRegCC the arguments are lowered later on manually.
+ unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs;
+ std::pair<SDValue, SDValue> Result =
+ LowerCallOperands(CI, 4, NumCallArgs, Callee, isAnyRegCC);
+
+ // Set the root to the target-lowered call chain.
+ SDValue Chain = Result.second;
+ DAG.setRoot(Chain);
+
+ SDNode *CallEnd = Chain.getNode();
+ if (hasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
+ CallEnd = CallEnd->getOperand(0).getNode();
+
+ // Get the call node from the call sequence chain: it is operand 0 of the
+ // CALLSEQ_END node. Tail calls are not allowed.
+ assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
+ "Expected a callseq node.");
+ SDNode *Call = CallEnd->getOperand(0).getNode();
+ bool hasGlue = Call->getGluedNode();
+
+ // Collect the operands of the PATCHPOINT node that will replace the call.
+ SmallVector<SDValue, 8> Ops;
+
+ // Add the <id> and <numNopBytes> constants.
+ for (unsigned i = 0; i < 2; ++i) {
+ SDValue tmp = getValue(CI.getOperand(i));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32));
+ }
+ // Assume that the Callee is a constant address.
+ Ops.push_back(
+ DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(),
+ /*isTarget=*/true));
+
+ // Adjust <numArgs> to account for any arguments that have been passed on the
+ // stack instead.
+ // Call Node: Chain, Target, {Args}, RegMask, [Glue]
+ unsigned NumCallRegArgs = Call->getNumOperands() - (hasGlue ? 4 : 3);
+ NumCallRegArgs = isAnyRegCC ? NumArgs : NumCallRegArgs;
+ Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32));
+
+ // Add the calling convention
+ Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32));
+
+ // Add the arguments we omitted previously. The register allocator should
+ // place these in any free register.
+ if (isAnyRegCC)
+ for (unsigned i = 4, e = NumArgs + 4; i != e; ++i)
+ Ops.push_back(getValue(CI.getArgOperand(i)));
+
+ // Push the arguments from the call instruction.
+ SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1;
+ for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i)
+ Ops.push_back(*i);
+
+ // Push live variables for the stack map. A constant is pushed as two target
+ // constants: the StackMaps::ConstantOp marker followed by its value.
+ for (unsigned i = NumArgs + 4, e = CI.getNumArgOperands(); i != e; ++i) {
+ SDValue OpVal = getValue(CI.getArgOperand(i));
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
+ Ops.push_back(
+ DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
+ Ops.push_back(
+ DAG.getTargetConstant(C->getSExtValue(), MVT::i64));
+ } else
+ Ops.push_back(OpVal);
+ }
+
+ // Push the register mask info.
+ if (hasGlue)
+ Ops.push_back(*(Call->op_end()-2));
+ else
+ Ops.push_back(*(Call->op_end()-1));
+
+ // Push the chain (this is originally the first operand of the call, but
+ // now becomes the last or second to last operand).
+ Ops.push_back(*(Call->op_begin()));
+
+ // Push the glue flag (last operand).
+ if (hasGlue)
+ Ops.push_back(*(Call->op_end()-1));
+
+ SDVTList NodeTys;
+ if (isAnyRegCC && hasDef) {
+ // Create the return types based on the intrinsic definition
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SmallVector<EVT, 3> ValueVTs;
+ ComputeValueVTs(TLI, CI.getType(), ValueVTs);
+ assert(ValueVTs.size() == 1 && "Expected only one return value type.");
+
+ // There is always a chain and a glue type at the end
+ ValueVTs.push_back(MVT::Other);
+ ValueVTs.push_back(MVT::Glue);
+ NodeTys = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
+ } else
+ NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ // Replace the target specific call node with a PATCHPOINT node.
+ MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT,
+ getCurSDLoc(), NodeTys, Ops);
+
+ // Update the NodeMap: for AnyReg the value is result 0 of the PATCHPOINT
+ // node, otherwise it is the value returned by the lowered call.
+ if (hasDef) {
+ if (isAnyRegCC)
+ setValue(&CI, SDValue(MN, 0));
+ else
+ setValue(&CI, Result.first);
+ }
+
+ // Fixup the consumers of the intrinsic. The chain and glue may be used in the
+ // call sequence. Furthermore the location of the chain and glue can change
+ // when the AnyReg calling convention is used and the intrinsic returns a
+ // value.
+ if (isAnyRegCC && hasDef) {
+ SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
+ SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)};
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ } else
+ DAG.ReplaceAllUsesWith(Call, MN);
+ DAG.DeleteNode(Call);
+}
+
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
/// implementation, which just calls LowerCall.
/// FIXME: When all targets are
@@ -6438,6 +7000,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
MyFlags.VT = RegisterVT;
+ MyFlags.ArgVT = VT;
MyFlags.Used = CLI.IsReturnValueUsed;
if (CLI.RetSExt)
MyFlags.Flags.setSExt();
@@ -6527,7 +7090,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
for (unsigned j = 0; j != NumParts; ++j) {
// if it isn't first piece, alignment must be 1
- ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
+ ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,
i < CLI.NumFixedArgs,
i, j*Parts[j].getValueType().getStoreSize());
if (NumParts > 1 && j == 0)
@@ -6666,7 +7229,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
ISD::ArgFlagsTy Flags;
Flags.setSRet();
MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
- ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0);
+ ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0);
Ins.push_back(RetArg);
}
@@ -6677,6 +7240,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, I->getType(), ValueVTs);
bool isArgValueUsed = !I->use_empty();
+ unsigned PartBase = 0;
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
EVT VT = ValueVTs[Value];
@@ -6714,8 +7278,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
- ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed,
- Idx-1, i*RegisterVT.getStoreSize());
+ ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
+ Idx-1, PartBase+i*RegisterVT.getStoreSize());
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
// if it isn't first piece, alignment must be 1
@@ -6723,6 +7287,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MyFlags.Flags.setOrigAlign(1);
Ins.push_back(MyFlags);
}
+ PartBase += VT.getStoreSize();
}
}
@@ -6940,3 +7505,22 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
ConstantsOut.clear();
}
+
+/// Add a successor MBB to ParentMBB, creating a new MachineBB for BB if SuccMBB
+/// is 0.
+MachineBasicBlock *
+SelectionDAGBuilder::StackProtectorDescriptor::
+AddSuccessorMBB(const BasicBlock *BB,
+ MachineBasicBlock *ParentMBB,
+ MachineBasicBlock *SuccMBB) {
+ // If SuccMBB has not been created yet, create it.
+ if (!SuccMBB) {
+ MachineFunction *MF = ParentMBB->getParent();
+ MachineFunction::iterator BBI = ParentMBB;
+ SuccMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(++BBI, SuccMBB);
+ }
+ // Add it as a successor of ParentMBB.
+ ParentMBB->addSuccessor(SuccMBB);
+ return SuccMBB;
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index ef73c00..835f643 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -1,4 +1,4 @@
-//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- c++ -*---===//
+//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- C++ -*---===//
//
// The LLVM Compiler Infrastructure
//
@@ -26,6 +26,7 @@
namespace llvm {
+class AddrSpaceCastInst;
class AliasAnalysis;
class AllocaInst;
class BasicBlock;
@@ -84,7 +85,7 @@ class SelectionDAGBuilder {
const Instruction *CurInst;
DenseMap<const Value*, SDValue> NodeMap;
-
+
/// UnusedArgNodeMap - Maps argument value for unused arguments. This is used
/// to preserve debug information for incoming arguments.
DenseMap<const Value*, SDValue> UnusedArgNodeMap;
@@ -182,6 +183,17 @@ private:
typedef std::vector<CaseRec> CaseRecVector;
+ /// The comparison function for sorting the switch case values in the vector.
+ /// WARNING: Case ranges should be disjoint!
+ struct CaseCmp {
+ bool operator()(const Case &C1, const Case &C2) {
+ assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
+ const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
+ }
+ };
+
struct CaseBitsCmp {
bool operator()(const CaseBits &C1, const CaseBits &C2) {
return C1.Bits > C2.Bits;
@@ -224,7 +236,7 @@ private:
struct JumpTable {
JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
-
+
/// Reg - the virtual register containing the index of the jump table entry
//. to jump to.
unsigned Reg;
@@ -278,6 +290,201 @@ private:
BitTestInfo Cases;
};
+ /// A class which encapsulates all of the information needed to generate a
+ /// stack protector check and signals to isel via its state being initialized
+ /// that a stack protector needs to be generated.
+ ///
+ /// *NOTE* The following is a high level documentation of SelectionDAG Stack
+ /// Protector Generation. The reason that it is placed here is for a lack of
+ /// other good places to stick it.
+ ///
+ /// High Level Overview of SelectionDAG Stack Protector Generation:
+ ///
+ /// Previously, generation of stack protectors was done exclusively in the
+ /// pre-SelectionDAG Codegen LLVM IR Pass "Stack Protector". This necessitated
+ /// splitting basic blocks at the IR level to create the success/failure basic
+ /// blocks in the tail of the basic block in question. As a result of this,
+ /// calls that would have qualified for the sibling call optimization were no
+ /// longer eligible for optimization since said calls were no longer right in
+ /// the "tail position" (i.e. the immediate predecessor of a ReturnInst
+ /// instruction).
+ ///
+ /// Then it was noticed that since the sibling call optimization causes the
+ /// callee to reuse the caller's stack, if we could delay the generation of
+ /// the stack protector check until later in CodeGen after the sibling call
+ /// decision was made, we get both the tail call optimization and the stack
+ /// protector check!
+ ///
+ /// A few goals in solving this problem were:
+ ///
+ /// 1. Preserve the architecture independence of stack protector generation.
+ ///
+ /// 2. Preserve the normal IR level stack protector check for platforms like
+ /// OpenBSD for which we support platform specific stack protector
+ /// generation.
+ ///
+ /// The main problem that guided the present solution is that one can not
+ /// solve this problem in an architecture independent manner at the IR level
+ /// only. This is because:
+ ///
+ /// 1. The decision on whether or not to perform a sibling call on certain
+ /// platforms (for instance i386) requires lower level information
+ /// related to available registers that can not be known at the IR level.
+ ///
+ /// 2. Even if the previous point were not true, the decision on whether to
+ /// perform a tail call is done in LowerCallTo in SelectionDAG which
+ /// occurs after the Stack Protector Pass. As a result, one would need to
+ /// put the relevant callinst into the stack protector check success
+ /// basic block (where the return inst is placed) and then move it back
+ /// later at SelectionDAG/MI time before the stack protector check if the
+ /// tail call optimization failed. The MI level option was nixed
+ /// immediately since it would require platform specific pattern
+ /// matching. The SelectionDAG level option was nixed because
+ /// SelectionDAG only processes one IR level basic block at a time
+ /// implying one could not create a DAG Combine to move the callinst.
+ ///
+ /// To get around this problem a few things were realized:
+ ///
+ /// 1. While one can not handle multiple IR level basic blocks at the
+ /// SelectionDAG Level, one can generate multiple machine basic blocks
+ /// for one IR level basic block. This is how we handle bit tests and
+ /// switches.
+ ///
+ /// 2. At the MI level, tail calls are represented via a special return
+ /// MIInst called "tcreturn". Thus if we know the basic block in which we
+ /// wish to insert the stack protector check, we get the correct behavior
+ /// by always inserting the stack protector check right before the return
+ /// statement. This is a "magical transformation" since no matter where
+ /// the stack protector check intrinsic is, we always insert the stack
+ /// protector check code at the end of the BB.
+ ///
+ /// Given the aforementioned constraints, the following solution was devised:
+ ///
+ /// 1. On platforms that do not support SelectionDAG stack protector check
+ /// generation, allow for the normal IR level stack protector check
+ /// generation to continue.
+ ///
+ /// 2. On platforms that do support SelectionDAG stack protector check
+ /// generation:
+ ///
+ /// a. Use the IR level stack protector pass to decide if a stack
+ /// protector is required/which BB we insert the stack protector check
+ /// in by reusing the logic already therein. If we wish to generate a
+ /// stack protector check in a basic block, we place a special IR
+ /// intrinsic called llvm.stackprotectorcheck right before the BB's
+ /// returninst or if there is a callinst that could potentially be
+ /// sibling call optimized, before the call inst.
+ ///
+ /// b. Then when a BB with said intrinsic is processed, we codegen the BB
+ /// normally via SelectBasicBlock. In said process, when we visit the
+ /// stack protector check, we do not actually emit anything into the
+ /// BB. Instead, we just initialize the stack protector descriptor
+ /// class (which involves stashing information/creating the success
+ /// mbb and the failure mbb if we have not created one for this
+ /// function yet) and export the guard variable that we are going to
+ /// compare.
+ ///
+ /// c. After we finish selecting the basic block, in FinishBasicBlock if
+ /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is
+ /// initialized, we first find a splice point in the parent basic block
+ /// before the terminator and then splice the terminator of said basic
+ /// block into the success basic block. Then we code-gen a new tail for
+ /// the parent basic block consisting of the two loads, the comparison,
+ /// and finally two branches to the success/failure basic blocks. We
+ /// conclude by code-gening the failure basic block if we have not
+ /// code-gened it already (all stack protector checks we generate in
+ /// the same function, use the same failure basic block).
+ class StackProtectorDescriptor {
+ public:
+ StackProtectorDescriptor() : ParentMBB(0), SuccessMBB(0), FailureMBB(0),
+ Guard(0) { }
+ ~StackProtectorDescriptor() { }
+
+ /// Returns true if all fields of the stack protector descriptor are
+ /// initialized implying that we should/are ready to emit a stack protector.
+ bool shouldEmitStackProtector() const {
+ return ParentMBB && SuccessMBB && FailureMBB && Guard;
+ }
+
+ /// Initialize the stack protector descriptor structure for a new basic
+ /// block.
+ void initialize(const BasicBlock *BB,
+ MachineBasicBlock *MBB,
+ const CallInst &StackProtCheckCall) {
+ // Make sure we are not initialized yet.
+ assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "
+ "already initialized!");
+ ParentMBB = MBB;
+ SuccessMBB = AddSuccessorMBB(BB, MBB);
+ FailureMBB = AddSuccessorMBB(BB, MBB, FailureMBB);
+ if (!Guard)
+ Guard = StackProtCheckCall.getArgOperand(0);
+ }
+
+ /// Reset state that changes when we handle different basic blocks.
+ ///
+ /// This currently includes:
+ ///
+ /// 1. The specific basic block we are generating a
+ /// stack protector for (ParentMBB).
+ ///
+ /// 2. The successor machine basic block that will contain the tail of
+ /// parent mbb after we create the stack protector check (SuccessMBB). This
+ /// BB is visited only on stack protector check success.
+ void resetPerBBState() {
+ ParentMBB = 0;
+ SuccessMBB = 0;
+ }
+
+ /// Reset state that only changes when we switch functions.
+ ///
+ /// This currently includes:
+ ///
+ /// 1. FailureMBB since we reuse the failure code path for all stack
+ /// protector checks created in an individual function.
+ ///
+ /// 2. The guard variable since the guard variable we are checking against
+ /// is always the same.
+ void resetPerFunctionState() {
+ FailureMBB = 0;
+ Guard = 0;
+ }
+
+ MachineBasicBlock *getParentMBB() { return ParentMBB; }
+ MachineBasicBlock *getSuccessMBB() { return SuccessMBB; }
+ MachineBasicBlock *getFailureMBB() { return FailureMBB; }
+ const Value *getGuard() { return Guard; }
+
+ private:
+ /// The basic block for which we are generating the stack protector.
+ ///
+ /// As a result of stack protector generation, we will splice the
+ /// terminators of this basic block into the successor mbb SuccessMBB and
+ /// replace it with a compare/branch to the successor mbbs
+ /// SuccessMBB/FailureMBB depending on whether or not the stack protector
+ /// was violated.
+ MachineBasicBlock *ParentMBB;
+
+ /// A basic block visited on stack protector check success that contains the
+ /// terminators of ParentMBB.
+ MachineBasicBlock *SuccessMBB;
+
+ /// The basic block visited on stack protector check failure that will
+ /// contain a call to __stack_chk_fail().
+ MachineBasicBlock *FailureMBB;
+
+ /// The guard variable which we will compare against the stored value in the
+ /// stack protector stack slot.
+ const Value *Guard;
+
+ /// Add a successor machine basic block to ParentMBB. If the successor mbb
+ /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
+ /// block will be created.
+ MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB,
+ MachineBasicBlock *ParentMBB,
+ MachineBasicBlock *SuccMBB = 0);
+ };
+
private:
const TargetMachine &TM;
public:
@@ -295,6 +502,9 @@ public:
/// BitTestCases - Vector of BitTestBlock structures used to communicate
/// SwitchInst code generation information.
std::vector<BitTestBlock> BitTestCases;
+ /// A StackProtectorDescriptor structure used to communicate stack protector
+ /// information in between SelectBasicBlock and FinishBasicBlock.
+ StackProtectorDescriptor SPDescriptor;
// Emit PHI-node-operand constants only once even if used by multiple
// PHI nodes.
@@ -305,9 +515,9 @@ public:
FunctionLoweringInfo &FuncInfo;
/// OptLevel - What optimization level we're generating code for.
- ///
+ ///
CodeGenOpt::Level OptLevel;
-
+
/// GFI - Garbage collection metadata for the function.
GCFunctionInfo *GFI;
@@ -389,7 +599,7 @@ public:
assert(N.getNode() == 0 && "Already set a value for this node!");
N = NewN;
}
-
+
void setUnusedArgValue(const Value *V, SDValue NewN) {
SDValue &N = UnusedArgNodeMap[V];
assert(N.getNode() == 0 && "Already set a value for this node!");
@@ -410,6 +620,12 @@ public:
void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
MachineBasicBlock *LandingPad = NULL);
+ std::pair<SDValue, SDValue> LowerCallOperands(const CallInst &CI,
+ unsigned ArgIdx,
+ unsigned NumArgs,
+ SDValue Callee,
+ bool useVoidTy = false);
+
/// UpdateSplitBlock - When an MBB was split during scheduling, update the
/// references that ned to refer to the last resulting block.
void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
@@ -451,6 +667,9 @@ private:
public:
void visitSwitchCase(CaseBlock &CB,
MachineBasicBlock *SwitchBB);
+ void visitSPDescriptorParent(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *ParentBB);
+ void visitSPDescriptorFailure(StackProtectorDescriptor &SPD);
void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
void visitBitTestCase(BitTestBlock &BB,
MachineBasicBlock* NextMBB,
@@ -461,7 +680,7 @@ public:
void visitJumpTable(JumpTable &JT);
void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
MachineBasicBlock *SwitchBB);
-
+
private:
// These all get lowered before this pass.
void visitInvoke(const InvokeInst &I);
@@ -502,6 +721,7 @@ private:
void visitPtrToInt(const User &I);
void visitIntToPtr(const User &I);
void visitBitCast(const User &I);
+ void visitAddrSpaceCast(const User &I);
void visitExtractElement(const User &I);
void visitInsertElement(const User &I);
@@ -523,6 +743,11 @@ private:
void visitPHI(const PHINode &I);
void visitCall(const CallInst &I);
bool visitMemCmpCall(const CallInst &I);
+ bool visitMemChrCall(const CallInst &I);
+ bool visitStrCpyCall(const CallInst &I, bool isStpcpy);
+ bool visitStrCmpCall(const CallInst &I);
+ bool visitStrLenCall(const CallInst &I);
+ bool visitStrNLenCall(const CallInst &I);
bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
void visitAtomicLoad(const LoadInst &I);
void visitAtomicStore(const StoreInst &I);
@@ -535,6 +760,8 @@ private:
void visitVAArg(const VAArgInst &I);
void visitVAEnd(const CallInst &I);
void visitVACopy(const CallInst &I);
+ void visitStackmap(const CallInst &I);
+ void visitPatchpoint(const CallInst &I);
void visitUserOp1(const Instruction &I) {
llvm_unreachable("UserOp1 should not exist at instruction selection time!");
@@ -543,10 +770,13 @@ private:
llvm_unreachable("UserOp2 should not exist at instruction selection time!");
}
+ void processIntegerCallValue(const Instruction &I,
+ SDValue Value, bool IsSigned);
+
void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
/// EmitFuncArgumentDbgValue - If V is an function argument then create
- /// corresponding DBG_VALUE machine instruction for it now. At the end of
+ /// corresponding DBG_VALUE machine instruction for it now. At the end of
/// instruction selection, they will be inserted to the entry BB.
bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
int64_t Offset, const SDValue &N);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index d8ee221..c04a08d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -142,6 +142,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FCEIL: return "fceil";
case ISD::FRINT: return "frint";
case ISD::FNEARBYINT: return "fnearbyint";
+ case ISD::FROUND: return "fround";
case ISD::FEXP: return "fexp";
case ISD::FEXP2: return "fexp2";
case ISD::FLOG: return "flog";
@@ -223,6 +224,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FP_TO_SINT: return "fp_to_sint";
case ISD::FP_TO_UINT: return "fp_to_uint";
case ISD::BITCAST: return "bitcast";
+ case ISD::ADDRSPACECAST: return "addrspacecast";
case ISD::FP16_TO_FP32: return "fp16_to_fp32";
case ISD::FP32_TO_FP16: return "fp32_to_fp16";
@@ -484,6 +486,13 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << " " << offset;
if (unsigned int TF = BA->getTargetFlags())
OS << " [TF=" << TF << ']';
+ } else if (const AddrSpaceCastSDNode *ASC =
+ dyn_cast<AddrSpaceCastSDNode>(this)) {
+ OS << '['
+ << ASC->getSrcAddressSpace()
+ << " -> "
+ << ASC->getDestAddressSpace()
+ << ']';
}
if (unsigned Order = getIROrder())
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 01da51c..3a0cfa1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -223,6 +223,44 @@ defaultListDAGScheduler("default", "Best scheduler for the target",
namespace llvm {
//===--------------------------------------------------------------------===//
+ /// \brief This class is used by SelectionDAGISel to temporarily override
+ /// the optimization level on a per-function basis.
+ class OptLevelChanger {
+ SelectionDAGISel &IS;
+ CodeGenOpt::Level SavedOptLevel;
+ bool SavedFastISel;
+
+ public:
+ OptLevelChanger(SelectionDAGISel &ISel,
+ CodeGenOpt::Level NewOptLevel) : IS(ISel) {
+ SavedOptLevel = IS.OptLevel;
+ if (NewOptLevel == SavedOptLevel)
+ return;
+ IS.OptLevel = NewOptLevel;
+ IS.TM.setOptLevel(NewOptLevel);
+ SavedFastISel = IS.TM.Options.EnableFastISel;
+ if (NewOptLevel == CodeGenOpt::None)
+ IS.TM.setFastISel(true);
+ DEBUG(dbgs() << "\nChanging optimization level for Function "
+ << IS.MF->getFunction()->getName() << "\n");
+ DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel
+ << " ; After: -O" << NewOptLevel << "\n");
+ }
+
+ ~OptLevelChanger() {
+ if (IS.OptLevel == SavedOptLevel)
+ return;
+ DEBUG(dbgs() << "\nRestoring optimization level for Function "
+ << IS.MF->getFunction()->getName() << "\n");
+ DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel
+ << " ; After: -O" << SavedOptLevel << "\n");
+ IS.OptLevel = SavedOptLevel;
+ IS.TM.setOptLevel(SavedOptLevel);
+ IS.TM.setFastISel(SavedFastISel);
+ }
+ };
+
+ //===--------------------------------------------------------------------===//
/// createDefaultScheduler - This creates an instruction scheduler appropriate
/// for the target.
ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
@@ -230,7 +268,7 @@ namespace llvm {
const TargetLowering *TLI = IS->getTargetLowering();
const TargetSubtargetInfo &ST = IS->TM.getSubtarget<TargetSubtargetInfo>();
- if (OptLevel == CodeGenOpt::None || ST.enableMachineScheduler() ||
+ if (OptLevel == CodeGenOpt::None || ST.useMachineScheduler() ||
TLI->getSchedulingPreference() == Sched::Source)
return createSourceListDAGScheduler(IS, OptLevel);
if (TLI->getSchedulingPreference() == Sched::RegPressure)
@@ -356,6 +394,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
const Function &Fn = *mf.getFunction();
const TargetInstrInfo &TII = *TM.getInstrInfo();
const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ const TargetLowering *TLI = TM.getTargetLowering();
MF = &mf;
RegInfo = &MF->getRegInfo();
@@ -369,11 +408,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
ST.resetSubtargetFeatures(MF);
TM.resetTargetOptions(MF);
+ // Reset OptLevel to None for optnone functions.
+ CodeGenOpt::Level NewOptLevel = OptLevel;
+ if (Fn.hasFnAttribute(Attribute::OptimizeNone))
+ NewOptLevel = CodeGenOpt::None;
+ OptLevelChanger OLC(*this, NewOptLevel);
+
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
- CurDAG->init(*MF, TTI);
+ CurDAG->init(*MF, TTI, TLI);
FuncInfo->set(Fn, *MF);
if (UseMBPI && OptLevel != CodeGenOpt::None)
@@ -408,9 +453,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
EntryMBB->insert(EntryMBB->begin(), MI);
else {
MachineInstr *Def = RegInfo->getVRegDef(Reg);
- MachineBasicBlock::iterator InsertPos = Def;
- // FIXME: VR def may not be in entry block.
- Def->getParent()->insert(llvm::next(InsertPos), MI);
+ if (Def) {
+ MachineBasicBlock::iterator InsertPos = Def;
+ // FIXME: VR def may not be in entry block.
+ Def->getParent()->insert(llvm::next(InsertPos), MI);
+ } else
+ DEBUG(dbgs() << "Dropping debug info for dead vreg"
+ << TargetRegisterInfo::virtReg2Index(Reg) << "\n");
}
// If Reg is live-in then update debug info to track its copy in a vreg.
@@ -422,7 +471,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MachineBasicBlock::iterator InsertPos = Def;
const MDNode *Variable =
MI->getOperand(MI->getNumOperands()-1).getMetadata();
- bool IsIndirect = MI->getOperand(1).isImm();
+ bool IsIndirect = MI->isIndirectDebugValue();
unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
// Def is never a terminator here, so it is ok to increment InsertPos.
BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
@@ -497,6 +546,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
if (J == E) break;
To = J->second;
}
+ // Make sure the new register has a sufficiently constrained register class.
+ if (TargetRegisterInfo::isVirtualRegister(From) &&
+ TargetRegisterInfo::isVirtualRegister(To))
+ MRI.constrainRegClass(To, MRI.getRegClass(From));
// Replace it.
MRI.replaceRegWith(From, To);
}
@@ -617,6 +670,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber
<< " '" << BlockName << "'\n"; CurDAG->dump());
+ CurDAG->NewNodesMustHaveLegalTypes = true;
+
if (Changed) {
if (ViewDAGCombineLT)
CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
@@ -1140,6 +1195,91 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
delete FastIS;
SDB->clearDanglingDebugInfo();
+ SDB->SPDescriptor.resetPerFunctionState();
+}
+
+/// Given that the input MI is before a partial terminator sequence TSeq, return
+/// true if MI + TSeq is also a partial terminator sequence.
+///
+/// A Terminator sequence is a sequence of MachineInstrs which at this point in
+/// lowering copy vregs into physical registers, which are then passed into
+/// terminator instructions so we can satisfy ABI constraints. A partial
+/// terminator sequence is an improper subset of a terminator sequence (i.e. it
+/// may be the whole terminator sequence).
+static bool MIIsInTerminatorSequence(const MachineInstr *MI) {
+ // If we do not have a copy or an implicit def, we return true if and only if
+ // MI is a debug value.
+ if (!MI->isCopy() && !MI->isImplicitDef())
+ // Sometimes DBG_VALUE MI sneak in between the copies from the vregs to the
+ // physical registers if there is debug info associated with the terminator
+ // of our mbb. We want to include said debug info in our terminator
+ // sequence, so we return true in that case.
+ return MI->isDebugValue();
+
+ // We have left the terminator sequence if we are not doing one of the
+ // following:
+ //
+ // 1. Copying a vreg into a physical register.
+ // 2. Copying a vreg into a vreg.
+ // 3. Defining a register via an implicit def.
+
+ // OPI should always be a register definition...
+ MachineInstr::const_mop_iterator OPI = MI->operands_begin();
+ if (!OPI->isReg() || !OPI->isDef())
+ return false;
+
+ // Defining any register via an implicit def is always ok.
+ if (MI->isImplicitDef())
+ return true;
+
+ // Grab the copy source...
+ MachineInstr::const_mop_iterator OPI2 = OPI;
+ ++OPI2;
+ assert(OPI2 != MI->operands_end()
+ && "Should have a copy implying we should have 2 arguments.");
+
+ // Make sure that the copy dest is not a vreg when the copy source is a
+ // physical register.
+ if (!OPI2->isReg() ||
+ (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) &&
+ TargetRegisterInfo::isPhysicalRegister(OPI2->getReg())))
+ return false;
+
+ return true;
+}
+
+/// Find the split point at which to splice the end of BB into its success stack
+/// protector check machine basic block.
+///
+/// On many platforms, due to ABI constraints, terminators, even before register
+/// allocation, use physical registers. This creates an issue for us since
+/// physical registers at this point can not travel across basic
+/// blocks. Luckily, selectiondag always moves physical registers into vregs
+/// when they enter functions and moves them through a sequence of copies back
+/// into the physical registers right before the terminator creating a
+/// ``Terminator Sequence''. This function is searching for the beginning of the
+/// terminator sequence so that we can ensure that we splice off not just the
+/// terminator, but additionally the copies that move the vregs into the
+/// physical registers.
+static MachineBasicBlock::iterator
+FindSplitPointForStackProtector(MachineBasicBlock *BB, DebugLoc DL) {
+ MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
+ // Nothing precedes the split point, so there is no terminator sequence to scan.
+ if (SplitPoint == BB->begin())
+ return SplitPoint;
+
+ MachineBasicBlock::iterator Start = BB->begin();
+ MachineBasicBlock::iterator Previous = SplitPoint;
+ --Previous;
+
+ while (MIIsInTerminatorSequence(Previous)) {
+ SplitPoint = Previous;
+ if (Previous == Start)
+ break;
+ --Previous;
+ }
+
+ return SplitPoint;
}
void
@@ -1152,11 +1292,13 @@ SelectionDAGISel::FinishBasicBlock() {
<< FuncInfo->PHINodesToUpdate[i].first
<< ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
+ const bool MustUpdatePHINodes = SDB->SwitchCases.empty() &&
+ SDB->JTCases.empty() &&
+ SDB->BitTestCases.empty();
+
// Next, now that we know what the last MBB the LLVM BB expanded is, update
// PHI nodes in successors.
- if (SDB->SwitchCases.empty() &&
- SDB->JTCases.empty() &&
- SDB->BitTestCases.empty()) {
+ if (MustUpdatePHINodes) {
for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
assert(PHI->isPHI() &&
@@ -1165,9 +1307,54 @@ SelectionDAGISel::FinishBasicBlock() {
continue;
PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
}
- return;
}
+ // Handle stack protector.
+ if (SDB->SPDescriptor.shouldEmitStackProtector()) {
+ MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB();
+ MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB();
+
+ // Find the split point to split the parent mbb. At the same time copy all
+ // physical registers used in the tail of parent mbb into virtual registers
+ // before the split point and back into physical registers after the split
+ // point. This prevents us needing to deal with Live-ins and many other
+ // register allocation issues caused by us splitting the parent mbb. The
+ // register allocator will clean up said virtual copies later on.
+ MachineBasicBlock::iterator SplitPoint =
+ FindSplitPointForStackProtector(ParentMBB, SDB->getCurDebugLoc());
+
+ // Splice the terminator of ParentMBB into SuccessMBB.
+ SuccessMBB->splice(SuccessMBB->end(), ParentMBB,
+ SplitPoint,
+ ParentMBB->end());
+
+ // Add compare/jump on neq/jump to the parent BB.
+ FuncInfo->MBB = ParentMBB;
+ FuncInfo->InsertPt = ParentMBB->end();
+ SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // CodeGen Failure MBB if we have not codegened it yet.
+ MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB();
+ if (!FailureMBB->size()) {
+ FuncInfo->MBB = FailureMBB;
+ FuncInfo->InsertPt = FailureMBB->end();
+ SDB->visitSPDescriptorFailure(SDB->SPDescriptor);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // Clear the Per-BB State.
+ SDB->SPDescriptor.resetPerBBState();
+ }
+
+ // If we updated PHI Nodes, return early.
+ if (MustUpdatePHINodes)
+ return;
+
for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) {
// Lower header first, if it wasn't already lowered
if (!SDB->BitTestCases[i].Emitted) {
@@ -1741,15 +1928,15 @@ WalkChainUsers(const SDNode *ChainedNode,
SDNode *User = *UI;
+ if (User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
+ continue;
+
// If we see an already-selected machine node, then we've gone beyond the
// pattern that we're selecting down into the already selected chunk of the
// DAG.
- if (User->isMachineOpcode() ||
- User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
- continue;
-
unsigned UserOpcode = User->getOpcode();
- if (UserOpcode == ISD::CopyToReg ||
+ if (User->isMachineOpcode() ||
+ UserOpcode == ISD::CopyToReg ||
UserOpcode == ISD::CopyFromReg ||
UserOpcode == ISD::INLINEASM ||
UserOpcode == ISD::EH_LABEL ||
@@ -1886,7 +2073,6 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
}
}
- SDValue Res;
if (InputChains.size() == 1)
return InputChains[0];
return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]),
@@ -1962,6 +2148,18 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return N == RecordedNodes[RecNo].first;
}
+/// CheckChildSame - Implements OP_CheckChildXSame.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes,
+ unsigned ChildNo) {
+ if (ChildNo >= N.getNumOperands())
+ return false; // Match fails if out of range child #.
+ return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo),
+ RecordedNodes);
+}
+
/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
@@ -2076,6 +2274,13 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
case SelectionDAGISel::OPC_CheckSame:
Result = !::CheckSame(Table, Index, N, RecordedNodes);
return Index;
+ case SelectionDAGISel::OPC_CheckChild0Same:
+ case SelectionDAGISel::OPC_CheckChild1Same:
+ case SelectionDAGISel::OPC_CheckChild2Same:
+ case SelectionDAGISel::OPC_CheckChild3Same:
+ Result = !::CheckChildSame(Table, Index, N, RecordedNodes,
+ Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same);
+ return Index;
case SelectionDAGISel::OPC_CheckPatternPredicate:
Result = !::CheckPatternPredicate(Table, Index, SDISel);
return Index;
@@ -2373,6 +2578,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_CheckSame:
if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
continue;
+
+ case OPC_CheckChild0Same: case OPC_CheckChild1Same:
+ case OPC_CheckChild2Same: case OPC_CheckChild3Same:
+ if (!::CheckChildSame(MatcherTable, MatcherIndex, N, RecordedNodes,
+ Opcode-OPC_CheckChild0Same))
+ break;
+ continue;
+
case OPC_CheckPatternPredicate:
if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break;
continue;
@@ -2432,7 +2645,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
}
case OPC_SwitchType: {
- MVT CurNodeVT = N.getValueType().getSimpleVT();
+ MVT CurNodeVT = N.getSimpleValueType();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
unsigned CaseSize;
while (1) {
@@ -2544,7 +2757,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_EmitConvertToTarget: {
// Convert from IMM/FPIMM to target version.
unsigned RecNo = MatcherTable[MatcherIndex++];
- assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget");
SDValue Imm = RecordedNodes[RecNo].first;
if (Imm->getOpcode() == ISD::Constant) {
@@ -2569,7 +2782,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// Read all of the chained nodes.
unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1;
- assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
// FIXME: What if other value results of the node have uses not matched
@@ -2606,7 +2819,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// Read all of the chained nodes.
for (unsigned i = 0; i != NumChains; ++i) {
unsigned RecNo = MatcherTable[MatcherIndex++];
- assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
// FIXME: What if other value results of the node have uses not matched
@@ -2633,7 +2846,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_EmitCopyToReg: {
unsigned RecNo = MatcherTable[MatcherIndex++];
- assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg");
unsigned DestPhysReg = MatcherTable[MatcherIndex++];
if (InputChain.getNode() == 0)
@@ -2650,7 +2863,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_EmitNodeXForm: {
unsigned XFormNo = MatcherTable[MatcherIndex++];
unsigned RecNo = MatcherTable[MatcherIndex++];
- assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm");
SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo);
RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0));
continue;
@@ -2827,7 +3040,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (RecNo & 128)
RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
- assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ assert(RecNo < RecordedNodes.size() && "Invalid MarkGlueResults");
GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
}
continue;
@@ -2844,7 +3057,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (ResSlot & 128)
ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
- assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame");
+ assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch");
SDValue Res = RecordedNodes[ResSlot].first;
assert(i < NodeToMatch->getNumValues() &&
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e3c6306..82b068d 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -64,13 +64,29 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
return isUsedByReturnOnly(Node, Chain);
}
+/// \brief Set CallLoweringInfo attribute flags based on a call instruction
+/// and called function attributes. Every field assigned below is read from \p CS using index \p AttrIdx.
+void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS,
+ unsigned AttrIdx) {
+ isSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt); // CS is dereferenced without a null check.
+ isZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt);
+ isInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg);
+ isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
+ isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest);
+ isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
+ isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
+ Alignment = CS->getParamAlignment(AttrIdx); // Taken from getParamAlignment, unlike the paramHasAttr results above.
+}
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
-SDValue TargetLowering::makeLibCall(SelectionDAG &DAG,
- RTLIB::Libcall LC, EVT RetVT,
- const SDValue *Ops, unsigned NumOps,
- bool isSigned, SDLoc dl) const {
+std::pair<SDValue, SDValue>
+TargetLowering::makeLibCall(SelectionDAG &DAG,
+ RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, SDLoc dl,
+ bool doesNotReturn,
+ bool isReturnValueUsed) const {
TargetLowering::ArgListTy Args;
Args.reserve(NumOps);
@@ -89,11 +105,9 @@ SDValue TargetLowering::makeLibCall(SelectionDAG &DAG,
CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
false, 0, getLibcallCallingConv(LC),
/*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- Callee, Args, DAG, dl);
- std::pair<SDValue,SDValue> CallInfo = LowerCallTo(CLI);
-
- return CallInfo.first;
+ doesNotReturn, isReturnValueUsed, Callee, Args,
+ DAG, dl);
+ return LowerCallTo(CLI);
}
@@ -183,14 +197,16 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
// Use the target specific return value for comparions lib calls.
EVT RetVT = getCmpLibcallReturnType();
SDValue Ops[2] = { NewLHS, NewRHS };
- NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/,
+ dl).first;
NewRHS = DAG.getConstant(0, RetVT);
CCCode = getCmpLibcallCC(LC1);
if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
SDValue Tmp = DAG.getNode(ISD::SETCC, dl,
getSetCCResultType(*DAG.getContext(), RetVT),
NewLHS, NewRHS, DAG.getCondCode(CCCode));
- NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/,
+ dl).first;
NewLHS = DAG.getNode(ISD::SETCC, dl,
getSetCCResultType(*DAG.getContext(), RetVT), NewLHS,
NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
@@ -632,6 +648,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(),
NarrowShl));
}
+ // Repeat the SHL optimization above in cases where an extension
+ // intervenes: (shl (anyext (shr x, c1)), c2) to
+ // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
+ // aren't demanded (as above) and that the shifted upper c1 bits of
+ // x aren't demanded.
+ if (InOp.hasOneUse() &&
+ InnerOp.getOpcode() == ISD::SRL &&
+ InnerOp.hasOneUse() &&
+ isa<ConstantSDNode>(InnerOp.getOperand(1))) {
+ uint64_t InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1))
+ ->getZExtValue();
+ if (InnerShAmt < ShAmt &&
+ InnerShAmt < InnerBits &&
+ NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 &&
+ NewMask.trunc(ShAmt) == 0) {
+ SDValue NewSA =
+ TLO.DAG.getConstant(ShAmt - InnerShAmt,
+ Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
+ InnerOp.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
+ NewExt, NewSA));
+ }
+ }
}
KnownZero <<= SA->getZExtValue();
@@ -722,13 +763,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
- if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
+ if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits)
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
Op.getOperand(0),
Op.getOperand(1)));
- } else if (KnownOne.intersects(SignBit)) { // New bits are known one.
- KnownOne |= HighBits;
+
+ int Log2 = NewMask.exactLogBase2();
+ if (Log2 >= 0) {
+ // The bit must come from the sign.
+ SDValue NewSA =
+ TLO.DAG.getConstant(BitWidth - 1 - Log2,
+ Op.getOperand(1).getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
+ Op.getOperand(0), NewSA));
}
+
+ if (KnownOne.intersects(SignBit))
+ // New bits are known one.
+ KnownOne |= HighBits;
}
break;
case ISD::SIGN_EXTEND_INREG: {
@@ -1077,13 +1129,20 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
case ISD::SETFALSE:
case ISD::SETFALSE2: return DAG.getConstant(0, VT);
case ISD::SETTRUE:
- case ISD::SETTRUE2: return DAG.getConstant(1, VT);
+ case ISD::SETTRUE2: {
+ TargetLowering::BooleanContent Cnt = getBooleanContents(VT.isVector());
+ return DAG.getConstant(
+ Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT);
+ }
}
// Ensure that the constant occurs on the RHS, and fold constant
// comparisons.
- if (isa<ConstantSDNode>(N0.getNode()))
- return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+ ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
+ if (isa<ConstantSDNode>(N0.getNode()) &&
+ (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
+ return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
const APInt &C1 = N1C->getAPIntValue();
@@ -1178,6 +1237,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// the test is for equality or unsigned, and all 1 bits of the const are
// in the same partial word, see if we can shorten the load.
if (DCI.isBeforeLegalize() &&
+ !ISD::isSignedIntSetCC(Cond) &&
N0.getOpcode() == ISD::AND && C1 == 0 &&
N0.getNode()->hasOneUse() &&
isa<LoadSDNode>(N0.getOperand(0)) &&
@@ -1322,7 +1382,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
CC = ISD::getSetCCInverse(CC,
N0.getOperand(0).getValueType().isInteger());
- return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
}
if ((N0.getOpcode() == ISD::XOR ||
@@ -1759,16 +1821,22 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
if (ValueHasExactlyOneBitSet(N1, DAG)) {
Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
- SDValue Zero = DAG.getConstant(0, N1.getValueType());
- return DAG.getSetCC(dl, VT, N0, Zero, Cond);
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(Cond, N0.getSimpleValueType())) {
+ SDValue Zero = DAG.getConstant(0, N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, Zero, Cond);
+ }
}
}
if (N1.getOpcode() == ISD::AND)
if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
if (ValueHasExactlyOneBitSet(N0, DAG)) {
Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
- SDValue Zero = DAG.getConstant(0, N0.getValueType());
- return DAG.getSetCC(dl, VT, N1, Zero, Cond);
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(Cond, N1.getSimpleValueType())) {
+ SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N1, Zero, Cond);
+ }
}
}
}
@@ -1993,7 +2061,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
getRegForInlineAsmConstraint(const std::string &Constraint,
MVT VT) const {
- if (Constraint[0] != '{')
+ if (Constraint.empty() || Constraint[0] != '{')
return std::make_pair(0u, static_cast<TargetRegisterClass*>(0));
assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
@@ -2142,8 +2210,9 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
break;
}
} else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
- OpInfo.ConstraintVT = MVT::getIntegerVT(
- 8*getDataLayout()->getPointerSize(PT->getAddressSpace()));
+ unsigned PtrSize
+ = getDataLayout()->getPointerSizeInBits(PT->getAddressSpace());
+ OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
} else {
OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
}