aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/PowerPC/PPCISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp945
1 files changed, 798 insertions, 147 deletions
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 741e25e..3fcafdc 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -70,6 +70,8 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
: TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
+ PPCRegInfo = TM.getRegisterInfo();
+ PPCII = TM.getInstrInfo();
setPow2DivIsCheap();
@@ -115,6 +117,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
// PowerPC has no SREM/UREM instructions
setOperationAction(ISD::SREM, MVT::i32, Expand);
@@ -149,26 +152,58 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
- if (!Subtarget->hasFSQRT()) {
+ if (!Subtarget->hasFSQRT() &&
+ !(TM.Options.UnsafeFPMath &&
+ Subtarget->hasFRSQRTE() && Subtarget->hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+
+ if (!Subtarget->hasFSQRT() &&
+ !(TM.Options.UnsafeFPMath &&
+ Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- }
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ if (Subtarget->hasFPRND()) {
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+
+ // frin does not implement "ties to even." Thus, this is safe only in
+ // fast-math mode.
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+
+ // These need to set FE_INEXACT, and use a custom inserter.
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ }
+ }
+
// PowerPC does not have BSWAP, CTPOP or CTTZ
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
- setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
- setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ if (Subtarget->hasPOPCNTD()) {
+ setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
+ } else {
+ setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
+ }
+
// PowerPC does not have ROTR
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
setOperationAction(ISD::ROTR, MVT::i64 , Expand);
@@ -211,6 +246,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
+ // SjLj exception handling but a light-weight setjmp/longjmp replacement to
+ // support continuation, user-level threading, and etc.. As a result, no
+ // other SjLj exception interfaces are implemented and please don't build
+ // your own exception handling based on them.
+ // LLVM/Clang supports zero-cost DWARF exception handling.
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
@@ -290,15 +333,28 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We cannot do this with Promote because i64 is not a legal type.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- // FIXME: disable this lowered code. This generates 64-bit register values,
- // and we don't model the fact that the top part is clobbered by calls. We
- // need to flag these together so that the value isn't live across a call.
- //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64())
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
}
+ // With the instructions enabled under FPCVT, we can do everything.
+ if (PPCSubTarget.hasFPCVT()) {
+ if (Subtarget->has64BitSupport()) {
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ }
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ }
+
if (Subtarget->use64BitRegs()) {
// 64-bit PowerPC implementations can support i64 types directly
addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
@@ -420,6 +476,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
+
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ }
+
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -452,7 +514,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
setBooleanContents(ZeroOrOneBooleanContent);
- setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+ // Altivec instructions set fields to all zeros or all ones.
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
if (isPPC64) {
setStackPointerRegisterToSaveRestore(PPC::X1);
@@ -470,6 +533,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setTargetDAGCombine(ISD::BR_CC);
setTargetDAGCombine(ISD::BSWAP);
+ // Use reciprocal estimates.
+ if (TM.Options.UnsafeFPMath) {
+ setTargetDAGCombine(ISD::FDIV);
+ setTargetDAGCombine(ISD::FSQRT);
+ }
+
// Darwin long double math library functions have $LDBL128 appended.
if (Subtarget->isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
@@ -511,7 +580,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
MaxStoresPerMemmoveOptSize = 8;
setPrefFunctionAlignment(4);
- BenefitFromCodePlacementOpt = true;
}
}
@@ -542,6 +610,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
+ case PPCISD::FRE: return "PPCISD::FRE";
+ case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
case PPCISD::STFIWX: return "PPCISD::STFIWX";
case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
@@ -557,16 +627,13 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::SRL: return "PPCISD::SRL";
case PPCISD::SRA: return "PPCISD::SRA";
case PPCISD::SHL: return "PPCISD::SHL";
- case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
- case PPCISD::STD_32: return "PPCISD::STD_32";
- case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
- case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4";
- case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
- case PPCISD::NOP: return "PPCISD::NOP";
+ case PPCISD::CALL: return "PPCISD::CALL";
+ case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
- case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin";
- case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4";
+ case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
+ case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
case PPCISD::MFCR: return "PPCISD::MFCR";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
@@ -576,10 +643,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::STCX: return "PPCISD::STCX";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::MFFS: return "PPCISD::MFFS";
- case PPCISD::MTFSB0: return "PPCISD::MTFSB0";
- case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
- case PPCISD::MTFSF: return "PPCISD::MTFSF";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
case PPCISD::CR6SET: return "PPCISD::CR6SET";
case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
@@ -1031,7 +1095,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm)) {
Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1080,7 +1144,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
}
// Otherwise, do it the hard way, using R0 as the base register.
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
N.getValueType());
Index = N;
return true;
@@ -1143,7 +1207,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm)) {
Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1181,6 +1245,7 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SelectionDAG &DAG) const {
if (DisablePPCPreinc) return false;
+ bool isLoad = true;
SDValue Ptr;
EVT VT;
unsigned Alignment;
@@ -1192,6 +1257,7 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
Alignment = ST->getAlignment();
+ isLoad = false;
} else
return false;
@@ -1199,7 +1265,25 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (VT.isVector())
return false;
- if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) {
+ if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
+
+ // Common code will reject creating a pre-inc form if the base pointer
+ // is a frame index, or if N is a store and the base pointer is either
+ // the same as or a predecessor of the value being stored. Check for
+ // those situations here, and try with swapped Base/Offset instead.
+ bool Swap = false;
+
+ if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
+ Swap = true;
+ else if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
+ Swap = true;
+ }
+
+ if (Swap)
+ std::swap(Base, Offset);
+
AM = ISD::PRE_INC;
return true;
}
@@ -3105,7 +3189,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
NodeTys.push_back(MVT::Other); // Returns a chain
NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
- unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
+ unsigned CallOpc = PPCISD::CALL;
bool needIndirectCall = true;
if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
@@ -3238,8 +3322,11 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
NodeTys.push_back(MVT::Other);
NodeTys.push_back(MVT::Glue);
Ops.push_back(Chain);
- CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin;
+ CallOpc = PPCISD::BCTRL;
Callee.setNode(0);
+ // Add use of X11 (holding environment pointer)
+ if (isSVR4ABI && isPPC64)
+ Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
@@ -3378,7 +3465,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
bool needsTOCRestore = false;
if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
- if (CallOpc == PPCISD::BCTRL_SVR4) {
+ if (CallOpc == PPCISD::BCTRL) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
// See PrepareCall() for more information about calls through function
@@ -3389,9 +3476,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
needsTOCRestore = true;
- } else if ((CallOpc == PPCISD::CALL_SVR4) && !isLocalCall(Callee)) {
+ } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
// Otherwise insert NOP for non-local calls.
- CallOpc = PPCISD::CALL_NOP_SVR4;
+ CallOpc = PPCISD::CALL_NOP;
}
}
@@ -4564,6 +4651,21 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
}
+SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -4572,10 +4674,14 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
!Op.getOperand(2).getValueType().isFloatingPoint())
return Op;
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ // We might be able to do better than this under some circumstances, but in
+ // general, fsel-based lowering of select is a finite-math-only optimization.
+ // For more information, see section F.3 of the 2.06 ISA specification.
+ if (!DAG.getTarget().Options.NoInfsFPMath ||
+ !DAG.getTarget().Options.NoNaNsFPMath)
+ return Op;
- // Cannot handle SETEQ/SETNE.
- if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
EVT ResVT = Op.getValueType();
EVT CmpVT = Op.getOperand(0).getValueType();
@@ -4585,9 +4691,20 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// If the RHS of the comparison is a 0.0, we don't need to do the
// subtraction at all.
+ SDValue Sel1;
if (isFloatingPointZero(RHS))
switch (CC) {
default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETNE:
+ std::swap(TV, FV);
+ case ISD::SETEQ:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
+ if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+ DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
case ISD::SETULT:
case ISD::SETLT:
std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
@@ -4610,30 +4727,41 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp;
switch (CC) {
default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETNE:
+ std::swap(TV, FV);
+ case ISD::SETEQ:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+ DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
case ISD::SETULT:
case ISD::SETLT:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
- return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
case ISD::SETOGE:
case ISD::SETGE:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
- return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
case ISD::SETUGT:
case ISD::SETGT:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
- return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
case ISD::SETOLE:
case ISD::SETLE:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
- return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
}
return Op;
}
@@ -4651,37 +4779,72 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
- PPCISD::FCTIDZ,
+ (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ :
+ PPCISD::FCTIDZ),
dl, MVT::f64, Src);
break;
case MVT::i64:
- Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) &&
+ "i64 FP_TO_UINT is supported only with FPCVT");
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+ PPCISD::FCTIDUZ,
+ dl, MVT::f64, Src);
break;
}
// Convert the FP value to an int value through memory.
- SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
+ bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() &&
+ (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT());
+ SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
+ int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
+ MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
// Emit a store to the stack slot.
- SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
- MachinePointerInfo(), false, false, 0);
+ SDValue Chain;
+ if (i32Stack) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
+ SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
+ Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+ DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+ MVT::i32, MMO);
+ } else
+ Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
+ MPI, false, false, 0);
// Result is a load from the stack slot. If loading 4 bytes, make sure to
// add in a bias.
- if (Op.getValueType() == MVT::i32)
+ if (Op.getValueType() == MVT::i32 && !i32Stack) {
FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
DAG.getConstant(4, FIPtr.getValueType()));
- return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
+ MPI = MachinePointerInfo();
+ }
+
+ return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI,
false, false, false, 0);
}
-SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
+SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
+ assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) &&
+ "UINT_TO_FP is supported only with FPCVT");
+
+ // If we have FCFIDS, then use it when converting to single-precision.
+ // Otherwise, convert to double-precision and then round.
+ unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ (Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::FCFIDUS : PPCISD::FCFIDS) :
+ (Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::FCFIDU : PPCISD::FCFID);
+ MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ MVT::f32 : MVT::f64;
+
if (Op.getOperand(0).getValueType() == MVT::i64) {
SDValue SINT = Op.getOperand(0);
// When converting to single-precision, we actually need to convert
@@ -4695,6 +4858,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
// However, if -enable-unsafe-fp-math is in effect, accept double
// rounding to avoid the extra overhead.
if (Op.getValueType() == MVT::f32 &&
+ !PPCSubTarget.hasFPCVT() &&
!DAG.getTarget().Options.UnsafeFPMath) {
// Twiddle input to make sure the low 11 bits are zero. (If this
@@ -4728,44 +4892,69 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
}
+
SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
- SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
- if (Op.getValueType() == MVT::f32)
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
+
+ if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl,
MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
}
assert(Op.getOperand(0).getValueType() == MVT::i32 &&
- "Unhandled SINT_TO_FP type in custom expander!");
+ "Unhandled INT_TO_FP type in custom expander!");
// Since we only generate this in 64-bit mode, we can take advantage of
// 64-bit registers. In particular, sign extend the input value into the
// 64-bit register with extsw, store the WHOLE 64-bit value into the stack
// then lfd it and fcfid it.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
- SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
+ SDValue Ld;
+ if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) {
+ int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
+ "Expected an i32 store");
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad, 4, 4);
+ SDValue Ops[] = { Store, FIdx };
+ Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::LFIWZX : PPCISD::LFIWAX,
+ dl, DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, 2, MVT::i32, MMO);
+ } else {
+ assert(PPCSubTarget.isPPC64() &&
+ "i32->FP without LFIWAX supported only on PPC64");
+
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
Op.getOperand(0));
- // STD the extended value into the stack slot.
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
- MachineMemOperand::MOStore, 8, 8);
- SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx };
- SDValue Store =
- DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
- Ops, 4, MVT::i64, MMO);
- // Load the value as a double.
- SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
- false, false, false, 0);
+ // STD the extended value into the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ // Load the value as a double.
+ Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, false, 0);
+ }
// FCFID it and return it.
- SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
- if (Op.getValueType() == MVT::f32)
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
+ if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
}
@@ -5560,11 +5749,15 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+ case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
+
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
Op.getDebugLoc());
- case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
// Lower 64-bit shifts.
@@ -5618,50 +5811,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
MVT::f64, N->getOperand(0),
DAG.getIntPtrConstant(1));
- // This sequence changes FPSCR to do round-to-zero, adds the two halves
- // of the long double, and puts FPSCR back the way it was. We do not
- // actually model FPSCR.
- std::vector<EVT> NodeTys;
- SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
-
- NodeTys.push_back(MVT::f64); // Return register
- NodeTys.push_back(MVT::Glue); // Returns a flag for later insns
- Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
- MFFSreg = Result.getValue(0);
- InFlag = Result.getValue(1);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::Glue); // Returns a flag
- Ops[0] = DAG.getConstant(31, MVT::i32);
- Ops[1] = InFlag;
- Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
- InFlag = Result.getValue(0);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::Glue); // Returns a flag
- Ops[0] = DAG.getConstant(30, MVT::i32);
- Ops[1] = InFlag;
- Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
- InFlag = Result.getValue(0);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::f64); // result of add
- NodeTys.push_back(MVT::Glue); // Returns a flag
- Ops[0] = Lo;
- Ops[1] = Hi;
- Ops[2] = InFlag;
- Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
- FPreg = Result.getValue(0);
- InFlag = Result.getValue(1);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::f64);
- Ops[0] = DAG.getConstant(1, MVT::i32);
- Ops[1] = MFFSreg;
- Ops[2] = FPreg;
- Ops[3] = InFlag;
- Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
- FPreg = Result.getValue(0);
+ // Add the two halves of the long double in round-to-zero mode.
+ SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
// We know the low half is about to be thrown away, so just use something
// convenient.
@@ -5753,7 +5904,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
bool is64bit = PPCSubTarget.isPPC64();
- unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
@@ -5857,7 +6008,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
.addReg(TmpReg).addReg(MaskReg);
BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
.addReg(Tmp3Reg).addReg(Tmp2Reg);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(PPC::STWCX))
.addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
@@ -5872,9 +6023,238 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
return BB;
}
+llvm::MachineBasicBlock*
+PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ unsigned mainDstReg = MRI.createVirtualRegister(RC);
+ unsigned restoreDstReg = MRI.createVirtualRegister(RC);
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+ // For v = setjmp(buf), we generate
+ //
+ // thisMBB:
+ // SjLjSetup mainMBB
+ // bl mainMBB
+ // v_restore = 1
+ // b sinkMBB
+ //
+ // mainMBB:
+ // buf[LabelOffset] = LR
+ // v_main = 0
+ //
+ // sinkMBB:
+ // v = phi(main, restore)
+ //
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Note that the structure of the jmp_buf used here is not compatible
+ // with that used by libc, and is not designed to be. Specifically, it
+ // stores only those 'reserved' registers that LLVM does not otherwise
+ // understand how to spill. Also, by convention, by the time this
+ // intrinsic is called, Clang has already stored the frame address in the
+ // first slot of the buffer and stack address in the third. Following the
+ // X86 target code, we'll store the jump address in the second slot. We also
+ // need to save the TOC pointer (R2) to handle jumps between shared
+ // libraries, and that will be stored in the fourth slot. The thread
+ // identifier (R13) is not affected.
+
+ // thisMBB:
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+
+ // Prepare IP either in reg.
+ const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+ unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
+ unsigned BufReg = MI->getOperand(1).getReg();
+
+ if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
+ .addReg(PPC::X2)
+ .addImm(TOCOffset / 4)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Setup
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
+ MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
+
+ BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
+
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
+ .addMBB(mainMBB);
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
+
+ thisMBB->addSuccessor(mainMBB, /* weight */ 0);
+ thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
+
+ // mainMBB:
+ // mainDstReg = 0
+ MIB = BuildMI(mainMBB, DL,
+ TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+
+ // Store IP
+ if (PPCSubTarget.isPPC64()) {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
+ .addReg(LabelReg)
+ .addImm(LabelOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
+ .addReg(LabelReg)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(PPC::PHI), DstReg)
+ .addReg(mainDstReg).addMBB(mainMBB)
+ .addReg(restoreDstReg).addMBB(thisMBB);
+
+ MI->eraseFromParent();
+ return sinkMBB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ const TargetRegisterClass *RC =
+ (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned Tmp = MRI.createVirtualRegister(RC);
+ // Since FP is only updated here but NOT referenced, it's treated as GPR.
+ unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
+ unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+
+ MachineInstrBuilder MIB;
+
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t SPOffset = 2 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+
+ unsigned BufReg = MI->getOperand(0).getReg();
+
+ // Reload FP (the jumped-to function may not have had a
+ // frame pointer, and if so, then its r31 will be restored
+ // as necessary).
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload IP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
+ .addImm(LabelOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload SP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
+ .addImm(SPOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
+ .addImm(SPOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // FIXME: When we also support base pointers, that register must also be
+ // restored here.
+
+ // Reload TOC
+ if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
+ .addImm(TOCOffset / 4)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Jump
+ BuildMI(*MBB, MI, DL,
+ TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
+ BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
+ if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
+ return emitEHSjLjSetJmp(MI, BB);
+ } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
+ return emitEHSjLjLongJmp(MI, BB);
+ }
+
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
// To "insert" these instructions we actually have to insert their
@@ -5887,29 +6267,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8)) {
- unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ?
- PPC::ISEL8 : PPC::ISEL;
- unsigned SelectPred = MI->getOperand(4).getImm();
- DebugLoc dl = MI->getDebugLoc();
+ SmallVector<MachineOperand, 2> Cond;
+ Cond.push_back(MI->getOperand(4));
+ Cond.push_back(MI->getOperand(1));
- // The SelectPred is ((BI << 5) | BO) for a BCC
- unsigned BO = SelectPred & 0xF;
- assert((BO == 12 || BO == 4) && "invalid predicate BO field for isel");
-
- unsigned TrueOpNo, FalseOpNo;
- if (BO == 12) {
- TrueOpNo = 2;
- FalseOpNo = 3;
- } else {
- TrueOpNo = 3;
- FalseOpNo = 2;
- SelectPred = PPC::InvertPredicate((PPC::Predicate)SelectPred);
- }
-
- BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(TrueOpNo).getReg())
- .addReg(MI->getOperand(FalseOpNo).getReg())
- .addImm(SelectPred).addReg(MI->getOperand(1).getReg());
+ DebugLoc dl = MI->getDebugLoc();
+ PPCII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), Cond,
+ MI->getOperand(2).getReg(), MI->getOperand(3).getReg());
} else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_CC_F4 ||
@@ -6142,7 +6506,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
unsigned Ptr1Reg;
unsigned TmpReg = RegInfo.createVirtualRegister(RC);
- unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
// thisMBB:
// ...
// fallthrough --> loopMBB
@@ -6245,6 +6609,75 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB = exitMBB;
BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
.addReg(ShiftReg);
+ } else if (MI->getOpcode() == PPC::FADDrtz) {
+ // This pseudo performs an FADD with rounding mode temporarily forced
+ // to round-to-zero. We emit this via custom inserter since the FPSCR
+ // is not modeled at the SelectionDAG level.
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src1 = MI->getOperand(1).getReg();
+ unsigned Src2 = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+
+ // Save FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
+
+ // Set rounding mode to round-to-zero.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
+
+ // Perform addition.
+ BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
+
+ // Restore FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
+ } else if (MI->getOpcode() == PPC::FRINDrint ||
+ MI->getOpcode() == PPC::FRINSrint) {
+ bool isf32 = MI->getOpcode() == PPC::FRINSrint;
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+
+ // Perform the rounding.
+ BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest)
+ .addReg(Src);
+
+ // Compare the results.
+ BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg)
+ .addReg(Dest).addReg(Src);
+
+ // If the results were not equal, then set the FPSCR XX bit.
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BuildMI(*BB, MI, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB);
+
+ BB->addSuccessor(midMBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+
+ // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set
+ // the FI bit here because that will not automatically set XX also,
+ // and XX is what libm interprets as the FE_INEXACT flag.
+ BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+
+ BB->addSuccessor(exitMBB);
+
+ BB = exitMBB;
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -6257,6 +6690,139 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
+SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
+ DAGCombinerInfo &DCI) const {
+ if (DCI.isAfterLegalizeVectorOps())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
+ (VT == MVT::f64 && PPCSubTarget.hasFRE()) ||
+ (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+
+ // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+ // For the reciprocal, we need to find the zero of the function:
+ // F(X) = A X - 1 [which has a zero at X = 1/A]
+ // =>
+ // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+ // does not require additional intermediate precision]
+
+ // Convergence is quadratic, so we essentially double the number of digits
+ // correct after every iteration. The minimum architected relative
+ // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
+ // 23 digits and double has 52 digits.
+ int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ if (VT.getScalarType() == MVT::f64)
+ ++Iterations;
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue FPOne =
+ DAG.getConstantFP(1.0, VT.getScalarType());
+ if (VT.isVector()) {
+ assert(VT.getVectorNumElements() == 4 &&
+ "Unknown vector type");
+ FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+ FPOne, FPOne, FPOne, FPOne);
+ }
+
+ SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op);
+ DCI.AddToWorklist(Est.getNode());
+
+ // Newton iterations: Est = Est + Est (1 - Arg * Est)
+ for (int i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst);
+ DCI.AddToWorklist(Est.getNode());
+ }
+
+ return Est;
+ }
+
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
+ DAGCombinerInfo &DCI) const {
+ if (DCI.isAfterLegalizeVectorOps())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
+ (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
+ (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+
+ // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+ // For the reciprocal sqrt, we need to find the zero of the function:
+ // F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+ // =>
+ // X_{i+1} = X_i (1.5 - A X_i^2 / 2)
+ // As a result, we precompute A/2 prior to the iteration loop.
+
+ // Convergence is quadratic, so we essentially double the number of digits
+ // correct after every iteration. The minimum architected relative
+ // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
+ // 23 digits and double has 52 digits.
+ int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ if (VT.getScalarType() == MVT::f64)
+ ++Iterations;
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue FPThreeHalves =
+ DAG.getConstantFP(1.5, VT.getScalarType());
+ if (VT.isVector()) {
+ assert(VT.getVectorNumElements() == 4 &&
+ "Unknown vector type");
+ FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+ FPThreeHalves, FPThreeHalves,
+ FPThreeHalves, FPThreeHalves);
+ }
+
+ SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op);
+ DCI.AddToWorklist(Est.getNode());
+
+ // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
+ // this entire sequence requires only one FP constant.
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op);
+ DCI.AddToWorklist(HalfArg.getNode());
+
+ HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op);
+ DCI.AddToWorklist(HalfArg.getNode());
+
+ // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
+ for (int i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
+ DCI.AddToWorklist(Est.getNode());
+ }
+
+ return Est;
+ }
+
+ return SDValue();
+}
+
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
const TargetMachine &TM = getTargetMachine();
@@ -6283,7 +6849,72 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return N->getOperand(0);
}
break;
+ case ISD::FDIV: {
+ assert(TM.Options.UnsafeFPMath &&
+ "Reciprocal estimates require UnsafeFPMath");
+
+ if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
+ SDValue RV =
+ DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND &&
+ N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
+ SDValue RV =
+ DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
+ DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAG.getNode(ISD::FP_EXTEND, N->getOperand(1).getDebugLoc(),
+ N->getValueType(0), RV);
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND &&
+ N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
+ SDValue RV =
+ DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
+ DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAG.getNode(ISD::FP_ROUND, N->getOperand(1).getDebugLoc(),
+ N->getValueType(0), RV,
+ N->getOperand(1).getOperand(1));
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ }
+ SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+
+ }
+ break;
+ case ISD::FSQRT: {
+ assert(TM.Options.UnsafeFPMath &&
+ "Reciprocal estimates require UnsafeFPMath");
+
+ // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
+ // reciprocal sqrt.
+ SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAGCombineFastRecip(RV, DCI);
+ if (RV.getNode() != 0)
+ return RV;
+ }
+
+ }
+ break;
case ISD::SINT_TO_FP:
if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
@@ -6330,8 +6961,15 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
DCI.AddToWorklist(Val.getNode());
- Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val,
- N->getOperand(2), N->getOperand(3));
+ SDValue Ops[] = {
+ N->getOperand(0), Val, N->getOperand(2),
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+
+ Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+ DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+ cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
DCI.AddToWorklist(Val.getNode());
return Val;
}
@@ -6341,7 +6979,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
N->getOperand(1).getOpcode() == ISD::BSWAP &&
N->getOperand(1).getNode()->hasOneUse() &&
(N->getOperand(1).getValueType() == MVT::i32 ||
- N->getOperand(1).getValueType() == MVT::i16)) {
+ N->getOperand(1).getValueType() == MVT::i16 ||
+ (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
+ TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ N->getOperand(1).getValueType() == MVT::i64))) {
SDValue BSwapOp = N->getOperand(1).getOperand(0);
// Do an any-extend to 32-bits if this is a half-word input.
if (BSwapOp.getValueType() == MVT::i16)
@@ -6362,7 +7003,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
- (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
+ (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
+ (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
+ TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ N->getValueType(0) == MVT::i64))) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
// Create the byte-swapping load.
@@ -6373,8 +7017,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
};
SDValue BSLoad =
DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
- DAG.getVTList(MVT::i32, MVT::Other), Ops, 3,
- LD->getMemoryVT(), LD->getMemOperand());
+ DAG.getVTList(N->getValueType(0) == MVT::i64 ?
+ MVT::i64 : MVT::i32, MVT::Other),
+ Ops, 3, LD->getMemoryVT(), LD->getMemOperand());
// If this is an i16 load, insert the truncate.
SDValue ResVal = BSLoad;
@@ -6631,6 +7276,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
case 'b': // R1-R31
+ if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
+ return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
case 'r': // R0-R31
if (VT == MVT::i64 && PPCSubTarget.isPPC64())
return std::make_pair(0U, &PPC::G8RCRegClass);
@@ -6815,13 +7463,16 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
- MFI->hasVarSizedObjects()) &&
- MFI->getStackSize() &&
- !MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked);
- unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
- (is31 ? PPC::R31 : PPC::R1);
+
+ // Naked functions never have a frame pointer, and so we use r1. For all
+ // other functions, this decision must be delayed until during PEI.
+ unsigned FrameReg;
+ if (MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
+ FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
+ else
+ FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
+
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
PtrVT);
while (Depth--)