aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/PowerPC/PPCISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp197
1 files changed, 155 insertions, 42 deletions
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index e2433e7..664dd12 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -228,11 +228,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
- setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
- setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
-
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
// support continuation, user-level threading, and etc.. As a result, no
@@ -285,8 +280,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
} else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ if (Subtarget->isSVR4ABI() && !isPPC64)
+ // VACOPY is custom lowered with the 32-bit SVR4 ABI.
+ setOperationAction(ISD::VACOPY , MVT::Other, Custom);
+ else
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+
// Use the default implementation.
- setOperationAction(ISD::VACOPY , MVT::Other, Expand);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
@@ -397,6 +397,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FNEG, VT, Expand);
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
@@ -491,6 +492,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
+
+ setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
}
if (Subtarget->has64BitSupport()) {
@@ -626,7 +630,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
- case PPCISD::MFCR: return "PPCISD::MFCR";
+ case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
@@ -1031,6 +1035,46 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
return false;
}
+// If we happen to be doing an i64 load or store into a stack slot that has
+// less than a 4-byte alignment, then the frame-index elimination may need to
+// use an indexed load or store instruction (because the offset may not be a
+// multiple of 4). The extra register needed to hold the offset comes from the
+// register scavenger, and it is possible that the scavenger will need to use
+// an emergency spill slot. As a result, we need to make sure that a spill slot
+// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
+// stack slot.
+static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
+ // FIXME: This does not handle the LWA case.
+ if (VT != MVT::i64)
+ return;
+
+ // NOTE: We'll exclude negative FIs here, which come from argument
+ // lowering, because there are no known test cases triggering this problem
+ // using packed structures (or similar). We can remove this exclusion if
+ // we find such a test case. The reason why this is so test-case driven is
+ // because this entire 'fixup' is only to prevent crashes (from the
+ // register scavenger) on not-really-valid inputs. For example, if we have:
+ // %a = alloca i1
+ // %b = bitcast i1* %a to i64*
+ // store i64* a, i64 b
+ // then the store should really be marked as 'align 1', but is not. If it
+ // were marked as 'align 1' then the indexed form would have been
+ // instruction-selected initially, and the problem this 'fixup' is preventing
+ // won't happen regardless.
+ if (FrameIdx < 0)
+ return;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+ if (Align >= 4)
+ return;
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasNonRISpills();
+}
+
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg. If Aligned is true, only accept displacements
@@ -1052,6 +1096,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
Disp = DAG.getTargetConstant(imm, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
} else {
Base = N.getOperand(0);
}
@@ -1116,9 +1161,10 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
}
Disp = DAG.getTargetConstant(0, getPointerTy());
- if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
- else
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+ } else
Base = N;
return true; // [r+0]
}
@@ -1236,8 +1282,8 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
unsigned &LoOpFlags, const GlobalValue *GV = 0) {
- HiOpFlags = PPCII::MO_HA16;
- LoOpFlags = PPCII::MO_LO16;
+ HiOpFlags = PPCII::MO_HA;
+ LoOpFlags = PPCII::MO_LO;
// Don't use the pic base if not in PIC relocation model. Or if we are on a
// non-darwin platform. We don't support PIC on other platforms yet.
@@ -1350,9 +1396,9 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
if (Model == TLSModel::LocalExec) {
SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TPREL16_HA);
+ PPCII::MO_TPREL_HA);
SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TPREL16_LO);
+ PPCII::MO_TPREL_LO);
SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
is64bit ? MVT::i64 : MVT::i32);
SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
@@ -1364,12 +1410,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
if (Model == TLSModel::InitialExec) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TLS);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
PtrVT, GOTReg, TGA);
SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
PtrVT, TGA, TPOffsetHi);
- return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA);
+ return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
}
if (Model == TLSModel::GeneralDynamic) {
@@ -1607,6 +1655,18 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
false, false, false, 0);
}
+SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
+
+ // We have to copy the entire va_list struct:
+ // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
+ return DAG.getMemcpy(Op.getOperand(0), Op,
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(12, MVT::i32), 8, false, true,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
return Op.getOperand(0);
@@ -2914,8 +2974,8 @@ struct TailCallArgumentInfo {
static void
StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
SDValue Chain,
- const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
- SmallVector<SDValue, 8> &MemOpChains,
+ const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
+ SmallVectorImpl<SDValue> &MemOpChains,
SDLoc dl) {
for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
SDValue Arg = TailCallArgs[i].Arg;
@@ -2973,7 +3033,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
SDValue Arg, int SPDiff, unsigned ArgOffset,
- SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
+ SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
int Offset = ArgOffset + SPDiff;
uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
@@ -3038,8 +3098,8 @@ static void
LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
SDValue Arg, SDValue PtrOff, int SPDiff,
unsigned ArgOffset, bool isPPC64, bool isTailCall,
- bool isVector, SmallVector<SDValue, 8> &MemOpChains,
- SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
+ bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
+ SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
SDLoc dl) {
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
if (!isTailCall) {
@@ -3063,7 +3123,7 @@ static
void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
SDValue LROp, SDValue FPOp, bool isDarwinABI,
- SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
+ SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
MachineFunction &MF = DAG.getMachineFunction();
// Emit a sequence of copyto/copyfrom virtual registers for arguments that
@@ -3090,8 +3150,8 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
- SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
- SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
+ SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
+ SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
const PPCSubtarget &PPCSubTarget) {
bool isPPC64 = PPCSubTarget.isPPC64();
@@ -3417,10 +3477,10 @@ SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
- SDLoc &dl = CLI.DL;
- SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
- SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
- SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDLoc &dl = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
@@ -5539,7 +5599,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// Now that we have the comparison, emit a copy from the CR to a GPR.
// This is flagged to the above dot comparison.
- SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
+ SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
DAG.getRegister(PPC::CR6, MVT::i32),
CompNode.getValue(1));
@@ -5672,6 +5732,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VAARG:
return LowerVAARG(Op, DAG, PPCSubTarget);
+ case ISD::VACOPY:
+ return LowerVACOPY(Op, DAG, PPCSubTarget);
+
case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
@@ -5767,6 +5830,9 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_TO_SINT:
+ // LowerFP_TO_INT() can only handle f32 and f64.
+ if (N->getOperand(0).getValueType() == MVT::ppcf128)
+ return;
Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;
}
@@ -6038,6 +6104,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// thisMBB:
const int64_t LabelOffset = 1 * PVT.getStoreSize();
const int64_t TOCOffset = 3 * PVT.getStoreSize();
+ const int64_t BPOffset = 4 * PVT.getStoreSize();
// Prepare IP either in reg.
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
@@ -6049,10 +6116,25 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
.addReg(PPC::X2)
.addImm(TOCOffset)
.addReg(BufReg);
-
MIB.setMemRefs(MMOBegin, MMOEnd);
}
+ // Naked functions never have a base pointer, and so we use r1. For all
+ // other functions, this decision must be delayed until during PEI.
+ unsigned BaseReg;
+ if (MF->getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
+ BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1;
+ else
+ BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP;
+
+ MIB = BuildMI(*thisMBB, MI, DL,
+ TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW))
+ .addReg(BaseReg)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
const PPCRegisterInfo *TRI =
@@ -6124,12 +6206,14 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
// Since FP is only updated here but NOT referenced, it's treated as GPR.
unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+ unsigned BP = (PVT == MVT::i64) ? PPC::X30 : PPC::R30;
MachineInstrBuilder MIB;
const int64_t LabelOffset = 1 * PVT.getStoreSize();
const int64_t SPOffset = 2 * PVT.getStoreSize();
const int64_t TOCOffset = 3 * PVT.getStoreSize();
+ const int64_t BPOffset = 4 * PVT.getStoreSize();
unsigned BufReg = MI->getOperand(0).getReg();
@@ -6171,8 +6255,17 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
}
MIB.setMemRefs(MMOBegin, MMOEnd);
- // FIXME: When we also support base pointers, that register must also be
- // restored here.
+ // Reload BP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload TOC
if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
@@ -7293,16 +7386,16 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
- // If the user is a MFCR instruction, we know this is safe. Otherwise we
- // give up for right now.
- if (FlagUser->getOpcode() == PPCISD::MFCR)
+ // If the user is a MFOCRF instruction, we know this is safe.
+ // Otherwise we give up for right now.
+ if (FlagUser->getOpcode() == PPCISD::MFOCRF)
return SDValue(VCMPoNode, 0);
}
break;
}
case ISD::BR_CC: {
// If this is a branch on an altivec predicate comparison, lower this so
- // that we don't have to do a MFCR: instead, branch directly on CR6. This
+ // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
// lowering is done pre-legalize, because the legalizer lowers the predicate
// compare down to code that is difficult to reassemble.
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
@@ -7514,7 +7607,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
std::pair<unsigned, const TargetRegisterClass*>
PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const {
+ MVT VT) const {
if (Constraint.size() == 1) {
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
@@ -7539,7 +7632,24 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
}
}
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ std::pair<unsigned, const TargetRegisterClass*> R =
+ TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+ // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
+ // (which we call X[0-9]+). If a 64-bit value has been requested, and a
+ // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
+ // register.
+ // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
+ // the AsmName field from *RegisterInfo.td, then this would not be necessary.
+ if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() &&
+ PPC::GPRCRegClass.contains(R.first)) {
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ return std::make_pair(TRI->getMatchingSuperReg(R.first,
+ PPC::sub_32, &PPC::GPRCRegClass),
+ &PPC::G8RCRegClass);
+ }
+
+ return R;
}
@@ -7768,18 +7878,15 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
return true;
}
-/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-/// is expanded to mul + add.
-bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ VT = VT.getScalarType();
+
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32:
case MVT::f64:
- case MVT::v4f32:
return true;
default:
break;
@@ -7795,3 +7902,9 @@ Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
return Sched::ILP;
}
+// Create a fast isel object.
+FastISel *
+PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) const {
+ return PPC::createFastISel(FuncInfo, LibInfo);
+}