Diffstat (limited to 'lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp  297
1 file changed, 211 insertions(+), 86 deletions(-)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 147e94b..871531e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -516,7 +516,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
- setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+
+ if (Subtarget.hasP8Altivec())
+ setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+ else
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -574,15 +579,24 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
- // VSX v2i64 only supports non-arithmetic operations.
- setOperationAction(ISD::ADD, MVT::v2i64, Expand);
- setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+ if (Subtarget.hasP8Altivec()) {
+ setOperationAction(ISD::SHL, MVT::v2i64, Legal);
+ setOperationAction(ISD::SRA, MVT::v2i64, Legal);
+ setOperationAction(ISD::SRL, MVT::v2i64, Legal);
- setOperationAction(ISD::SHL, MVT::v2i64, Expand);
- setOperationAction(ISD::SRA, MVT::v2i64, Expand);
- setOperationAction(ISD::SRL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
+ }
+ else {
+ setOperationAction(ISD::SHL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRA, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRL, MVT::v2i64, Expand);
- setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+
+ // VSX v2i64 only supports non-arithmetic operations.
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+ }
setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
@@ -892,6 +906,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
MaxStoresPerMemcpyOptSize = 8;
MaxStoresPerMemmove = 32;
MaxStoresPerMemmoveOptSize = 8;
+ } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
+ // The A2 also benefits from (very) aggressive inlining of memcpy and
+ // friends. The overhead of the function call, even when warm, can be
+ // over one hundred cycles.
+ MaxStoresPerMemset = 128;
+ MaxStoresPerMemcpy = 128;
+ MaxStoresPerMemmove = 128;
}
}
@@ -981,8 +1002,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::STBRX: return "PPCISD::STBRX";
case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
- case PPCISD::LARX: return "PPCISD::LARX";
- case PPCISD::STCX: return "PPCISD::STCX";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::BDNZ: return "PPCISD::BDNZ";
case PPCISD::BDZ: return "PPCISD::BDZ";
@@ -1384,17 +1403,10 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
// immediate field for would be zero, and we prefer to use vxor for it.
if (ValSizeInBytes < ByteSize) return SDValue();
- // If the element value is larger than the splat value, cut it in half and
- // check to see if the two halves are equal. Continue doing this until we
- // get to ByteSize. This allows us to handle 0x01010101 as 0x01.
- while (ValSizeInBytes > ByteSize) {
- ValSizeInBytes >>= 1;
-
- // If the top half equals the bottom half, we're still ok.
- if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
- (Value & ((1 << (8*ValSizeInBytes))-1)))
- return SDValue();
- }
+ // If the element value is larger than the splat value, check if it consists
+ // of a repeated bit pattern of size ByteSize.
+ if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
+ return SDValue();
// Properly sign extend the value.
int MaskVal = SignExtend32(Value, ByteSize * 8);
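
The removed halving loop and the new APInt::isSplat call accept the same values: an element whose bits are just a ByteSize-wide pattern repeated, so 0x01010101 can still be materialized with a 1-byte vsplti. A minimal standalone sketch of the check (illustrative only; the helper name is not from the patch):

#include "llvm/ADT/APInt.h"
#include <cstdint>

// True when a ValSizeInBytes-wide element is the ByteSize-wide pattern
// repeated, which is what get_VSPLTI_elt needs to know here.
static bool isRepeatedPattern(uint64_t Value, unsigned ValSizeInBytes,
                              unsigned ByteSize) {
  return llvm::APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8);
}

// isRepeatedPattern(0x01010101, 4, 1) -> true  (splat of 0x01)
// isRepeatedPattern(0x01020102, 4, 1) -> false (but true for ByteSize == 2)
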
@@ -2436,27 +2448,16 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
-/// GetFPR - Get the set of FP registers that should be allocated for arguments,
+/// FPR - The set of FP registers that should be allocated for arguments,
/// on Darwin.
-static const MCPhysReg *GetFPR() {
- static const MCPhysReg FPR[] = {
- PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
- };
+static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
+ PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
+ PPC::F11, PPC::F12, PPC::F13};
- return FPR;
-}
-
-/// GetQFPR - Get the set of QPX registers that should be allocated for
-/// arguments.
-static const MCPhysReg *GetQFPR() {
- static const MCPhysReg QFPR[] = {
- PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
- PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13
- };
-
- return QFPR;
-}
+/// QFPR - The set of QPX registers that should be allocated for arguments.
+static const MCPhysReg QFPR[] = {
+ PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
+ PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
@@ -2880,9 +2881,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
-
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -2892,8 +2890,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
};
- static const MCPhysReg *QFPR = GetQFPR();
-
const unsigned Num_GPR_Regs = array_lengthof(GPR);
const unsigned Num_FPR_Regs = 13;
const unsigned Num_VR_Regs = array_lengthof(VR);
@@ -3291,9 +3287,6 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
-
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -4187,7 +4180,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ const uint32_t *Mask =
+ TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -4582,8 +4576,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -4593,8 +4585,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
};
- static const MCPhysReg *QFPR = GetQFPR();
-
const unsigned NumGPRs = array_lengthof(GPR);
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
@@ -5280,8 +5270,6 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -6418,7 +6406,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
SelectionDAG &DAG, SDLoc dl) {
assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
- static const EVT VTys[] = { // canonical VT to use for each size.
+ static const MVT VTys[] = { // canonical VT to use for each size.
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
};
@@ -7045,7 +7033,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
/// altivec comparison. If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
- bool &isDot) {
+ bool &isDot, const PPCSubtarget &Subtarget) {
unsigned IntrinsicID =
cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
CompareOpc = -1;
@@ -7058,29 +7046,83 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequd_p:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 199;
+ isDot = 1;
+ }
+ else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsd_p:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 967;
+ isDot = 1;
+ }
+ else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtud_p:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 711;
+ isDot = 1;
+ }
+ else
+ return false;
+ break;
+
// Normal Comparisons.
case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequd:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 199;
+ isDot = 0;
+ }
+ else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsd:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 967;
+ isDot = 0;
+ }
+ else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtud:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 711;
+ isDot = 0;
+ }
+ else
+ return false;
+
+ break;
}
return true;
}
@@ -7094,7 +7136,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc dl(Op);
int CompareOpc;
bool isDot;
- if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
+ if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget))
return SDValue(); // Don't custom lower most intrinsics.
// If this is a non-dot comparison, make the VCMP node and we are done.
@@ -7738,10 +7780,36 @@ Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
- bool is64bit, unsigned BinOpcode) const {
+ unsigned AtomicSize,
+ unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ auto LoadMnemonic = PPC::LDARX;
+ auto StoreMnemonic = PPC::STDCX;
+ switch (AtomicSize) {
+ default:
+ llvm_unreachable("Unexpected size of atomic entity");
+ case 1:
+ LoadMnemonic = PPC::LBARX;
+ StoreMnemonic = PPC::STBCX;
+ assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
+ break;
+ case 2:
+ LoadMnemonic = PPC::LHARX;
+ StoreMnemonic = PPC::STHCX;
+ assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
+ break;
+ case 4:
+ LoadMnemonic = PPC::LWARX;
+ StoreMnemonic = PPC::STWCX;
+ break;
+ case 8:
+ LoadMnemonic = PPC::LDARX;
+ StoreMnemonic = PPC::STDCX;
+ break;
+ }
+
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
MachineFunction::iterator It = BB;
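
The selection above pairs each access size with the matching load-reserve/store-conditional instructions; the 1- and 2-byte pairs (lbarx/stbcx., lharx/sthcx.) are only used when the subtarget reports hasPartwordAtomics(). A condensed sketch of that mapping (illustrative only; the helper is hypothetical):

#include "llvm/Support/ErrorHandling.h"
#include <utility>

// Hypothetical helper mirroring the switch above: byte size of the atomic
// operation -> (load-reserve, store-conditional) opcode pair.
static std::pair<unsigned, unsigned> pickLarxStcx(unsigned AtomicSize) {
  switch (AtomicSize) {
  case 1: return {PPC::LBARX, PPC::STBCX}; // requires partword atomics
  case 2: return {PPC::LHARX, PPC::STHCX}; // requires partword atomics
  case 4: return {PPC::LWARX, PPC::STWCX};
  case 8: return {PPC::LDARX, PPC::STDCX};
  default: llvm_unreachable("Unexpected size of atomic entity");
  }
}
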
@@ -7763,7 +7831,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineRegisterInfo &RegInfo = F->getRegInfo();
unsigned TmpReg = (!BinOpcode) ? incr :
- RegInfo.createVirtualRegister( is64bit ? &PPC::G8RCRegClass
+ RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
: &PPC::GPRCRegClass);
// thisMBB:
@@ -7778,11 +7846,11 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// bne- loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
.addReg(ptrA).addReg(ptrB);
if (BinOpcode)
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(TmpReg).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
@@ -7800,6 +7868,10 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
MachineBasicBlock *BB,
bool is8bit, // operation
unsigned BinOpcode) const {
+ // If we support partword atomic mnemonics, just use them.
+ if (Subtarget.hasPartwordAtomics())
+ return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode);
+
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// In 64 bit mode we have to use 64 bits for addresses, even though the
@@ -8365,68 +8437,96 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
- BB = EmitAtomicBinary(MI, BB, false, 0);
+ BB = EmitAtomicBinary(MI, BB, 4, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
- BB = EmitAtomicBinary(MI, BB, true, 0);
+ BB = EmitAtomicBinary(MI, BB, 8, 0);
else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
- MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
+ (Subtarget.hasPartwordAtomics() &&
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
+ (Subtarget.hasPartwordAtomics() &&
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
+ auto LoadMnemonic = PPC::LDARX;
+ auto StoreMnemonic = PPC::STDCX;
+ switch(MI->getOpcode()) {
+ default:
+ llvm_unreachable("Compare and swap of unknown size");
+ case PPC::ATOMIC_CMP_SWAP_I8:
+ LoadMnemonic = PPC::LBARX;
+ StoreMnemonic = PPC::STBCX;
+ assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I16:
+ LoadMnemonic = PPC::LHARX;
+ StoreMnemonic = PPC::STHCX;
+ assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I32:
+ LoadMnemonic = PPC::LWARX;
+ StoreMnemonic = PPC::STWCX;
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I64:
+ LoadMnemonic = PPC::LDARX;
+ StoreMnemonic = PPC::STDCX;
+ break;
+ }
unsigned dest = MI->getOperand(0).getReg();
unsigned ptrA = MI->getOperand(1).getReg();
unsigned ptrB = MI->getOperand(2).getReg();
@@ -8452,18 +8552,18 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(loop1MBB);
// loop1MBB:
- // l[wd]arx dest, ptr
+ // l[bhwd]arx dest, ptr
// cmp[wd] dest, oldval
// bne- midMBB
// loop2MBB:
- // st[wd]cx. newval, ptr
+ // st[bhwd]cx. newval, ptr
// bne- loopMBB
// b exitBB
// midMBB:
- // st[wd]cx. dest, ptr
+ // st[bhwd]cx. dest, ptr
// exitBB:
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
.addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
.addReg(oldval).addReg(dest);
@@ -8473,7 +8573,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(midMBB);
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(newval).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
@@ -8482,7 +8582,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(exitMBB);
BB = midMBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(dest).addReg(ptrA).addReg(ptrB);
BB->addSuccessor(exitMBB);
@@ -8682,6 +8782,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
MI->getOperand(0).getReg())
.addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
+ } else if (MI->getOpcode() == PPC::TCHECK_RET) {
+ DebugLoc Dl = MI->getDebugLoc();
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+ BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
+ return BB;
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -10184,7 +10290,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
- getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
+ getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
assert(isDot && "Can't compare against a vector result!");
// If this is a comparison against something other than 0/1, then we know
@@ -10297,14 +10403,17 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case Intrinsic::ppc_altivec_vcmpequb_p:
case Intrinsic::ppc_altivec_vcmpequh_p:
case Intrinsic::ppc_altivec_vcmpequw_p:
+ case Intrinsic::ppc_altivec_vcmpequd_p:
case Intrinsic::ppc_altivec_vcmpgefp_p:
case Intrinsic::ppc_altivec_vcmpgtfp_p:
case Intrinsic::ppc_altivec_vcmpgtsb_p:
case Intrinsic::ppc_altivec_vcmpgtsh_p:
case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ case Intrinsic::ppc_altivec_vcmpgtsd_p:
case Intrinsic::ppc_altivec_vcmpgtub_p:
case Intrinsic::ppc_altivec_vcmpgtuh_p:
case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ case Intrinsic::ppc_altivec_vcmpgtud_p:
KnownZero = ~1U; // All bits but the low one are known to be zero.
break;
}
@@ -10914,11 +11023,27 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
+ const Function *F = MF.getFunction();
+ // When expanding a memset, require at least two QPX instructions to cover
+ // the cost of loading the value to be stored from the constant pool.
+ if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
+ (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
+ !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+ return MVT::v4f64;
+ }
+
+ // We should use Altivec/VSX loads and stores when available. For unaligned
+ // addresses, unaligned VSX loads are only fast starting with the P8.
+ if (Subtarget.hasAltivec() && Size >= 16 &&
+ (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
+ ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+ return MVT::v4i32;
+
if (Subtarget.isPPC64()) {
return MVT::i64;
- } else {
- return MVT::i32;
}
+
+ return MVT::i32;
}
/// \brief Returns true if it is beneficial to convert a load of a constant