Diffstat (limited to 'lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 297
1 file changed, 211 insertions, 86 deletions
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 147e94b..871531e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -516,7 +516,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
     }
 
-    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+
+    if (Subtarget.hasP8Altivec())
+      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+    else
+      setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+
     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -574,15 +579,24 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
 
-      // VSX v2i64 only supports non-arithmetic operations.
-      setOperationAction(ISD::ADD, MVT::v2i64, Expand);
-      setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+      if (Subtarget.hasP8Altivec()) {
+        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
+        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
+        setOperationAction(ISD::SRL, MVT::v2i64, Legal);
 
-      setOperationAction(ISD::SHL, MVT::v2i64, Expand);
-      setOperationAction(ISD::SRA, MVT::v2i64, Expand);
-      setOperationAction(ISD::SRL, MVT::v2i64, Expand);
+        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
+      }
+      else {
+        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
+        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
+        setOperationAction(ISD::SRL, MVT::v2i64, Expand);
 
-      setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+
+        // VSX v2i64 only supports non-arithmetic operations.
+        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+      }
 
       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
@@ -892,6 +906,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     MaxStoresPerMemcpyOptSize = 8;
     MaxStoresPerMemmove = 32;
     MaxStoresPerMemmoveOptSize = 8;
+  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
+    // The A2 also benefits from (very) aggressive inlining of memcpy and
+    // friends. The overhead of a the function call, even when warm, can be
+    // over one hundred cycles.
+    MaxStoresPerMemset = 128;
+    MaxStoresPerMemcpy = 128;
+    MaxStoresPerMemmove = 128;
   }
 }
@@ -981,8 +1002,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::STBRX:           return "PPCISD::STBRX";
   case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
   case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
-  case PPCISD::LARX:            return "PPCISD::LARX";
-  case PPCISD::STCX:            return "PPCISD::STCX";
   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
   case PPCISD::BDZ:             return "PPCISD::BDZ";
@@ -1384,17 +1403,10 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
   // immediate field for would be zero, and we prefer to use vxor for it.
   if (ValSizeInBytes < ByteSize) return SDValue();
 
-  // If the element value is larger than the splat value, cut it in half and
-  // check to see if the two halves are equal.  Continue doing this until we
-  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
-  while (ValSizeInBytes > ByteSize) {
-    ValSizeInBytes >>= 1;
-
-    // If the top half equals the bottom half, we're still ok.
-    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
-        (Value & ((1 << (8*ValSizeInBytes))-1)))
-      return SDValue();
-  }
+  // If the element value is larger than the splat value, check if it consists
+  // of a repeated bit pattern of size ByteSize.
+  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
+    return SDValue();
 
   // Properly sign extend the value.
   int MaskVal = SignExtend32(Value, ByteSize * 8);
@@ -2436,27 +2448,16 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
   return false;
 }
 
-/// GetFPR - Get the set of FP registers that should be allocated for arguments,
+/// FPR - The set of FP registers that should be allocated for arguments,
 /// on Darwin.
-static const MCPhysReg *GetFPR() {
-  static const MCPhysReg FPR[] = {
-    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
-    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
-  };
+static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
+                                PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
+                                PPC::F11, PPC::F12, PPC::F13};
 
-  return FPR;
-}
-
-/// GetQFPR - Get the set of QPX registers that should be allocated for
-/// arguments.
-static const MCPhysReg *GetQFPR() {
-  static const MCPhysReg QFPR[] = {
-    PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
-    PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13
-  };
-
-  return QFPR;
-}
+/// QFPR - The set of QPX registers that should be allocated for arguments.
+static const MCPhysReg QFPR[] = {
+    PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
+    PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
 
 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
 /// the stack.
@@ -2880,9 +2881,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
   };
-
-  static const MCPhysReg *FPR = GetFPR();
-
   static const MCPhysReg VR[] = {
     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -2892,8 +2890,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
     PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
   };
 
-  static const MCPhysReg *QFPR = GetQFPR();
-
   const unsigned Num_GPR_Regs = array_lengthof(GPR);
   const unsigned Num_FPR_Regs = 13;
   const unsigned Num_VR_Regs  = array_lengthof(VR);
@@ -3291,9 +3287,6 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
-
-  static const MCPhysReg *FPR = GetFPR();
-
   static const MCPhysReg VR[] = {
     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -4187,7 +4180,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
   // Add a register mask operand representing the call-preserved registers.
   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
-  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+  const uint32_t *Mask =
+      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
   assert(Mask && "Missing call preserved mask for calling convention");
   Ops.push_back(DAG.getRegisterMask(Mask));
@@ -4582,8 +4576,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
   };
-  static const MCPhysReg *FPR = GetFPR();
-
   static const MCPhysReg VR[] = {
     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -4593,8 +4585,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
     PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
   };
 
-  static const MCPhysReg *QFPR = GetQFPR();
-
   const unsigned NumGPRs = array_lengthof(GPR);
   const unsigned NumFPRs = 13;
   const unsigned NumVRs  = array_lengthof(VR);
@@ -5280,8 +5270,6 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
   };
-  static const MCPhysReg *FPR = GetFPR();
-
   static const MCPhysReg VR[] = {
     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -6418,7 +6406,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
                            SelectionDAG &DAG, SDLoc dl) {
   assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
 
-  static const EVT VTys[] = { // canonical VT to use for each size.
+  static const MVT VTys[] = { // canonical VT to use for each size.
     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
   };
@@ -7045,7 +7033,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
 /// altivec comparison.  If it is, return true and fill in Opc/isDot with
 /// information about the intrinsic.
 static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
-                                  bool &isDot) {
+                                  bool &isDot, const PPCSubtarget &Subtarget) {
   unsigned IntrinsicID =
     cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
   CompareOpc = -1;
@@ -7058,29 +7046,83 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
   case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
   case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
   case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpequd_p:
+    if (Subtarget.hasP8Altivec()) {
+      CompareOpc = 199;
+      isDot = 1;
+    }
+    else
+      return false;
+
+    break;
   case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
   case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
   case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
   case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
   case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpgtsd_p:
+    if (Subtarget.hasP8Altivec()) {
+      CompareOpc = 967;
+      isDot = 1;
+    }
+    else
+      return false;
+
+    break;
   case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
   case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
   case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpgtud_p:
+    if (Subtarget.hasP8Altivec()) {
+      CompareOpc = 711;
+      isDot = 1;
+    }
+    else
+      return false;
+
+    break;
+
     // Normal Comparisons.
   case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
   case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
   case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc = 6; isDot = 0; break;
   case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc = 70; isDot = 0; break;
   case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpequd:
+    if (Subtarget.hasP8Altivec()) {
+      CompareOpc = 199;
+      isDot = 0;
+    }
+    else
+      return false;
+
+    break;
   case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
   case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
   case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
   case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
   case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpgtsd:
+    if (Subtarget.hasP8Altivec()) {
+      CompareOpc = 967;
+      isDot = 0;
+    }
+    else
+      return false;
+
+    break;
   case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
   case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
   case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpgtud:
+    if (Subtarget.hasP8Altivec()) {
+      CompareOpc = 711;
+      isDot = 0;
+    }
+    else
+      return false;
+
+    break;
   }
   return true;
 }
@@ -7094,7 +7136,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   SDLoc dl(Op);
   int CompareOpc;
   bool isDot;
-  if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
+  if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget))
     return SDValue();    // Don't custom lower most intrinsics.
 
   // If this is a non-dot comparison, make the VCMP node and we are done.
@@ -7738,10 +7780,36 @@ Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
 MachineBasicBlock *
 PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
-                                    bool is64bit, unsigned BinOpcode) const {
+                                    unsigned AtomicSize,
+                                    unsigned BinOpcode) const {
   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
 
+  auto LoadMnemonic = PPC::LDARX;
+  auto StoreMnemonic = PPC::STDCX;
+  switch (AtomicSize) {
+  default:
+    llvm_unreachable("Unexpected size of atomic entity");
+  case 1:
+    LoadMnemonic = PPC::LBARX;
+    StoreMnemonic = PPC::STBCX;
+    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
+    break;
+  case 2:
+    LoadMnemonic = PPC::LHARX;
+    StoreMnemonic = PPC::STHCX;
+    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
+    break;
+  case 4:
+    LoadMnemonic = PPC::LWARX;
+    StoreMnemonic = PPC::STWCX;
+    break;
+  case 8:
+    LoadMnemonic = PPC::LDARX;
+    StoreMnemonic = PPC::STDCX;
+    break;
+  }
+
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   MachineFunction *F = BB->getParent();
   MachineFunction::iterator It = BB;
@@ -7763,7 +7831,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   MachineRegisterInfo &RegInfo = F->getRegInfo();
   unsigned TmpReg = (!BinOpcode) ? incr :
-    RegInfo.createVirtualRegister( is64bit ? &PPC::G8RCRegClass
+    RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
                                            : &PPC::GPRCRegClass);
 
   //  thisMBB:
@@ -7778,11 +7846,11 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   //   bne- loopMBB
   //   fallthrough --> exitMBB
   BB = loopMBB;
-  BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
     .addReg(ptrA).addReg(ptrB);
   if (BinOpcode)
     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
-  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+  BuildMI(BB, dl, TII->get(StoreMnemonic))
     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
   BuildMI(BB, dl, TII->get(PPC::BCC))
     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
@@ -7800,6 +7868,10 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
                                             MachineBasicBlock *BB,
                                             bool is8bit, // operation
                                             unsigned BinOpcode) const {
+  // If we support part-word atomic mnemonics, just use them
+  if (Subtarget.hasPartwordAtomics())
+    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode);
+
   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   // In 64 bit mode we have to use 64 bits for addresses, even though the
@@ -8365,68 +8437,96 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
-    BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
+    BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
-    BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
+    BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
 
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
-    BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
+    BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
-    BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
+    BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
 
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
-    BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
+    BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
-    BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
+    BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
 
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
-    BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
+    BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
-    BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
+    BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
 
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
-    BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
+    BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
-    BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);
+    BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
 
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
-    BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
+    BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
-    BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
+    BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
 
   else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
   else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
   else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
-    BB = EmitAtomicBinary(MI, BB, false, 0);
+    BB = EmitAtomicBinary(MI, BB, 4, 0);
   else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
-    BB = EmitAtomicBinary(MI, BB, true, 0);
+    BB = EmitAtomicBinary(MI, BB, 8, 0);
 
   else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
-           MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
+           MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
+           (Subtarget.hasPartwordAtomics() &&
+            MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
+           (Subtarget.hasPartwordAtomics() &&
+            MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
     bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
 
+    auto LoadMnemonic = PPC::LDARX;
+    auto StoreMnemonic = PPC::STDCX;
+    switch(MI->getOpcode()) {
+    default:
+      llvm_unreachable("Compare and swap of unknown size");
+    case PPC::ATOMIC_CMP_SWAP_I8:
+      LoadMnemonic = PPC::LBARX;
+      StoreMnemonic = PPC::STBCX;
+      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+      break;
+    case PPC::ATOMIC_CMP_SWAP_I16:
+      LoadMnemonic = PPC::LHARX;
+      StoreMnemonic = PPC::STHCX;
+      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+      break;
+    case PPC::ATOMIC_CMP_SWAP_I32:
+      LoadMnemonic = PPC::LWARX;
+      StoreMnemonic = PPC::STWCX;
+      break;
+    case PPC::ATOMIC_CMP_SWAP_I64:
+      LoadMnemonic = PPC::LDARX;
+      StoreMnemonic = PPC::STDCX;
+      break;
+    }
     unsigned dest   = MI->getOperand(0).getReg();
     unsigned ptrA   = MI->getOperand(1).getReg();
     unsigned ptrB   = MI->getOperand(2).getReg();
@@ -8452,18 +8552,18 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     BB->addSuccessor(loop1MBB);
 
     // loop1MBB:
-    //   l[wd]arx dest, ptr
+    //   l[bhwd]arx dest, ptr
     //   cmp[wd] dest, oldval
     //   bne- midMBB
     // loop2MBB:
-    //   st[wd]cx. newval, ptr
+    //   st[bhwd]cx. newval, ptr
     //   bne- loopMBB
     //   b exitBB
     // midMBB:
-    //   st[wd]cx. dest, ptr
+    //   st[bhwd]cx. dest, ptr
     // exitBB:
     BB = loop1MBB;
-    BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+    BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
      .addReg(ptrA).addReg(ptrB);
     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
       .addReg(oldval).addReg(dest);
     BuildMI(BB, dl, TII->get(PPC::BCC))
       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
@@ -8473,7 +8573,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     BB->addSuccessor(midMBB);
 
     BB = loop2MBB;
-    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+    BuildMI(BB, dl, TII->get(StoreMnemonic))
      .addReg(newval).addReg(ptrA).addReg(ptrB);
     BuildMI(BB, dl, TII->get(PPC::BCC))
       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
@@ -8482,7 +8582,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     BB->addSuccessor(exitMBB);
 
     BB = midMBB;
-    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+    BuildMI(BB, dl, TII->get(StoreMnemonic))
      .addReg(dest).addReg(ptrA).addReg(ptrB);
     BB->addSuccessor(exitMBB);
@@ -8682,6 +8782,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
             MI->getOperand(0).getReg())
       .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
+  } else if (MI->getOpcode() == PPC::TCHECK_RET) {
+    DebugLoc Dl = MI->getDebugLoc();
+    MachineRegisterInfo &RegInfo = F->getRegInfo();
+    unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+    BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
+    return BB;
   } else {
     llvm_unreachable("Unexpected instr type to insert");
   }
@@ -10184,7 +10290,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
-        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
+        getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
       assert(isDot && "Can't compare against a vector result!");
 
       // If this is a comparison against something other than 0/1, then we know
@@ -10297,14 +10403,17 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     case Intrinsic::ppc_altivec_vcmpequb_p:
     case Intrinsic::ppc_altivec_vcmpequh_p:
     case Intrinsic::ppc_altivec_vcmpequw_p:
+    case Intrinsic::ppc_altivec_vcmpequd_p:
     case Intrinsic::ppc_altivec_vcmpgefp_p:
     case Intrinsic::ppc_altivec_vcmpgtfp_p:
     case Intrinsic::ppc_altivec_vcmpgtsb_p:
     case Intrinsic::ppc_altivec_vcmpgtsh_p:
     case Intrinsic::ppc_altivec_vcmpgtsw_p:
+    case Intrinsic::ppc_altivec_vcmpgtsd_p:
     case Intrinsic::ppc_altivec_vcmpgtub_p:
     case Intrinsic::ppc_altivec_vcmpgtuh_p:
     case Intrinsic::ppc_altivec_vcmpgtuw_p:
+    case Intrinsic::ppc_altivec_vcmpgtud_p:
       KnownZero = ~1U;  // All bits but the low one are known to be zero.
       break;
@@ -10914,11 +11023,27 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                            bool IsMemset, bool ZeroMemset,
                                            bool MemcpyStrSrc,
                                            MachineFunction &MF) const {
+  const Function *F = MF.getFunction();
+  // When expanding a memset, require at least two QPX instructions to cover
+  // the cost of loading the value to be stored from the constant pool.
+  if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
+      (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
+      !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+    return MVT::v4f64;
+  }
+
+  // We should use Altivec/VSX loads and stores when available. For unaligned
+  // addresses, unaligned VSX loads are only fast starting with the P8.
+  if (Subtarget.hasAltivec() && Size >= 16 &&
+      (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
+       ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+    return MVT::v4i32;
+
   if (Subtarget.isPPC64()) {
     return MVT::i64;
-  } else {
-    return MVT::i32;
   }
+
+  return MVT::i32;
 }
 
 /// \brief Returns true if it is beneficial to convert a load of a constant