diff options
Diffstat (limited to 'lib/Target/PowerPC')
29 files changed, 1023 insertions, 192 deletions
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 000d6d4..61d23ce 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -17,16 +17,12 @@ #include "MCTargetDesc/PPCPredicates.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define GET_INSTRUCTION_NAME #include "PPCGenAsmWriter.inc" -StringRef PPCInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} - void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << getRegisterName(RegNo); } diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 21fc733..73fd534 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -24,9 +24,9 @@ class PPCInstPrinter : public MCInstPrinter { // 0 -> AIX, 1 -> Darwin. unsigned SyntaxVariant; public: - PPCInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, - unsigned syntaxVariant) - : MCInstPrinter(MAI, MRI), SyntaxVariant(syntaxVariant) {} + PPCInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI, unsigned syntaxVariant) + : MCInstPrinter(MAI, MII, MRI), SyntaxVariant(syntaxVariant) {} bool isDarwinSyntax() const { return SyntaxVariant == 1; @@ -34,9 +34,6 @@ public: virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - virtual StringRef getOpcodeName(unsigned Opcode) const; - - static const char *getInstructionName(unsigned Opcode); // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 9c6eefe..48de583 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCObjectWriter.h" diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 226fbfe..6568e82 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -108,9 +108,10 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCInstPrinter *createPPCMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { - return new PPCInstPrinter(MAI, MRI, SyntaxVariant); + return new PPCInstPrinter(MAI, MII, MRI, SyntaxVariant); } extern "C" void LLVMInitializePowerPCTargetMC() { diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 724374c..c554d39 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -34,6 +34,7 @@ def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">; def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">; def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">; def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">; +def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", "Enable 64-bit instructions">; @@ -87,6 +88,10 @@ def : Processor<"g5", G5Itineraries, [Directive970, FeatureAltivec, FeatureGPUL, FeatureFSqrt, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; +def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, + FeatureFSqrt, FeatureSTFIWX, + Feature64Bit + /*, Feature64BitRegs */]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : Processor<"ppc64", G5Itineraries, [Directive64, FeatureAltivec, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 4abb469..fb7aa71 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -450,6 +450,7 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { "ppc7400", "ppc750", "ppc970", + "ppcA2", "ppc64" }; diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 9883c2e..b2b5364 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -12,10 +12,6 @@ // //===----------------------------------------------------------------------===// -/// CCIfSubtarget - Match if the current subtarget has a feature F. -class CCIfSubtarget<string F, CCAction A> - : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>; - //===----------------------------------------------------------------------===// // Return Value Calling Convention //===----------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index ae317af..6ed1fb9 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -22,17 +22,29 @@ using namespace llvm; //===----------------------------------------------------------------------===// -// PowerPC 440 Hazard Recognizer -void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) { +// PowerPC Scoreboard Hazard Recognizer +void PPCScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { const MCInstrDesc *MCID = DAG->getInstrDesc(SU); - if (!MCID) { + if (!MCID) // This is a PPC pseudo-instruction. return; - } ScoreboardHazardRecognizer::EmitInstruction(SU); } +ScheduleHazardRecognizer::HazardType +PPCScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); +} + +void PPCScoreboardHazardRecognizer::AdvanceCycle() { + ScoreboardHazardRecognizer::AdvanceCycle(); +} + +void PPCScoreboardHazardRecognizer::Reset() { + ScoreboardHazardRecognizer::Reset(); +} + //===----------------------------------------------------------------------===// // PowerPC 970 Hazard Recognizer // @@ -61,7 +73,6 @@ void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) { PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii) : TII(tii) { - LastWasBL8_ELF = false; EndDispatchGroup(); } @@ -132,15 +143,6 @@ getHazardType(SUnit *SU, int Stalls) { return NoHazard; unsigned Opcode = MI->getOpcode(); - - // If the last instruction was a BL8_ELF, then the NOP must follow it - // directly (this is strong requirement from the linker due to the ELF ABI). - // We return only Hazard (and not NoopHazard) because if the NOP is necessary - // then it will already be in the instruction stream (it is not always - // necessary; tail calls, for example, do not need it). - if (LastWasBL8_ELF && Opcode != PPC::NOP) - return Hazard; - bool isFirst, isSingle, isCracked, isLoad, isStore; PPCII::PPC970_Unit InstrType = GetInstrType(Opcode, isFirst, isSingle, isCracked, @@ -199,8 +201,6 @@ void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { return; unsigned Opcode = MI->getOpcode(); - LastWasBL8_ELF = (Opcode == PPC::BL8_ELF); - bool isFirst, isSingle, isCracked, isLoad, isStore; PPCII::PPC970_Unit InstrType = GetInstrType(Opcode, isFirst, isSingle, isCracked, @@ -240,7 +240,6 @@ void PPCHazardRecognizer970::AdvanceCycle() { } void PPCHazardRecognizer970::Reset() { - LastWasBL8_ELF = false; EndDispatchGroup(); } diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index d80a385..55b45d0 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -21,16 +21,19 @@ namespace llvm { -/// PPCHazardRecognizer440 - This class implements a scoreboard-based -/// hazard recognizer for the PPC 440 and friends. -class PPCHazardRecognizer440 : public ScoreboardHazardRecognizer { +/// PPCScoreboardHazardRecognizer - This class implements a scoreboard-based +/// hazard recognizer for generic PPC processors. +class PPCScoreboardHazardRecognizer : public ScoreboardHazardRecognizer { const ScheduleDAG *DAG; public: - PPCHazardRecognizer440(const InstrItineraryData *ItinData, + PPCScoreboardHazardRecognizer(const InstrItineraryData *ItinData, const ScheduleDAG *DAG_) : ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) {} + virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void EmitInstruction(SUnit *SU); + virtual void AdvanceCycle(); + virtual void Reset(); }; /// PPCHazardRecognizer970 - This class defines a finite state automata that @@ -49,9 +52,6 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { // HasCTRSet - If the CTR register is set in this group, disallow BCTRL. bool HasCTRSet; - // Was the last instruction issued a BL8_ELF - bool LastWasBL8_ELF; - // StoredPtr - Keep track of the address of any store. If we see a load from // the same address (or one that aliases it), disallow the store. We can have // up to four stores in one dispatch group, hence we track up to 4. diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 6651d14..5a04888 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -377,8 +377,8 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { DebugLoc dl = N->getDebugLoc(); APInt LKZ, LKO, RKZ, RKO; - CurDAG->ComputeMaskedBits(Op0, APInt::getAllOnesValue(32), LKZ, LKO); - CurDAG->ComputeMaskedBits(Op1, APInt::getAllOnesValue(32), RKZ, RKO); + CurDAG->ComputeMaskedBits(Op0, LKZ, LKO); + CurDAG->ComputeMaskedBits(Op1, RKZ, RKO); unsigned TargetMask = LKZ.getZExtValue(); unsigned InsertMask = RKZ.getZExtValue(); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 85b5bc1..3b24951 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -226,11 +226,23 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - // VAARG is custom lowered with the 32-bit SVR4 ABI. - if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI() - && !TM.getSubtarget<PPCSubtarget>().isPPC64()) { - setOperationAction(ISD::VAARG, MVT::Other, Custom); - setOperationAction(ISD::VAARG, MVT::i64, Custom); + if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) { + if (TM.getSubtarget<PPCSubtarget>().isPPC64()) { + // VAARG always uses double-word chunks, so promote anything smaller. + setOperationAction(ISD::VAARG, MVT::i1, Promote); + AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i8, Promote); + AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i16, Promote); + AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i32, Promote); + AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + } else { + // VAARG is custom lowered with the 32-bit SVR4 ABI. + setOperationAction(ISD::VAARG, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::i64, Custom); + } } else setOperationAction(ISD::VAARG, MVT::Other, Expand); @@ -377,6 +389,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); } + if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) + setOperationAction(ISD::PREFETCH, MVT::Other, Legal); + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); @@ -431,7 +446,16 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { // Darwin passes everything on 4 byte boundary. if (TM.getSubtarget<PPCSubtarget>().isDarwin()) return 4; - // FIXME SVR4 TBD + + // 16byte and wider vectors are passed on 16byte boundary. + if (VectorType *VTy = dyn_cast<VectorType>(Ty)) + if (VTy->getBitWidth() >= 128) + return 16; + + // The rest is 8 on PPC64 and 4 on PPC32 boundary. + if (PPCSubTarget.isPPC64()) + return 8; + return 4; } @@ -460,6 +484,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32"; case PPCISD::STD_32: return "PPCISD::STD_32"; case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4"; + case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4"; case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin"; case PPCISD::NOP: return "PPCISD::NOP"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; @@ -835,14 +860,10 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, APInt LHSKnownZero, LHSKnownOne; APInt RHSKnownZero, RHSKnownOne; DAG.ComputeMaskedBits(N.getOperand(0), - APInt::getAllOnesValue(N.getOperand(0) - .getValueSizeInBits()), LHSKnownZero, LHSKnownOne); if (LHSKnownZero.getBoolValue()) { DAG.ComputeMaskedBits(N.getOperand(1), - APInt::getAllOnesValue(N.getOperand(1) - .getValueSizeInBits()), RHSKnownZero, RHSKnownOne); // If all of the bits are known zero on the LHS or RHS, the add won't // carry. @@ -897,10 +918,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. APInt LHSKnownZero, LHSKnownOne; - DAG.ComputeMaskedBits(N.getOperand(0), - APInt::getAllOnesValue(N.getOperand(0) - .getValueSizeInBits()), - LHSKnownZero, LHSKnownOne); + DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't @@ -1013,10 +1031,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. APInt LHSKnownZero, LHSKnownOne; - DAG.ComputeMaskedBits(N.getOperand(0), - APInt::getAllOnesValue(N.getOperand(0) - .getValueSizeInBits()), - LHSKnownZero, LHSKnownOne); + DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't // carry. @@ -2801,9 +2816,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size()); } - Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); - // Add a NOP immediately after the branch instruction when using the 64-bit // SVR4 ABI. At link time, if caller and callee are in a different module and // thus have a different TOC, the call will be replaced with a call to a stub @@ -2812,8 +2824,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // which restores the TOC of the caller from the TOC save slot of the current // stack frame. If caller and callee belong to the same module (and have the // same TOC), the NOP will remain unchanged. + + bool needsTOCRestore = false; if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); if (CallOpc == PPCISD::BCTRL_SVR4) { // This is a call through a function pointer. // Restore the caller TOC from the save area into R2. @@ -2824,14 +2837,22 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // since r2 is a reserved register (which prevents the register allocator // from allocating it), resulting in an additional register being // allocated and an unnecessary move instruction being generated. - Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); - InFlag = Chain.getValue(1); - } else { + needsTOCRestore = true; + } else if (CallOpc == PPCISD::CALL_SVR4) { // Otherwise insert NOP. - InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag); + CallOpc = PPCISD::CALL_NOP_SVR4; } } + Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + if (needsTOCRestore) { + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); + InFlag = Chain.getValue(1); + } + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(BytesCalleePops, true), InFlag); @@ -5486,12 +5507,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, //===----------------------------------------------------------------------===// void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; case PPCISD::LBRX: { @@ -5725,7 +5745,7 @@ bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{ return (V > -(1 << 16) && V < (1 << 16)-1); } -bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { +bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const { return false; } @@ -5818,3 +5838,12 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, return MVT::i32; } } + +Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { + unsigned Directive = PPCSubTarget.getDarwinDirective(); + if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) + return Sched::ILP; + + return TargetLowering::getSchedulingPreference(N); +} + diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 2e046c4..18eb072 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -95,7 +95,9 @@ namespace llvm { EXTSW_32, /// CALL - A direct function call. - CALL_Darwin, CALL_SVR4, + /// CALL_NOP_SVR4 is a call with the special NOP which follows 64-bit + /// SVR4 calls. + CALL_Darwin, CALL_SVR4, CALL_NOP_SVR4, /// NOP - Special NOP which follows 64-bit SVR4 calls. NOP, @@ -279,6 +281,7 @@ namespace llvm { bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const; + Sched::Preference getSchedulingPreference(SDNode *N) const; /// LowerOperation - Provide custom lowering hooks for some operations. /// @@ -293,7 +296,6 @@ namespace llvm { virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual void computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 78f3596..7f67a41 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -89,10 +89,22 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { let Uses = [RM] in { def BL8_ELF : IForm<18, 0, 1, (outs), (ins calltarget:$func, variable_ops), - "bl $func", BrB, []>; // See Pat patterns below. + "bl $func", BrB, []>; // See Pat patterns below. + + let isCodeGenOnly = 1 in + def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24, + (outs), (ins calltarget:$func, variable_ops), + "bl $func\n\tnop", BrB, []>; + def BLA8_ELF : IForm<18, 1, 1, (outs), (ins aaddr:$func, variable_ops), "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>; + + let isCodeGenOnly = 1 in + def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24, + (outs), (ins aaddr:$func, variable_ops), + "bla $func\n\tnop", BrB, + [(PPCcall_nop_SVR4 (i64 imm:$func))]>; } let Uses = [X11, CTR8, RM] in { def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1, @@ -111,8 +123,14 @@ def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)), def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)), (BL8_ELF tglobaladdr:$dst)>; +def : Pat<(PPCcall_nop_SVR4 (i64 tglobaladdr:$dst)), + (BL8_NOP_ELF tglobaladdr:$dst)>; + def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)), (BL8_ELF texternalsym:$dst)>; +def : Pat<(PPCcall_nop_SVR4 (i64 texternalsym:$dst)), + (BL8_NOP_ELF texternalsym:$dst)>; + def : Pat<(PPCnop), (NOP)>; @@ -506,7 +524,7 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), let mayLoad = 1 in def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, ptr_rc:$rA), - "lhau $rD, $disp($rA)", LdStGeneral, + "lhau $rD, $disp($rA)", LdStLoad, []>, RegConstraint<"$rA = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! @@ -516,38 +534,38 @@ def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp // Zero extending loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src), - "lbz $rD, $src", LdStGeneral, + "lbz $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>; def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src), - "lhz $rD, $src", LdStGeneral, + "lhz $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>; def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src), - "lwz $rD, $src", LdStGeneral, + "lwz $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src), - "lbzx $rD, $src", LdStGeneral, + "lbzx $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>; def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src), - "lhzx $rD, $src", LdStGeneral, + "lhzx $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>; def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), - "lwzx $rD, $src", LdStGeneral, + "lwzx $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>; // Update forms. let mayLoad = 1 in { def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStGeneral, + "lbzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStGeneral, + "lhzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStGeneral, + "lwzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; } @@ -595,24 +613,24 @@ def : Pat<(PPCload xaddr:$src), let PPC970_Unit = 2 in { // Truncating stores. def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src), - "stb $rS, $src", LdStGeneral, + "stb $rS, $src", LdStStore, [(truncstorei8 G8RC:$rS, iaddr:$src)]>; def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src), - "sth $rS, $src", LdStGeneral, + "sth $rS, $src", LdStStore, [(truncstorei16 G8RC:$rS, iaddr:$src)]>; def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src), - "stw $rS, $src", LdStGeneral, + "stw $rS, $src", LdStStore, [(truncstorei32 G8RC:$rS, iaddr:$src)]>; def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst), - "stbx $rS, $dst", LdStGeneral, + "stbx $rS, $dst", LdStStore, [(truncstorei8 G8RC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst), - "sthx $rS, $dst", LdStGeneral, + "sthx $rS, $dst", LdStStore, [(truncstorei16 G8RC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst), - "stwx $rS, $dst", LdStGeneral, + "stwx $rS, $dst", LdStStore, [(truncstorei32 G8RC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; // Normal 8-byte stores. @@ -629,14 +647,14 @@ let PPC970_Unit = 2 in { def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStGeneral, + "stbu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStGeneral, + "sthu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 707fa41..6c0f3d3 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -188,85 +188,85 @@ class VX2_Int<bits<11> xo, string opc, Intrinsic IntID> def DSS : DSS_Form<822, (outs), (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), - "dss $STRM", LdStGeneral /*FIXME*/, []>; + "dss $STRM", LdStLoad /*FIXME*/, []>; def DSSALL : DSS_Form<822, (outs), (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2), - "dssall", LdStGeneral /*FIXME*/, []>; + "dssall", LdStLoad /*FIXME*/, []>; def DST : DSS_Form<342, (outs), (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB), - "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTT : DSS_Form<342, (outs), (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB), - "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTST : DSS_Form<374, (outs), (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB), - "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTSTT : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB), - "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DST64 : DSS_Form<342, (outs), (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB), - "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTT64 : DSS_Form<342, (outs), (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), - "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTST64 : DSS_Form<374, (outs), (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB), - "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTSTT64 : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), - "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins), - "mfvscr $vD", LdStGeneral, + "mfvscr $vD", LdStStore, [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>; def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB), - "mtvscr $vB", LdStGeneral, + "mtvscr $vB", LdStLoad, [(int_ppc_altivec_mtvscr VRRC:$vB)]>; let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads. def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src), - "lvebx $vD, $src", LdStGeneral, + "lvebx $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src), - "lvehx $vD, $src", LdStGeneral, + "lvehx $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src), - "lvewx $vD, $src", LdStGeneral, + "lvewx $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src), - "lvx $vD, $src", LdStGeneral, + "lvx $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src), - "lvxl $vD, $src", LdStGeneral, + "lvxl $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; } def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src), - "lvsl $vD, $src", LdStGeneral, + "lvsl $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, PPC970_Unit_LSU; def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src), - "lvsr $vD, $src", LdStGeneral, + "lvsr $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, PPC970_Unit_LSU; let PPC970_Unit = 2 in { // Stores. def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst), - "stvebx $rS, $dst", LdStGeneral, + "stvebx $rS, $dst", LdStStore, [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>; def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst), - "stvehx $rS, $dst", LdStGeneral, + "stvehx $rS, $dst", LdStStore, [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>; def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst), - "stvewx $rS, $dst", LdStGeneral, + "stvewx $rS, $dst", LdStStore, [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>; def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst), - "stvx $rS, $dst", LdStGeneral, + "stvx $rS, $dst", LdStStore, [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>; def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst), - "stvxl $rS, $dst", LdStGeneral, + "stvxl $rS, $dst", LdStStore, [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>; } diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index d332e2a..d8e4b2b 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -51,6 +51,36 @@ class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; } class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; } class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; } +// Two joined instructions; used to emit two adjacent instructions as one. +// The itinerary from the first instruction is used for scheduling and +// classification. +class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : Instruction { + field bits<64> Inst; + + bit PPC64 = 0; // Default value, override with isPPC64 + + let Namespace = "PPC"; + let Inst{0-5} = opcode1; + let Inst{32-37} = opcode2; + let OutOperandList = OOL; + let InOperandList = IOL; + let AsmString = asmstr; + let Itinerary = itin; + + bits<1> PPC970_First = 0; + bits<1> PPC970_Single = 0; + bits<1> PPC970_Cracked = 0; + bits<3> PPC970_Unit = 0; + + /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to + /// these must be reflected there! See comments there for what these are. + let TSFlags{0} = PPC970_First; + let TSFlags{1} = PPC970_Single; + let TSFlags{2} = PPC970_Cracked; + let TSFlags{5-3} = PPC970_Unit; +} // 1.7.1 I-Form class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr, @@ -164,6 +194,35 @@ class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr, let Addr = 0; } +class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2, + dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<21> Addr; + + let Pattern = pattern; + bits<24> LI; + + let Inst{6-29} = LI; + let Inst{30} = aa; + let Inst{31} = lk; + + let Inst{38-42} = A; + let Inst{43-47} = Addr{20-16}; // Base Reg + let Inst{48-63} = Addr{15-0}; // Displacement +} + +// This is used to emit BL8+NOP. +class IForm_and_DForm_4_zero<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2, + dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : IForm_and_DForm_1<opcode1, aa, lk, opcode2, + OOL, IOL, asmstr, itin, pattern> { + let A = 0; + let Addr = 0; +} + class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : I<opcode, OOL, IOL, asmstr, itin> { diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 7a8ec40..b45ada9 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -49,9 +50,9 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( const TargetMachine *TM, const ScheduleDAG *DAG) const { unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective(); - if (Directive == PPC::DIR_440) { + if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) { const InstrItineraryData *II = TM->getInstrItineraryData(); - return new PPCHazardRecognizer440(II, DAG); + return new PPCScoreboardHazardRecognizer(II, DAG); } return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG); @@ -65,14 +66,14 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); // Most subtargets use a PPC970 recognizer. - if (Directive != PPC::DIR_440) { + if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2) { const TargetInstrInfo *TII = TM.getInstrInfo(); assert(TII && "No InstrInfo?"); return new PPCHazardRecognizer970(*TII); } - return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG); + return new PPCScoreboardHazardRecognizer(II, DAG); } unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { @@ -684,6 +685,9 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case PPC::GC_LABEL: case PPC::DBG_VALUE: return 0; + case PPC::BL8_NOP_ELF: + case PPC::BLA8_NOP_ELF: + return 8; default: return 4; // PowerPC instructions are all 4 bytes } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 939b71a..748486c 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -116,6 +116,9 @@ def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall, def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def PPCcall_nop_SVR4 : SDNode<"PPCISD::CALL_NOP_SVR4", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; @@ -542,6 +545,9 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>, PPC970_DGroup_Single; +def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)), + (DCBT xoaddr:$dst)>; + // Atomic operations let usesCustomInserter = 1 in { let Defs = [CR0] in { @@ -637,7 +643,7 @@ def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst), isDOT; let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in -def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>; +def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>; //===----------------------------------------------------------------------===// // PPC32 Load Instructions. @@ -646,17 +652,17 @@ def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>; // Unindexed (r+i) Loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src), - "lbz $rD, $src", LdStGeneral, + "lbz $rD, $src", LdStLoad, [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>; def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src), "lha $rD, $src", LdStLHA, [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src), - "lhz $rD, $src", LdStGeneral, + "lhz $rD, $src", LdStLoad, [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>; def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src), - "lwz $rD, $src", LdStGeneral, + "lwz $rD, $src", LdStLoad, [(set GPRC:$rD, (load iaddr:$src))]>; def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src), @@ -670,22 +676,22 @@ def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), // Unindexed (r+i) Loads with Update (preinc). let mayLoad = 1 in { def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStGeneral, + "lbzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhau $rD, $addr", LdStGeneral, + "lhau $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStGeneral, + "lhzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStGeneral, + "lwzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; @@ -705,25 +711,25 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), // let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src), - "lbzx $rD, $src", LdStGeneral, + "lbzx $rD, $src", LdStLoad, [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>; def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src), - "lhzx $rD, $src", LdStGeneral, + "lhzx $rD, $src", LdStLoad, [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>; def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src), - "lwzx $rD, $src", LdStGeneral, + "lwzx $rD, $src", LdStLoad, [(set GPRC:$rD, (load xaddr:$src))]>; def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src), - "lhbrx $rD, $src", LdStGeneral, + "lhbrx $rD, $src", LdStLoad, [(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>; def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src), - "lwbrx $rD, $src", LdStGeneral, + "lwbrx $rD, $src", LdStLoad, [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>; def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src), @@ -741,13 +747,13 @@ def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), // Unindexed (r+i) Stores. let PPC970_Unit = 2 in { def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src), - "stb $rS, $src", LdStGeneral, + "stb $rS, $src", LdStStore, [(truncstorei8 GPRC:$rS, iaddr:$src)]>; def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src), - "sth $rS, $src", LdStGeneral, + "sth $rS, $src", LdStStore, [(truncstorei16 GPRC:$rS, iaddr:$src)]>; def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src), - "stw $rS, $src", LdStGeneral, + "stw $rS, $src", LdStStore, [(store GPRC:$rS, iaddr:$src)]>; def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst), "stfs $rS, $dst", LdStUX, @@ -761,33 +767,33 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), let PPC970_Unit = 2 in { def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStGeneral, + "stbu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStGeneral, + "sthu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStGeneral, + "stwu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfsu $rS, $ptroff($ptrreg)", LdStGeneral, + "stfsu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfdu $rS, $ptroff($ptrreg)", LdStGeneral, + "stfdu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; @@ -798,29 +804,29 @@ def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, // let PPC970_Unit = 2 in { def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst), - "stbx $rS, $dst", LdStGeneral, + "stbx $rS, $dst", LdStStore, [(truncstorei8 GPRC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst), - "sthx $rS, $dst", LdStGeneral, + "sthx $rS, $dst", LdStStore, [(truncstorei16 GPRC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), - "stwx $rS, $dst", LdStGeneral, + "stwx $rS, $dst", LdStStore, [(store GPRC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; let mayStore = 1 in { def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB), - "stwux $rS, $rA, $rB", LdStGeneral, + "stwux $rS, $rA, $rB", LdStStore, []>; } def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst), - "sthbrx $rS, $dst", LdStGeneral, + "sthbrx $rS, $dst", LdStStore, [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>, PPC970_DGroup_Cracked; def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst), - "stwbrx $rS, $dst", LdStGeneral, + "stwbrx $rS, $dst", LdStStore, [(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>, PPC970_DGroup_Cracked; diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 4590f00..a6528c0 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -291,9 +291,10 @@ void PPC64CompilationCallback() { } #endif -extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, - unsigned *OrigCallAddrPlus4, - bool is64Bit) { +extern "C" { +static void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, + unsigned *OrigCallAddrPlus4, + bool is64Bit) { // Adjust the pointer to the address of the call instruction in the stub // emitted by emitFunctionStub, rather than the instruction after it. unsigned *StubCallAddr = StubCallAddrPlus4 - 1; @@ -337,6 +338,7 @@ extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, // stack after we restore all regs. return Target; } +} diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 2976f01..ef13571 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -554,7 +554,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // clear can be encoded. This is extremely uncommon, because normally you // only "std" to a stack slot that is at least 4-byte aligned, but it can // happen in invalid code. - if (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) { + if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm + (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { if (isIXAddr) Offset >>= 2; // The actual encoded value has the low two bits zero. MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index 4e37d0a..8c0a858 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -50,7 +50,8 @@ def BrMCRX : InstrItinClass; def LdStDCBA : InstrItinClass; def LdStDCBF : InstrItinClass; def LdStDCBI : InstrItinClass; -def LdStGeneral : InstrItinClass; +def LdStLoad : InstrItinClass; +def LdStStore : InstrItinClass; def LdStDSS : InstrItinClass; def LdStICBI : InstrItinClass; def LdStUX : InstrItinClass; @@ -107,6 +108,7 @@ include "PPCSchedule440.td" include "PPCScheduleG4.td" include "PPCScheduleG4Plus.td" include "PPCScheduleG5.td" +include "PPCScheduleA2.td" //===----------------------------------------------------------------------===// // Instruction to itinerary class map - When add new opcodes to the supported @@ -150,8 +152,8 @@ include "PPCScheduleG5.td" // dcbf LdStDCBF // dcbi LdStDCBI // dcbst LdStDCBF -// dcbt LdStGeneral -// dcbtst LdStGeneral +// dcbt LdStLoad +// dcbtst LdStLoad // dcbz LdStDCBF // divd IntDivD // divdu IntDivD @@ -160,9 +162,9 @@ include "PPCScheduleG5.td" // dss LdStDSS // dst LdStDSS // dstst LdStDSS -// eciwx LdStGeneral -// ecowx LdStGeneral -// eieio LdStGeneral +// eciwx LdStLoad +// ecowx LdStLoad +// eieio LdStLoad // eqv IntGeneral // extsb IntGeneral // extsh IntGeneral @@ -202,10 +204,10 @@ include "PPCScheduleG5.td" // fsubs FPGeneral // icbi LdStICBI // isync SprISYNC -// lbz LdStGeneral -// lbzu LdStGeneral +// lbz LdStLoad +// lbzu LdStLoad // lbzux LdStUX -// lbzx LdStGeneral +// lbzx LdStLoad // ld LdStLD // ldarx LdStLDARX // ldu LdStLD @@ -223,11 +225,11 @@ include "PPCScheduleG5.td" // lhau LdStLHA // lhaux LdStLHA // lhax LdStLHA -// lhbrx LdStGeneral -// lhz LdStGeneral -// lhzu LdStGeneral +// lhbrx LdStLoad +// lhz LdStLoad +// lhzu LdStLoad // lhzux LdStUX -// lhzx LdStGeneral +// lhzx LdStLoad // lmw LdStLMW // lswi LdStLMW // lswx LdStLMW @@ -242,11 +244,11 @@ include "PPCScheduleG5.td" // lwarx LdStLWARX // lwaux LdStLHA // lwax LdStLHA -// lwbrx LdStGeneral -// lwz LdStGeneral -// lwzu LdStGeneral +// lwbrx LdStLoad +// lwz LdStLoad +// lwzu LdStLoad // lwzux LdStUX -// lwzx LdStGeneral +// lwzx LdStLoad // mcrf BrMCR // mcrfs FPGeneral // mcrxr BrMCRX @@ -307,10 +309,10 @@ include "PPCScheduleG5.td" // srawi IntShift // srd IntRotateD // srw IntGeneral -// stb LdStGeneral -// stbu LdStGeneral -// stbux LdStGeneral -// stbx LdStGeneral +// stb LdStStore +// stbu LdStStore +// stbux LdStStore +// stbx LdStStore // std LdStSTD // stdcx. LdStSTDCX // stdu LdStSTD @@ -325,11 +327,11 @@ include "PPCScheduleG5.td" // stfsu LdStUX // stfsux LdStUX // stfsx LdStUX -// sth LdStGeneral -// sthbrx LdStGeneral -// sthu LdStGeneral -// sthux LdStGeneral -// sthx LdStGeneral +// sth LdStStore +// sthbrx LdStStore +// sthu LdStStore +// sthux LdStStore +// sthx LdStStore // stmw LdStLMW // stswi LdStLMW // stswx LdStLMW @@ -338,12 +340,12 @@ include "PPCScheduleG5.td" // stvewx LdStSTVEBX // stvx LdStSTVEBX // stvxl LdStSTVEBX -// stw LdStGeneral -// stwbrx LdStGeneral +// stw LdStStore +// stwbrx LdStStore // stwcx. LdStSTWCX -// stwu LdStGeneral -// stwux LdStGeneral -// stwx LdStGeneral +// stwu LdStStore +// stwux LdStStore +// stwx LdStStore // subf IntGeneral // subfc IntGeneral // subfe IntGeneral diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td index 76f7465..419faea 100644 --- a/lib/Target/PowerPC/PPCSchedule440.td +++ b/lib/Target/PowerPC/PPCSchedule440.td @@ -270,15 +270,23 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, - InstrItinData<LdStGeneral , [InstrStage<1, [IFTH1, IFTH2]>, + InstrItinData<LdStLoad , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, InstrStage<1, [LRACC]>, InstrStage<1, [AGEN]>, InstrStage<1, [CRD]>, InstrStage<2, [LWB]>], - [9, 5], // FIXME: should be [9, 5] for loads and - // [8, 5] for stores. + [9, 5], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStStore , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], [NoBypass, GPR_Bypass]>, InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, @@ -345,6 +353,46 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1]>, + InstrStage<1, [IRACC], 0>, + InstrStage<4, [LWARX_Hold], 0>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1]>, + InstrStage<1, [IRACC], 0>, + InstrStage<4, [LWARX_Hold], 0>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStSTWCX , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1]>, diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td new file mode 100644 index 0000000..857ba40 --- /dev/null +++ b/lib/Target/PowerPC/PPCScheduleA2.td @@ -0,0 +1,652 @@ +//===- PPCScheduleA2.td - PPC A2 Scheduling Definitions --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Primary reference: +// A2 Processor User's Manual. +// IBM (as updated in) 2010. + +//===----------------------------------------------------------------------===// +// Functional units on the PowerPC A2 chip sets +// +def IU0to3_0 : FuncUnit; // Fetch unit 1 to 4 slot 1 +def IU0to3_1 : FuncUnit; // Fetch unit 1 to 4 slot 2 +def IU0to3_2 : FuncUnit; // Fetch unit 1 to 4 slot 3 +def IU0to3_3 : FuncUnit; // Fetch unit 1 to 4 slot 4 +def IU4_0 : FuncUnit; // Instruction buffer slot 1 +def IU4_1 : FuncUnit; // Instruction buffer slot 2 +def IU4_2 : FuncUnit; // Instruction buffer slot 3 +def IU4_3 : FuncUnit; // Instruction buffer slot 4 +def IU4_4 : FuncUnit; // Instruction buffer slot 5 +def IU4_5 : FuncUnit; // Instruction buffer slot 6 +def IU4_6 : FuncUnit; // Instruction buffer slot 7 +def IU4_7 : FuncUnit; // Instruction buffer slot 8 +def IU5 : FuncUnit; // Dependency resolution +def IU6 : FuncUnit; // Instruction issue +def RF0 : FuncUnit; +def XRF1 : FuncUnit; +def XEX1 : FuncUnit; // Execution stage 1 for the XU pipeline +def XEX2 : FuncUnit; // Execution stage 2 for the XU pipeline +def XEX3 : FuncUnit; // Execution stage 3 for the XU pipeline +def XEX4 : FuncUnit; // Execution stage 4 for the XU pipeline +def XEX5 : FuncUnit; // Execution stage 5 for the XU pipeline +def XEX6 : FuncUnit; // Execution stage 6 for the XU pipeline +def FRF1 : FuncUnit; +def FEX1 : FuncUnit; // Execution stage 1 for the FU pipeline +def FEX2 : FuncUnit; // Execution stage 2 for the FU pipeline +def FEX3 : FuncUnit; // Execution stage 3 for the FU pipeline +def FEX4 : FuncUnit; // Execution stage 4 for the FU pipeline +def FEX5 : FuncUnit; // Execution stage 5 for the FU pipeline +def FEX6 : FuncUnit; // Execution stage 6 for the FU pipeline + +def CR_Bypass : Bypass; // The bypass for condition regs. +//def GPR_Bypass : Bypass; // The bypass for general-purpose regs. +//def FPR_Bypass : Bypass; // The bypass for floating-point regs. + +// +// This file defines the itinerary class data for the PPC A2 processor. +// +//===----------------------------------------------------------------------===// + + +def PPCA2Itineraries : ProcessorItineraries< + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3, + IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7, + IU5, IU6, RF0, XRF1, XEX1, XEX2, XEX3, XEX4, XEX5, XEX6, + FRF1, FEX1, FEX2, FEX3, FEX4, FEX5, FEX6], + [CR_Bypass, GPR_Bypass, FPR_Bypass], [ + InstrItinData<IntGeneral , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntCompare , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [CR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivW , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<38, [XEX6]>], + [53, 7, 7], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMFFS , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMTFSB0 , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHW , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHWU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulLI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotate , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntShift , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntTrapW , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<BrB , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<BrCR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [CR_Bypass, CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [CR_Bypass, CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCRX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [CR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBA , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 11], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStDCBF , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 11], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStDCBI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 11], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLoad , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStStore , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStICBI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStUX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<LdStLFD , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLFDU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLHA , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLMW , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLWARX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [26, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [26, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [26, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTWCX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [26, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSync , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<12, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>]>, + InstrItinData<SprISYNC , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>, + InstrItinData<SprMFSR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [GPR_Bypass, NoBypass]>, + InstrItinData<SprMTMSR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprTLBSYNC , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>, + InstrItinData<SprMFCR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7], + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFMSR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [GPR_Bypass, NoBypass]>, + InstrItinData<SprMFSPR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMFTB , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], + [29, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSPR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSRIN , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], + [29, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprRFI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], + [29, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprSC , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], + [29, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<FPGeneral , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [15, 7, 7], + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPCompare , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [13, 7, 7], + [CR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivD , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<71, [FRF1], 0>, + InstrStage<71, [FEX1], 0>, + InstrStage<71, [FEX2], 0>, + InstrStage<71, [FEX3], 0>, + InstrStage<71, [FEX4], 0>, + InstrStage<71, [FEX5], 0>, + InstrStage<71, [FEX6]>], + [86, 7, 7], + [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivS , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<58, [FRF1], 0>, + InstrStage<58, [FEX1], 0>, + InstrStage<58, [FEX2], 0>, + InstrStage<58, [FEX3], 0>, + InstrStage<58, [FEX4], 0>, + InstrStage<58, [FEX5], 0>, + InstrStage<58, [FEX6]>], + [73, 7, 7], + [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPSqrt , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<68, [FRF1], 0>, + InstrStage<68, [FEX1], 0>, + InstrStage<68, [FEX2], 0>, + InstrStage<68, [FEX3], 0>, + InstrStage<68, [FEX4], 0>, + InstrStage<68, [FEX5], 0>, + InstrStage<68, [FEX6]>], + [86, 7], // FIXME: should be [86, 7] for double + // and [82, 7] for single. Likewise, + // the FEX? cycle count should be 68 + // for double and 64 for single. + [NoBypass, FPR_Bypass]>, + InstrItinData<FPFused , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [15, 7, 7, 7], + [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPRes , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [15, 7], + [FPR_Bypass, FPR_Bypass]> +]>; diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td index e7e5498..bc926f7 100644 --- a/lib/Target/PowerPC/PPCScheduleG3.td +++ b/lib/Target/PowerPC/PPCScheduleG3.td @@ -32,7 +32,8 @@ def G3Itineraries : ProcessorItineraries< InstrItinData<LdStDCBA , [InstrStage<2, [SLU]>]>, InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>, InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td index 87a3151..f7ec1e0 100644 --- a/lib/Target/PowerPC/PPCScheduleG4.td +++ b/lib/Target/PowerPC/PPCScheduleG4.td @@ -31,7 +31,8 @@ def G4Itineraries : ProcessorItineraries< InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>, InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>, InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td index f76557a..37ebfc5 100644 --- a/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -34,7 +34,8 @@ def G4PlusItineraries : ProcessorItineraries< InstrItinData<BrMCRX , [InstrStage<2, [IU2]>]>, InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>, InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td index bc0820b..d1e40ce 100644 --- a/lib/Target/PowerPC/PPCScheduleG5.td +++ b/lib/Target/PowerPC/PPCScheduleG5.td @@ -35,7 +35,8 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<BrMCR , [InstrStage<2, [BPU]>]>, InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>, InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>, InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>, diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index c89fab3..f405b47 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -146,10 +146,11 @@ bool PPCSubtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const { - if (DarwinDirective == PPC::DIR_440) - return false; + if (DarwinDirective == PPC::DIR_440 || DarwinDirective == PPC::DIR_A2) + Mode = TargetSubtargetInfo::ANTIDEP_ALL; + else + Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; - Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; CriticalPathRCs.clear(); if (isPPC64()) diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 69fe50b..a275029 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -40,6 +40,7 @@ namespace PPC { DIR_7400, DIR_750, DIR_970, + DIR_A2, DIR_64 }; } @@ -144,6 +145,8 @@ public: /// isDarwin - True if this is any darwin platform. bool isDarwin() const { return TargetTriple.isMacOSX(); } + /// isBGP - True if this is a BG/P platform. + bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; } bool isDarwinABI() const { return isDarwin(); } bool isSVR4ABI() const { return !isDarwin(); } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index ba9c779..d113976 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -39,6 +39,10 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { + + // The binutils for the BG/P are too old for CFI. + if (Subtarget.isBGP()) + setMCUseCFI(false); } void PPC32TargetMachine::anchor() { } |