diff options
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r-- | lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 2 | ||||
-rw-r--r-- | lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 1 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPC.td | 10 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCAsmPrinter.cpp | 48 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 9 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 74 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.h | 4 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstr64Bit.td | 59 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstrInfo.cpp | 6 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstrInfo.td | 93 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCSchedule.td | 88 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCSchedule440.td | 60 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCScheduleA2.td | 81 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCScheduleE500mc.td | 265 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCScheduleE5500.td | 309 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCScheduleG3.td | 7 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCScheduleG4.td | 7 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCScheduleG4Plus.td | 8 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCScheduleG5.td | 10 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCSubtarget.h | 2 |
20 files changed, 1005 insertions, 138 deletions
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index d175e3e..413142e 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -137,7 +137,7 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { char Value = MI->getOperand(OpNo).getImm(); - Value = (Value << (32-5)) >> (32-5); + Value = SignExtend32<5>(Value); O << (int)Value; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 245b457..b9ea8b5 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -64,7 +64,6 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { ZeroDirective = "\t.space\t"; Data64bitsDirective = is64Bit ? "\t.quad\t" : 0; - LCOMMDirectiveType = LCOMM::NoAlignment; AssemblerDialect = 0; // Old-Style mnemonics. } diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index b7f1688..cb15dad 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -35,6 +35,10 @@ def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">; def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">; def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">; def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">; +def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective", + "PPC::DIR_E500mc", "">; +def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective", + "PPC::DIR_E5500", "">; def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; @@ -94,6 +98,12 @@ def : Processor<"g5", G5Itineraries, [Directive970, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; +def : ProcessorModel<"e500mc", PPCE500mcModel, + [DirectiveE500mc, FeatureMFOCRF, + FeatureSTFIWX, FeatureBookE, FeatureISEL]>; +def : ProcessorModel<"e5500", PPCE5500Model, + [DirectiveE5500, FeatureMFOCRF, Feature64Bit, + FeatureSTFIWX, FeatureBookE, FeatureISEL]>; def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, FeatureISEL, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index f76b89c..6e0e8bb 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -109,6 +109,8 @@ namespace { bool doFinalization(Module &M); virtual void EmitFunctionEntryLabel(); + + void EmitFunctionBodyEnd(); }; /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac @@ -345,23 +347,32 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitLabel(PICBase); return; } + case PPC::LDtocJTI: + case PPC::LDtocCPT: case PPC::LDtoc: { // Transform %X3 = LDtoc <ga:@min1>, %X2 LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); - + // Change the opcode to LD, and the global address operand to be a // reference to the TOC entry we will synthesize later. TmpInst.setOpcode(PPC::LD); const MachineOperand &MO = MI->getOperand(1); - assert(MO.isGlobal()); - - // Map symbol -> label of TOC entry. - MCSymbol *&TOCEntry = TOC[Mang->getSymbol(MO.getGlobal())]; + + // Map symbol -> label of TOC entry + assert(MO.isGlobal() || MO.isCPI() || MO.isJTI()); + MCSymbol *MOSymbol = 0; + if (MO.isGlobal()) + MOSymbol = Mang->getSymbol(MO.getGlobal()); + else if (MO.isCPI()) + MOSymbol = GetCPISymbol(MO.getIndex()); + else if (MO.isJTI()) + MOSymbol = GetJTISymbol(MO.getIndex()); + MCSymbol *&TOCEntry = TOC[MOSymbol]; if (TOCEntry == 0) TOCEntry = GetTempSymbol("C", TOCLabelID++); - + const MCExpr *Exp = - MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC, + MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC_ENTRY, OutContext); TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); OutStreamer.EmitInstruction(TmpInst); @@ -406,9 +417,9 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName())); MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.@tocbase")); OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext), - Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/); + 8/*size*/, 0/*addrspace*/); OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, OutContext), - Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/); + 8/*size*/, 0/*addrspace*/); OutStreamer.SwitchSection(Current); MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol( @@ -441,6 +452,23 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { return AsmPrinter::doFinalization(M); } +/// EmitFunctionBodyEnd - Print the traceback table before the .size +/// directive. +/// +void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() { + // Only the 64-bit target requires a traceback table. For now, + // we only emit the word of zeroes that GDB requires to find + // the end of the function, and zeroes for the eight-byte + // mandatory fields. + // FIXME: We should fill in the eight-byte mandatory fields as described in + // the PPC64 ELF ABI (this is a low-priority item because GDB does not + // currently make use of these fields). + if (Subtarget.isPPC64()) { + OutStreamer.EmitIntValue(0, 4/*size*/); + OutStreamer.EmitIntValue(0, 8/*size*/); + } +} + void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { static const char *const CPUDirectives[] = { "", @@ -453,6 +481,8 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { "ppc750", "ppc970", "ppcA2", + "ppce500mc", + "ppce5500", "power6", "power7", "ppc64" diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index a00f686..e8f4d16 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -975,6 +975,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { case ISD::AND: { unsigned Imm, Imm2, SH, MB, ME; + uint64_t Imm64; // If this is an and of a value rotated between 0 and 31 bits and then and'd // with a mask, emit rlwinm @@ -993,6 +994,14 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) }; return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); } + // If this is a 64-bit zero-extension mask, emit rldicl. + if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && + isMask_64(Imm64)) { + SDValue Val = N->getOperand(0); + MB = 64 - CountTrailingOnes_64(Imm64); + SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB) }; + return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops, 3); + } // AND X, 0 -> 0, not "rlwinm 32". if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) { ReplaceUses(SDValue(N, 0), N->getOperand(1)); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 61d44c5..dbb3b14 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -449,6 +449,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setSchedulingPreference(Sched::Hybrid); computeRegisterProperties(); + + // The Freescale cores does better with aggressive inlining of memcpy and + // friends. Gcc uses same threshold of 128 bytes (= 32 word stores). + if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc || + Subtarget->getDarwinDirective() == PPC::DIR_E5500) { + maxStoresPerMemset = 32; + maxStoresPerMemsetOptSize = 16; + maxStoresPerMemcpy = 32; + maxStoresPerMemcpyOptSize = 8; + maxStoresPerMemmove = 32; + maxStoresPerMemmoveOptSize = 8; + + setPrefFunctionAlignment(4); + benefitFromCodePlacementOpt = true; + } } /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate @@ -517,6 +532,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; case PPCISD::MTFSF: return "PPCISD::MTFSF"; case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; + case PPCISD::CR6SET: return "PPCISD::CR6SET"; + case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; } } @@ -811,14 +828,13 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { } // Properly sign extend the value. - int ShAmt = (4-ByteSize)*8; - int MaskVal = ((int)Value << ShAmt) >> ShAmt; + int MaskVal = SignExtend32(Value, ByteSize * 8); // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. if (MaskVal == 0) return SDValue(); // Finally, if this value fits in a 5 bit sext field, return it - if (((MaskVal << (32-5)) >> (32-5)) == MaskVal) + if (SignExtend32<5>(MaskVal) == MaskVal) return DAG.getTargetConstant(MaskVal, MVT::i32); return SDValue(); } @@ -1204,6 +1220,14 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); const Constant *C = CP->getConstVal(); + // 64-bit SVR4 ABI code is always position-independent. + // The actual address of the GlobalValue is stored in the TOC. + if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); + return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA, + DAG.getRegister(PPC::X2, MVT::i64)); + } + unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); SDValue CPIHi = @@ -1217,6 +1241,14 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + // 64-bit SVR4 ABI code is always position-independent. + // The actual address of the GlobalValue is stored in the TOC. + if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); + return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA, + DAG.getRegister(PPC::X2, MVT::i64)); + } + unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); @@ -1441,7 +1473,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, MachinePointerInfo(), MVT::i32, false, false, 0); - return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), + return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, false, 0); } @@ -2408,7 +2440,7 @@ static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { int Addr = C->getZExtValue(); if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. - (Addr << 6 >> 6) != Addr) + SignExtend32<26>(Addr) != Addr) return 0; // Top 6 bits have to be sext of immediate. return DAG.getConstant((int)C->getZExtValue() >> 2, @@ -2819,6 +2851,10 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, isTailCall, RegsToPass, Ops, NodeTys, PPCSubTarget); + // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls + if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) + Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); + // When performing tail call optimization the callee pops its arguments off // the stack. Account for this here so these bytes can be pushed back on in // PPCRegisterInfo::eliminateCallFramePseudoInstr. @@ -3116,14 +3152,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Set CR6 to true if this is a vararg call with floating args passed in - // registers. - if (isVarArg) { - SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET, - dl, MVT::i32), 0); - RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR)); - } - // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; @@ -3133,6 +3161,18 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); } + // Set CR bit 6 to true if this is a vararg call with floating args passed in + // registers. + if (isVarArg) { + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Ops[] = { Chain, InFlag }; + + Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, + dl, VTs, Ops, InFlag.getNode() ? 2 : 1); + + InFlag = Chain.getValue(1); + } + if (isTailCall) PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, false, TailCallArguments); @@ -4126,7 +4166,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, unsigned TypeShiftAmt = i & (SplatBitSize-1); // vsplti + shl self. - if (SextVal == (i << (int)TypeShiftAmt)) { + if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, @@ -4171,17 +4211,17 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } // t = vsplti c, result = vsldoi t, t, 1 - if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) { + if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 2 - if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) { + if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 3 - if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) { + if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index b0a013b..902b188 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -174,6 +174,10 @@ namespace llvm { /// operand #3 optional in flag TC_RETURN, + /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls + CR6SET, + CR6UNSET, + /// STD_32 - This is the STD instruction for use with "32-bit" registers. STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 39778a5..cfe71d17 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -29,6 +29,9 @@ def symbolLo64 : Operand<i64> { let PrintMethod = "printSymbolLo"; let EncoderMethod = "getLO16Encoding"; } +def tocentry : Operand<iPTR> { + let MIOperandInfo = (ops i32imm:$imm); +} //===----------------------------------------------------------------------===// // 64-bit transformation functions. @@ -296,12 +299,14 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins), let PPC970_Unit = 1 in { // FXU Operations. +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm), "li $rD, $imm", IntSimple, [(set G8RC:$rD, immSExt16:$imm)]>; def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm), "lis $rD, $imm", IntSimple, [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>; +} // Logical ops. def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), @@ -459,7 +464,7 @@ def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS), let Defs = [CARRY] in { def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH), - "sradi $rA, $rS, $SH", IntRotateD, + "sradi $rA, $rS, $SH", IntRotateDI, [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64; } def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS), @@ -482,7 +487,7 @@ def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), let isCommutable = 1 in { def RLDIMI : MDForm_1<30, 3, (outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB), - "rldimi $rA, $rS, $SH, $MB", IntRotateD, + "rldimi $rA, $rS, $SH, $MB", IntRotateDI, []>, isPPC64, RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">; } @@ -494,11 +499,11 @@ def RLDCL : MDForm_1<30, 0, []>, isPPC64; def RLDICL : MDForm_1<30, 0, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MB), - "rldicl $rA, $rS, $SH, $MB", IntRotateD, + "rldicl $rA, $rS, $SH, $MB", IntRotateDI, []>, isPPC64; def RLDICR : MDForm_1<30, 1, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME), - "rldicr $rA, $rS, $SH, $ME", IntRotateD, + "rldicr $rA, $rS, $SH, $ME", IntRotateDI, []>, isPPC64; def RLWINM8 : MForm_2<21, @@ -541,19 +546,19 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), let mayLoad = 1 in def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, ptr_rc:$rA), - "lhau $rD, $disp($rA)", LdStLoad, + "lhau $rD, $disp($rA)", LdStLHAU, []>, RegConstraint<"$rA = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhaux $rD, $addr", LdStLoad, + "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LWAUX : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lwaux $rD, $addr", LdStLoad, + "lwaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } @@ -584,31 +589,31 @@ def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), // Update forms. let mayLoad = 1 in { def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStLoad, + "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStLoad, + "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStLoad, + "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lbzux $rD, $addr", LdStLoad, + "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LHZUX8 : XForm_1<31, 331, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhzux $rD, $addr", LdStLoad, + "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lwzux $rD, $addr", LdStLoad, + "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; } @@ -624,6 +629,14 @@ def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), "", [(set G8RC:$rD, (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64; +def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), + "", + [(set G8RC:$rD, + (PPCtoc_entry tjumptable:$disp, G8RC:$reg))]>, isPPC64; +def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), + "", + [(set G8RC:$rD, + (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64; let hasSideEffects = 1 in { let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo. @@ -642,13 +655,13 @@ def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), let mayLoad = 1 in def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr), - "ldu $rD, $addr", LdStLD, + "ldu $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "ldux $rD, $addr", LdStLoad, + "ldux $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } @@ -695,14 +708,14 @@ let PPC970_Unit = 2 in { def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStore, + "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStore, + "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, @@ -710,7 +723,7 @@ def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStore, + "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, @@ -718,7 +731,7 @@ def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, s16immX4:$ptroff, ptr_rc:$ptrreg), - "stdu $rS, $ptroff($ptrreg)", LdStSTD, + "stdu $rS, $ptroff($ptrreg)", LdStSTDU, [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">, @@ -727,7 +740,7 @@ def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStore, + "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -736,7 +749,7 @@ def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStore, + "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -745,7 +758,7 @@ def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStore, + "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -754,7 +767,7 @@ def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stdux $rS, $ptroff, $ptrreg", LdStStore, + "stdux $rS, $ptroff, $ptrreg", LdStSTDU, [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 47f09dc..d2df664 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -54,7 +54,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( const TargetMachine *TM, const ScheduleDAG *DAG) const { unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective(); - if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) { + if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 || + Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) { const InstrItineraryData *II = TM->getInstrItineraryData(); return new PPCScoreboardHazardRecognizer(II, DAG); } @@ -70,7 +71,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); // Most subtargets use a PPC970 recognizer. - if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2) { + if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 && + Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) { const TargetInstrInfo *TII = TM.getInstrInfo(); assert(TII && "No InstrInfo?"); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index f57f0c9..a503908 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -123,9 +123,11 @@ def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, + SDNPInGlue, SDNPOutGlue]>; def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, + SDNPInGlue, SDNPOutGlue]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone, @@ -153,6 +155,12 @@ def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, [SDNPHasChain, SDNPMayStore]>; +// Instructions to set/unset CR bit 6 for SVR4 vararg calls +def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + // Instructions to support atomic operations def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx, [SDNPHasChain, SDNPMayLoad]>; @@ -330,9 +338,6 @@ def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg); let EncoderMethod = "getMemRIXEncoding"; } -def tocentry : Operand<iPTR> { - let MIOperandInfo = (ops i32imm:$imm); -} // PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg // that doesn't matter. @@ -673,7 +678,7 @@ def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src), [(set GPRC:$rD, (load iaddr:$src))]>; def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src), - "lfs $rD, $src", LdStLFDU, + "lfs $rD, $src", LdStLFD, [(set F4RC:$rD, (load iaddr:$src))]>; def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), "lfd $rD, $src", LdStLFD, @@ -683,32 +688,32 @@ def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), // Unindexed (r+i) Loads with Update (preinc). let mayLoad = 1 in { def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStLoad, + "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhau $rD, $addr", LdStLoad, + "lhau $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStLoad, + "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStLoad, + "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lfs $rD, $addr", LdStLFDU, + "lfsu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lfd $rD, $addr", LdStLFD, + "lfdu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; @@ -716,37 +721,37 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), // Indexed (r+r) Loads with Update (preinc). def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lbzux $rD, $addr", LdStLoad, + "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhaux $rD, $addr", LdStLoad, + "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LHZUX : XForm_1<31, 331, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhzux $rD, $addr", LdStLoad, + "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lwzux $rD, $addr", LdStLoad, + "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lfsux $rD, $addr", LdStLoad, + "lfsux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lfdux $rD, $addr", LdStLoad, + "lfdux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; } @@ -778,10 +783,10 @@ def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src), [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>; def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src), - "lfsx $frD, $src", LdStLFDU, + "lfsx $frD, $src", LdStLFD, [(set F4RC:$frD, (load xaddr:$src))]>; def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), - "lfdx $frD, $src", LdStLFDU, + "lfdx $frD, $src", LdStLFD, [(set F8RC:$frD, (load xaddr:$src))]>; } @@ -801,10 +806,10 @@ def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src), "stw $rS, $src", LdStStore, [(store GPRC:$rS, iaddr:$src)]>; def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst), - "stfs $rS, $dst", LdStUX, + "stfs $rS, $dst", LdStSTFD, [(store F4RC:$rS, iaddr:$dst)]>; def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), - "stfd $rS, $dst", LdStUX, + "stfd $rS, $dst", LdStSTFD, [(store F8RC:$rS, iaddr:$dst)]>; } @@ -812,33 +817,33 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), let PPC970_Unit = 2 in { def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStore, + "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStore, + "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStore, + "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfsu $rS, $ptroff($ptrreg)", LdStStore, + "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfdu $rS, $ptroff($ptrreg)", LdStStore, + "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; @@ -863,7 +868,7 @@ def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStore, + "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -872,7 +877,7 @@ def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStore, + "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -881,7 +886,7 @@ def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStore, + "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, @@ -889,7 +894,7 @@ def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfsux $rS, $ptroff, $ptrreg", LdStStore, + "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, @@ -897,7 +902,7 @@ def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res), (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfdux $rS, $ptroff, $ptrreg", LdStStore, + "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, @@ -913,14 +918,14 @@ def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst), PPC970_DGroup_Cracked; def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst), - "stfiwx $frS, $dst", LdStUX, + "stfiwx $frS, $dst", LdStSTFD, [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>; def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst), - "stfsx $frS, $dst", LdStUX, + "stfsx $frS, $dst", LdStSTFD, [(store F4RC:$frS, xaddr:$dst)]>; def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst), - "stfdx $frS, $dst", LdStUX, + "stfdx $frS, $dst", LdStSTFD, [(store F8RC:$frS, xaddr:$dst)]>; } @@ -964,7 +969,7 @@ def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>; } -let isReMaterializable = 1 in { +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm), "li $rD, $imm", IntSimple, [(set GPRC:$rD, immSExt16:$imm)]>; @@ -1143,6 +1148,16 @@ def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins), "crxor $dst, $dst, $dst", BrCR, []>; +let Defs = [CR1EQ], CRD = 6 in { +def CR6SET : XLForm_1_ext<19, 289, (outs), (ins), + "creqv 6, 6, 6", BrCR, + [(PPCcr6set)]>; + +def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), + "crxor 6, 6, 6", BrCR, + [(PPCcr6unset)]>; +} + // XFX-Form instructions. Instructions that deal with SPRs. // let Uses = [CTR] in { @@ -1233,7 +1248,7 @@ let Uses = [RM] in { PPC970_DGroup_Single, PPC970_Unit_FPU; def FADDrtz: AForm_2<63, 21, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fadd $FRT, $FRA, $FRB", FPGeneral, + "fadd $FRT, $FRA, $FRB", FPAddSub, [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>, PPC970_DGroup_Single, PPC970_Unit_FPU; } @@ -1364,7 +1379,7 @@ def FSELS : AForm_1<63, 23, let Uses = [RM] in { def FADD : AForm_2<63, 21, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fadd $FRT, $FRA, $FRB", FPGeneral, + "fadd $FRT, $FRA, $FRB", FPAddSub, [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>; def FADDS : AForm_2<59, 21, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), @@ -1388,7 +1403,7 @@ let Uses = [RM] in { [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>; def FSUB : AForm_2<63, 20, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fsub $FRT, $FRA, $FRB", FPGeneral, + "fsub $FRT, $FRA, $FRB", FPAddSub, [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>; def FSUBS : AForm_2<59, 20, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index 6a6ccb9..660c0c3 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -40,6 +40,7 @@ def IntMulHWU : InstrItinClass; def IntMulLI : InstrItinClass; def IntRFID : InstrItinClass; def IntRotateD : InstrItinClass; +def IntRotateDI : InstrItinClass; def IntRotate : InstrItinClass; def IntShift : InstrItinClass; def IntTrapD : InstrItinClass; @@ -52,15 +53,18 @@ def LdStDCBA : InstrItinClass; def LdStDCBF : InstrItinClass; def LdStDCBI : InstrItinClass; def LdStLoad : InstrItinClass; +def LdStLoadUpd : InstrItinClass; def LdStStore : InstrItinClass; +def LdStStoreUpd : InstrItinClass; def LdStDSS : InstrItinClass; def LdStICBI : InstrItinClass; -def LdStUX : InstrItinClass; def LdStLD : InstrItinClass; +def LdStLDU : InstrItinClass; def LdStLDARX : InstrItinClass; def LdStLFD : InstrItinClass; def LdStLFDU : InstrItinClass; def LdStLHA : InstrItinClass; +def LdStLHAU : InstrItinClass; def LdStLMW : InstrItinClass; def LdStLVecX : InstrItinClass; def LdStLWA : InstrItinClass; @@ -69,6 +73,9 @@ def LdStSLBIA : InstrItinClass; def LdStSLBIE : InstrItinClass; def LdStSTD : InstrItinClass; def LdStSTDCX : InstrItinClass; +def LdStSTDU : InstrItinClass; +def LdStSTFD : InstrItinClass; +def LdStSTFDU : InstrItinClass; def LdStSTVEBX : InstrItinClass; def LdStSTWCX : InstrItinClass; def LdStSync : InstrItinClass; @@ -86,6 +93,7 @@ def SprMTSRIN : InstrItinClass; def SprRFI : InstrItinClass; def SprSC : InstrItinClass; def FPGeneral : InstrItinClass; +def FPAddSub : InstrItinClass; def FPCompare : InstrItinClass; def FPDivD : InstrItinClass; def FPDivS : InstrItinClass; @@ -110,6 +118,8 @@ include "PPCScheduleG4.td" include "PPCScheduleG4Plus.td" include "PPCScheduleG5.td" include "PPCScheduleA2.td" +include "PPCScheduleE500mc.td" +include "PPCScheduleE5500.td" //===----------------------------------------------------------------------===// // Instruction to itinerary class map - When add new opcodes to the supported @@ -171,7 +181,7 @@ include "PPCScheduleA2.td" // extsh IntSimple // extsw IntSimple // fabs FPGeneral -// fadd FPGeneral +// fadd FPAddSub // fadds FPGeneral // fcfid FPGeneral // fcmpo FPCompare @@ -201,35 +211,35 @@ include "PPCScheduleA2.td" // fsel FPGeneral // fsqrt FPSqrt // fsqrts FPSqrt -// fsub FPGeneral +// fsub FPAddSub // fsubs FPGeneral // icbi LdStICBI // isync SprISYNC // lbz LdStLoad -// lbzu LdStLoad -// lbzux LdStUX +// lbzu LdStLoadUpd +// lbzux LdStLoadUpd // lbzx LdStLoad // ld LdStLD // ldarx LdStLDARX -// ldu LdStLD -// ldux LdStLD +// ldu LdStLDU +// ldux LdStLDU // ldx LdStLD // lfd LdStLFD // lfdu LdStLFDU // lfdux LdStLFDU -// lfdx LdStLFDU -// lfs LdStLFDU +// lfdx LdStLFD +// lfs LdStLFD // lfsu LdStLFDU // lfsux LdStLFDU -// lfsx LdStLFDU +// lfsx LdStLFD // lha LdStLHA -// lhau LdStLHA -// lhaux LdStLHA +// lhau LdStLHAU +// lhaux LdStLHAU // lhax LdStLHA // lhbrx LdStLoad // lhz LdStLoad -// lhzu LdStLoad -// lhzux LdStUX +// lhzu LdStLoadUpd +// lhzux LdStLoadUpd // lhzx LdStLoad // lmw LdStLMW // lswi LdStLMW @@ -243,12 +253,12 @@ include "PPCScheduleA2.td" // lvxl LdStLVecX // lwa LdStLWA // lwarx LdStLWARX -// lwaux LdStLHA +// lwaux LdStLHAU // lwax LdStLHA // lwbrx LdStLoad // lwz LdStLoad -// lwzu LdStLoad -// lwzux LdStUX +// lwzu LdStLoadUpd +// lwzux LdStLoadUpd // lwzx LdStLoad // mcrf BrMCR // mcrfs FPGeneral @@ -292,10 +302,10 @@ include "PPCScheduleA2.td" // rfid IntRFID // rldcl IntRotateD // rldcr IntRotateD -// rldic IntRotateD -// rldicl IntRotateD -// rldicr IntRotateD -// rldimi IntRotateD +// rldic IntRotateDI +// rldicl IntRotateDI +// rldicr IntRotateDI +// rldimi IntRotateDI // rlwimi IntRotate // rlwinm IntGeneral // rlwnm IntGeneral @@ -305,33 +315,33 @@ include "PPCScheduleA2.td" // sld IntRotateD // slw IntGeneral // srad IntRotateD -// sradi IntRotateD +// sradi IntRotateDI // sraw IntShift // srawi IntShift // srd IntRotateD // srw IntGeneral // stb LdStStore -// stbu LdStStore -// stbux LdStStore +// stbu LdStStoreUpd +// stbux LdStStoreUpd // stbx LdStStore // std LdStSTD // stdcx. LdStSTDCX -// stdu LdStSTD -// stdux LdStSTD +// stdu LdStSTDU +// stdux LdStSTDU // stdx LdStSTD -// stfd LdStUX -// stfdu LdStUX -// stfdux LdStUX -// stfdx LdStUX -// stfiwx LdStUX -// stfs LdStUX -// stfsu LdStUX -// stfsux LdStUX -// stfsx LdStUX +// stfd LdStSTFD +// stfdu LdStSTFDU +// stfdux LdStSTFDU +// stfdx LdStSTFD +// stfiwx LdStSTFD +// stfs LdStSTFD +// stfsu LdStSTFDU +// stfsux LdStSTFDU +// stfsx LdStSTFD // sth LdStStore // sthbrx LdStStore -// sthu LdStStore -// sthux LdStStore +// sthu LdStStoreUpd +// sthux LdStStoreUpd // sthx LdStStore // stmw LdStLMW // stswi LdStLMW @@ -344,8 +354,8 @@ include "PPCScheduleA2.td" // stw LdStStore // stwbrx LdStStore // stwcx. LdStSTWCX -// stwu LdStStore -// stwux LdStStore +// stwu LdStStoreUpd +// stwux LdStStoreUpd // stwx LdStStore // subf IntGeneral // subfc IntGeneral diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td index cd0fb70..37b6eac 100644 --- a/lib/Target/PowerPC/PPCSchedule440.td +++ b/lib/Target/PowerPC/PPCSchedule440.td @@ -288,6 +288,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<2, [LWB]>], [9, 5], [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoadUpd , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [9, 5], + [GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStStore , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -297,6 +306,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<2, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -306,7 +324,7 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, - InstrItinData<LdStUX , [InstrStage<1, [IFTH1, IFTH2]>, + InstrItinData<LdStSTFD , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, InstrStage<1, [LRACC]>, @@ -315,6 +333,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5, 5], [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5, 5], + [NoBypass, GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStLFD , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -342,6 +369,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStLMW , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -371,6 +407,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<2, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDU , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1]>, @@ -537,6 +582,19 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [FWB]>], [10, 4, 4], [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [FRACC]>, + InstrStage<1, [FEXE1]>, + InstrStage<1, [FEXE2]>, + InstrStage<1, [FEXE3]>, + InstrStage<1, [FEXE4]>, + InstrStage<1, [FEXE5]>, + InstrStage<1, [FEXE6]>, + InstrStage<1, [FWB]>], + [10, 4, 4], + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, InstrItinData<FPCompare , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td index 4d4a5d0..ba63b5c 100644 --- a/lib/Target/PowerPC/PPCScheduleA2.td +++ b/lib/Target/PowerPC/PPCScheduleA2.td @@ -181,6 +181,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [10, 7, 7], [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotateDI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, InstrItinData<IntShift , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -302,7 +313,18 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7], [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLD , [InstrStage<4, + InstrItinData<LdStLoadUpd , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLDU , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7]>, @@ -324,6 +346,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [13, 7], [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStICBI , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -335,7 +368,7 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7], [NoBypass, GPR_Bypass]>, - InstrItinData<LdStUX , [InstrStage<4, + InstrItinData<LdStSTFD , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7]>, @@ -346,6 +379,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7, 7], [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [NoBypass, FPR_Bypass, FPR_Bypass]>, InstrItinData<LdStLFD , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -379,6 +423,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStLMW , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -412,6 +467,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [13, 7], [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTDU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStSTDCX , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -593,6 +659,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], [15, 7, 7], [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [15, 7, 7], + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, InstrItinData<FPCompare , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, diff --git a/lib/Target/PowerPC/PPCScheduleE500mc.td b/lib/Target/PowerPC/PPCScheduleE500mc.td new file mode 100644 index 0000000..9bb779a --- /dev/null +++ b/lib/Target/PowerPC/PPCScheduleE500mc.td @@ -0,0 +1,265 @@ +//===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the Freescale e500mc 32-bit +// Power processor. +// +// All information is derived from the "e500mc Core Reference Manual", +// Freescale Document Number E500MCRM, Rev. 1, 03/2012. +// +//===----------------------------------------------------------------------===// +// Relevant functional units in the Freescale e500mc core: +// +// * Decode & Dispatch +// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue +// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ). +def DIS0 : FuncUnit; // Dispatch stage - insn 1 +def DIS1 : FuncUnit; // Dispatch stage - insn 2 + +// * Execute +// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX. +// Some instructions can only execute in SFX0 but not SFX1. +// The CFX has a bypass path, allowing non-divide instructions to execute +// while a divide instruction is executed. +def SFX0 : FuncUnit; // Simple unit 0 +def SFX1 : FuncUnit; // Simple unit 1 +def BU : FuncUnit; // Branch unit +def CFX_DivBypass + : FuncUnit; // CFX divide bypass path +def CFX_0 : FuncUnit; // CFX pipeline +def LSU_0 : FuncUnit; // LSU pipeline +def FPU_0 : FuncUnit; // FPU pipeline + +def PPCE500mcItineraries : ProcessorItineraries< + [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0], + [CR_Bypass, GPR_Bypass, FPR_Bypass], [ + InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 1, 1], // Latency = 1 or 2 + [CR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<14, [CFX_DivBypass]>], + [17, 1, 1], // Latency=4..35, Repeat= 4..35 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<8, [FPU_0]>], + [11], // Latency = 8 + [FPR_Bypass]>, + InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<8, [FPU_0]>], + [11, 1, 1], // Latency = 8 + [NoBypass, NoBypass, NoBypass]>, + InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0]>], + [5, 1], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [4, 1], // Latency = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [4, 1, 1], // Latency = 1 + [CR_Bypass, CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [4, 1], // Latency = 1 + [CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [CR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 1, 1], // Latency = 4 + [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 1, 1], // Latency = 4 + [FPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 1], // Latency = r+3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<3, [LSU_0]>], + [6, 1, 1], // Latency = 3, Repeat rate = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>]>, + InstrItinData<SprMFSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [7, 1], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0, SFX1]>], + [5, 1], // Latency = 2, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMTSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0]>], + [5, 1], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0], 0>]>, + InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<5, [SFX0]>], + [8, 1], + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [CR_Bypass, GPR_Bypass]>, + InstrItinData<SprMTSRIN , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0]>], + [4, 1], + [NoBypass, GPR_Bypass]>, + InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [FPU_0]>], + [11, 1, 1], // Latency = 8, Repeat rate = 2 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [FPU_0]>], + [13, 1, 1], // Latency = 10, Repeat rate = 4 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [FPU_0]>], + [11, 1, 1], // Latency = 8, Repeat rate = 2 + [CR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<68, [FPU_0]>], + [71, 1, 1], // Latency = 68, Repeat rate = 68 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<38, [FPU_0]>], + [41, 1, 1], // Latency = 38, Repeat rate = 38 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [FPU_0]>], + [13, 1, 1, 1], // Latency = 10, Repeat rate = 4 + [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<38, [FPU_0]>], + [41, 1], // Latency = 38, Repeat rate = 38 + [FPR_Bypass, FPR_Bypass]> +]>; + +// ===---------------------------------------------------------------------===// +// e500mc machine model for scheduling and other instruction cost heuristics. + +def PPCE500mcModel : SchedMachineModel { + let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. + let MinLatency = -1; // OperandCycles are interpreted as MinLatency. + let LoadLatency = 5; // Optimistic load latency assuming bypass. + // This is overriden by OperandCycles if the + // Itineraries are queried instead. + + let Itineraries = PPCE500mcItineraries; +} diff --git a/lib/Target/PowerPC/PPCScheduleE5500.td b/lib/Target/PowerPC/PPCScheduleE5500.td new file mode 100644 index 0000000..d7e11ac --- /dev/null +++ b/lib/Target/PowerPC/PPCScheduleE5500.td @@ -0,0 +1,309 @@ +//===-- PPCScheduleE500mc.td - e5500 Scheduling Defs -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the Freescale e5500 64-bit +// Power processor. +// +// All information is derived from the "e5500 Core Reference Manual", +// Freescale Document Number e5500RM, Rev. 1, 03/2012. +// +//===----------------------------------------------------------------------===// +// Relevant functional units in the Freescale e5500 core +// (These are the same as for the e500mc) +// +// * Decode & Dispatch +// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue +// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ). +// def DIS0 : FuncUnit; +// def DIS1 : FuncUnit; + +// * Execute +// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX. +// The CFX has a bypass path, allowing non-divide instructions to execute +// while a divide instruction is being executed. +// def SFX0 : FuncUnit; // Simple unit 0 +// def SFX1 : FuncUnit; // Simple unit 1 +// def BU : FuncUnit; // Branch unit +// def CFX_DivBypass +// : FuncUnit; // CFX divide bypass path +// def CFX_0 : FuncUnit; // CFX pipeline stage 0 + +def CFX_1 : FuncUnit; // CFX pipeline stage 1 + +// def LSU_0 : FuncUnit; // LSU pipeline +// def FPU_0 : FuncUnit; // FPU pipeline + + +def PPCE5500Itineraries : ProcessorItineraries< + [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1, + LSU_0, FPU_0], + [CR_Bypass, GPR_Bypass, FPR_Bypass], [ + InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [6, 2, 2], // Latency = 1 or 2 + [CR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<26, [CFX_DivBypass]>], + [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<16, [CFX_DivBypass]>], + [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11], // Latency = 7, Repeat rate = 1 + [FPR_Bypass]>, + InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<7, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 7 + [NoBypass, NoBypass, NoBypass]>, + InstrItinData<IntMulHD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<2, [CFX_1]>], + [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<1, [CFX_1]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<1, [CFX_1]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<2, [CFX_1]>], + [8, 2, 2], // Latency = 4 or 5, Repeat = 2 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotateD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0, SFX1]>], + [6, 2, 2], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotateDI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0, SFX1]>], + [6, 2, 2], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0]>], + [6, 2], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [5, 2], // Latency = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [5, 2, 2], // Latency = 1 + [CR_Bypass, CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [5, 2], // Latency = 1 + [CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [5, 2, 2], // Latency = 1 + [CR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLDARX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<3, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [FPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [FPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [LSU_0]>], + [8, 2], // Latency = r+3, Repeat rate = r+3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<3, [LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>]>, + InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [CFX_0]>], + [6, 2], // Latency = 2, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0], 0>]>, + InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<5, [CFX_0]>], + [9, 2], // Latency = 5, Repeat rate = 5 + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [8, 2], // Latency = 4, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [5], // Latency = 1, Repeat rate = 1 + [GPR_Bypass]>, + InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [CFX_0]>], + [8, 2], // Latency = 4, Repeat rate = 4 + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5], // Latency = 1, Repeat rate = 1 + [GPR_Bypass]>, + InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [CR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<31, [FPU_0]>], + [39, 2, 2], // Latency = 35, Repeat rate = 31 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<16, [FPU_0]>], + [24, 2, 2], // Latency = 20, Repeat rate = 16 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2, 2], // Latency = 7, Repeat rate = 1 + [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [FPU_0]>], + [12, 2], // Latency = 8, Repeat rate = 2 + [FPR_Bypass, FPR_Bypass]> +]>; + +// ===---------------------------------------------------------------------===// +// e5500 machine model for scheduling and other instruction cost heuristics. + +def PPCE5500Model : SchedMachineModel { + let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. + let MinLatency = -1; // OperandCycles are interpreted as MinLatency. + let LoadLatency = 6; // Optimistic load latency assuming bypass. + // This is overriden by OperandCycles if the + // Itineraries are queried instead. + + let Itineraries = PPCE5500Itineraries; +} diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td index 61e89ed..72a0a39 100644 --- a/lib/Target/PowerPC/PPCScheduleG3.td +++ b/lib/Target/PowerPC/PPCScheduleG3.td @@ -34,12 +34,16 @@ def G3Itineraries : ProcessorItineraries< InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>, InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>, @@ -58,6 +62,7 @@ def G3Itineraries : ProcessorItineraries< InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>, InstrItinData<SprSC , [InstrStage<2, [SRU]>]>, InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>, + InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>, InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>, InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>, InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td index e19ddfa..fc9120d 100644 --- a/lib/Target/PowerPC/PPCScheduleG4.td +++ b/lib/Target/PowerPC/PPCScheduleG4.td @@ -33,13 +33,17 @@ def G4Itineraries : ProcessorItineraries< InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>, InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, @@ -60,6 +64,7 @@ def G4Itineraries : ProcessorItineraries< InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>, InstrItinData<SprSC , [InstrStage<2, [SRU]>]>, InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>, + InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>, InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>, InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>, InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td index e7446cb..a4e82ce 100644 --- a/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -36,19 +36,24 @@ def G4PlusItineraries : ProcessorItineraries< InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>, - InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTFDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>, InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>, @@ -66,6 +71,7 @@ def G4PlusItineraries : ProcessorItineraries< InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>, InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>, + InstrItinData<FPAddSub , [InstrStage<5, [FPU1]>]>, InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>, InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>, InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td index 1371499..7c02ea0 100644 --- a/lib/Target/PowerPC/PPCScheduleG5.td +++ b/lib/Target/PowerPC/PPCScheduleG5.td @@ -27,6 +27,7 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>, InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>, InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>, + InstrItinData<IntRotateDI , [InstrStage<2, [IU1, IU2]>]>, InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>, InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>, InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>, @@ -37,15 +38,20 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>, InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>, - InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<4, [SLU]>]>, + InstrItinData<LdStSTFDU , [InstrStage<4, [SLU]>]>, InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<5, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>, InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>, @@ -53,6 +59,7 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>, InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>, InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>, InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>, @@ -69,6 +76,7 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>, InstrItinData<SprSC , [InstrStage<1, [IU2]>]>, InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>, + InstrItinData<FPAddSub , [InstrStage<6, [FPU1, FPU2]>]>, InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>, InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>, InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>, diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 0207c83..b8b1614 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -41,6 +41,8 @@ namespace PPC { DIR_750, DIR_970, DIR_A2, + DIR_E500mc, + DIR_E5500, DIR_PWR6, DIR_PWR7, DIR_64 |