diff options
| author | Stephen Hines <srhines@google.com> | 2012-09-13 19:09:19 -0700 |
|---|---|---|
| committer | Android Git Automerger <android-git-automerger@android.com> | 2012-09-13 19:09:19 -0700 |
| commit | 78c041bd883d86c81c42b98f326660277e6d0d9a (patch) | |
| tree | 52800183ec2d22164b8f396842142c3a8aab912a /lib/Target/PowerPC | |
| parent | 828ded66831c0caaeecd2291a6bfb084f373d0e4 (diff) | |
| parent | 1c4ad5ef4fab105f0c8af7edd026e00502fb6279 (diff) | |
| download | external_llvm-78c041bd883d86c81c42b98f326660277e6d0d9a.zip external_llvm-78c041bd883d86c81c42b98f326660277e6d0d9a.tar.gz external_llvm-78c041bd883d86c81c42b98f326660277e6d0d9a.tar.bz2 | |
am 1c4ad5ef: Merge branch 'upstream' into merge-2012_09_10
* commit '1c4ad5ef4fab105f0c8af7edd026e00502fb6279': (446 commits)
Revert r163556. Missed updates to tablegen files.
Update function names to conform to guidelines. No functional change intended.
test/CodeGen/X86/ms-inline-asm.ll: Relax for non-darwin x86 targets. '##InlineAsm' could not be seen in other hosts.
[ms-inline asm] Properly emit the asm directives when the AsmPrinterVariant and InlineAsmVariant don't match.
Update test case for Release builds.
Remove redundant semicolons which are null statements.
Disable stack coloring because it makes dragonegg fail bootstrapping.
[ms-inline asm] Pass the correct AsmVariant to the PrintAsmOperand() function and update the printOperand() function accordingly.
[ms-inline asm] Add support for .att_syntax directive.
Enable stack coloring.
Don't attempt to use flags from predicated instructions.
[Object] Extract Elf_Ehdr. Patch by Hemant Kulkarni!
Stack Coloring: Handle the case where END markers come before BEGIN markers properly.
Enhance PR11334 fix to support extload from v2f32/v4f32
Add "blocked" heuristic to the Hexagon MI scheduler.
Fold multiply by 0 or 1 when in UnsafeFPMath mode in SelectionDAG::getNode().
whitespace
Add boolean simplification support from CMOV
Fix an assertion failure when optimising a shufflevector incorrectly into concat_vectors, and a followup bug with SelectionDAG::getNode() creating nodes with invalid types.
Minor cleanup. No functional change.
...
Diffstat (limited to 'lib/Target/PowerPC')
| -rw-r--r-- | lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 2 | ||||
| -rw-r--r-- | lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 1 | ||||
| -rw-r--r-- | lib/Target/PowerPC/PPC.td | 10 | ||||
| -rw-r--r-- | lib/Target/PowerPC/PPCAsmPrinter.cpp | 48 | ||||
| -rw-r--r-- | lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 9 | ||||
| -rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 74 | ||||
| -rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.h | 4 | ||||
| -rw-r--r-- | lib/Target/PowerPC/PPCInstr64Bit.td | 59 | ||||
| -rw-r--r-- | lib/Target/PowerPC/PPCInstrInfo.cpp | 6 | ||||
| -rw-r--r-- | lib/Target/PowerPC/PPCInstrInfo.td | 93 | ||||
| -rw-r--r-- | lib/Target/PowerPC/PPCSchedule.td | 88 | ||||
| -rw-r--r-- | lib/Target/PowerPC/PPCSchedule440.td | 60 | ||||
| -rw-r--r-- | lib/Target/PowerPC/PPCScheduleA2.td | 81 | ||||
| -rw-r--r-- | lib/Target/PowerPC/PPCScheduleE500mc.td | 265 | ||||
| -rw-r--r-- | lib/Target/PowerPC/PPCScheduleE5500.td | 309 | ||||
| -rw-r--r-- | lib/Target/PowerPC/PPCScheduleG3.td | 7 | ||||
| -rw-r--r-- | lib/Target/PowerPC/PPCScheduleG4.td | 7 | ||||
| -rw-r--r-- | lib/Target/PowerPC/PPCScheduleG4Plus.td | 8 | ||||
| -rw-r--r-- | lib/Target/PowerPC/PPCScheduleG5.td | 10 | ||||
| -rw-r--r-- | lib/Target/PowerPC/PPCSubtarget.h | 2 |
20 files changed, 1005 insertions, 138 deletions
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index d175e3e..413142e 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -137,7 +137,7 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { char Value = MI->getOperand(OpNo).getImm(); - Value = (Value << (32-5)) >> (32-5); + Value = SignExtend32<5>(Value); O << (int)Value; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 245b457..b9ea8b5 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -64,7 +64,6 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { ZeroDirective = "\t.space\t"; Data64bitsDirective = is64Bit ? "\t.quad\t" : 0; - LCOMMDirectiveType = LCOMM::NoAlignment; AssemblerDialect = 0; // Old-Style mnemonics. 
} diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index b7f1688..cb15dad 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -35,6 +35,10 @@ def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">; def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">; def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">; def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">; +def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective", + "PPC::DIR_E500mc", "">; +def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective", + "PPC::DIR_E5500", "">; def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; @@ -94,6 +98,12 @@ def : Processor<"g5", G5Itineraries, [Directive970, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; +def : ProcessorModel<"e500mc", PPCE500mcModel, + [DirectiveE500mc, FeatureMFOCRF, + FeatureSTFIWX, FeatureBookE, FeatureISEL]>; +def : ProcessorModel<"e5500", PPCE5500Model, + [DirectiveE5500, FeatureMFOCRF, Feature64Bit, + FeatureSTFIWX, FeatureBookE, FeatureISEL]>; def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, FeatureISEL, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index f76b89c..6e0e8bb 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -109,6 +109,8 @@ namespace { bool doFinalization(Module &M); virtual void EmitFunctionEntryLabel(); + + void EmitFunctionBodyEnd(); }; /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac @@ -345,23 +347,32 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitLabel(PICBase); return; } + case PPC::LDtocJTI: + case PPC::LDtocCPT: case 
PPC::LDtoc: { // Transform %X3 = LDtoc <ga:@min1>, %X2 LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); - + // Change the opcode to LD, and the global address operand to be a // reference to the TOC entry we will synthesize later. TmpInst.setOpcode(PPC::LD); const MachineOperand &MO = MI->getOperand(1); - assert(MO.isGlobal()); - - // Map symbol -> label of TOC entry. - MCSymbol *&TOCEntry = TOC[Mang->getSymbol(MO.getGlobal())]; + + // Map symbol -> label of TOC entry + assert(MO.isGlobal() || MO.isCPI() || MO.isJTI()); + MCSymbol *MOSymbol = 0; + if (MO.isGlobal()) + MOSymbol = Mang->getSymbol(MO.getGlobal()); + else if (MO.isCPI()) + MOSymbol = GetCPISymbol(MO.getIndex()); + else if (MO.isJTI()) + MOSymbol = GetJTISymbol(MO.getIndex()); + MCSymbol *&TOCEntry = TOC[MOSymbol]; if (TOCEntry == 0) TOCEntry = GetTempSymbol("C", TOCLabelID++); - + const MCExpr *Exp = - MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC, + MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC_ENTRY, OutContext); TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); OutStreamer.EmitInstruction(TmpInst); @@ -406,9 +417,9 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName())); MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.@tocbase")); OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext), - Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/); + 8/*size*/, 0/*addrspace*/); OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, OutContext), - Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/); + 8/*size*/, 0/*addrspace*/); OutStreamer.SwitchSection(Current); MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol( @@ -441,6 +452,23 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { return AsmPrinter::doFinalization(M); } +/// EmitFunctionBodyEnd - Print the traceback table before the .size +/// directive. 
+/// +void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() { + // Only the 64-bit target requires a traceback table. For now, + // we only emit the word of zeroes that GDB requires to find + // the end of the function, and zeroes for the eight-byte + // mandatory fields. + // FIXME: We should fill in the eight-byte mandatory fields as described in + // the PPC64 ELF ABI (this is a low-priority item because GDB does not + // currently make use of these fields). + if (Subtarget.isPPC64()) { + OutStreamer.EmitIntValue(0, 4/*size*/); + OutStreamer.EmitIntValue(0, 8/*size*/); + } +} + void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { static const char *const CPUDirectives[] = { "", @@ -453,6 +481,8 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { "ppc750", "ppc970", "ppcA2", + "ppce500mc", + "ppce5500", "power6", "power7", "ppc64" diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index a00f686..e8f4d16 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -975,6 +975,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { case ISD::AND: { unsigned Imm, Imm2, SH, MB, ME; + uint64_t Imm64; // If this is an and of a value rotated between 0 and 31 bits and then and'd // with a mask, emit rlwinm @@ -993,6 +994,14 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) }; return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); } + // If this is a 64-bit zero-extension mask, emit rldicl. + if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && + isMask_64(Imm64)) { + SDValue Val = N->getOperand(0); + MB = 64 - CountTrailingOnes_64(Imm64); + SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB) }; + return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops, 3); + } // AND X, 0 -> 0, not "rlwinm 32". 
if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) { ReplaceUses(SDValue(N, 0), N->getOperand(1)); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 61d44c5..dbb3b14 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -449,6 +449,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setSchedulingPreference(Sched::Hybrid); computeRegisterProperties(); + + // The Freescale cores does better with aggressive inlining of memcpy and + // friends. Gcc uses same threshold of 128 bytes (= 32 word stores). + if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc || + Subtarget->getDarwinDirective() == PPC::DIR_E5500) { + maxStoresPerMemset = 32; + maxStoresPerMemsetOptSize = 16; + maxStoresPerMemcpy = 32; + maxStoresPerMemcpyOptSize = 8; + maxStoresPerMemmove = 32; + maxStoresPerMemmoveOptSize = 8; + + setPrefFunctionAlignment(4); + benefitFromCodePlacementOpt = true; + } } /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate @@ -517,6 +532,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; case PPCISD::MTFSF: return "PPCISD::MTFSF"; case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; + case PPCISD::CR6SET: return "PPCISD::CR6SET"; + case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; } } @@ -811,14 +828,13 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { } // Properly sign extend the value. - int ShAmt = (4-ByteSize)*8; - int MaskVal = ((int)Value << ShAmt) >> ShAmt; + int MaskVal = SignExtend32(Value, ByteSize * 8); // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. 
if (MaskVal == 0) return SDValue(); // Finally, if this value fits in a 5 bit sext field, return it - if (((MaskVal << (32-5)) >> (32-5)) == MaskVal) + if (SignExtend32<5>(MaskVal) == MaskVal) return DAG.getTargetConstant(MaskVal, MVT::i32); return SDValue(); } @@ -1204,6 +1220,14 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); const Constant *C = CP->getConstVal(); + // 64-bit SVR4 ABI code is always position-independent. + // The actual address of the GlobalValue is stored in the TOC. + if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); + return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA, + DAG.getRegister(PPC::X2, MVT::i64)); + } + unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); SDValue CPIHi = @@ -1217,6 +1241,14 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + // 64-bit SVR4 ABI code is always position-independent. + // The actual address of the GlobalValue is stored in the TOC. 
+ if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); + return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA, + DAG.getRegister(PPC::X2, MVT::i64)); + } + unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); @@ -1441,7 +1473,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, MachinePointerInfo(), MVT::i32, false, false, 0); - return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), + return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, false, 0); } @@ -2408,7 +2440,7 @@ static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { int Addr = C->getZExtValue(); if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. - (Addr << 6 >> 6) != Addr) + SignExtend32<26>(Addr) != Addr) return 0; // Top 6 bits have to be sext of immediate. return DAG.getConstant((int)C->getZExtValue() >> 2, @@ -2819,6 +2851,10 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, isTailCall, RegsToPass, Ops, NodeTys, PPCSubTarget); + // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls + if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) + Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); + // When performing tail call optimization the callee pops its arguments off // the stack. Account for this here so these bytes can be pushed back on in // PPCRegisterInfo::eliminateCallFramePseudoInstr. @@ -3116,14 +3152,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Set CR6 to true if this is a vararg call with floating args passed in - // registers. - if (isVarArg) { - SDValue SetCR(DAG.getMachineNode(seenFloatArg ? 
PPC::CRSET : PPC::CRUNSET, - dl, MVT::i32), 0); - RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR)); - } - // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; @@ -3133,6 +3161,18 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); } + // Set CR bit 6 to true if this is a vararg call with floating args passed in + // registers. + if (isVarArg) { + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Ops[] = { Chain, InFlag }; + + Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, + dl, VTs, Ops, InFlag.getNode() ? 2 : 1); + + InFlag = Chain.getValue(1); + } + if (isTailCall) PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, false, TailCallArguments); @@ -4126,7 +4166,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, unsigned TypeShiftAmt = i & (SplatBitSize-1); // vsplti + shl self. - if (SextVal == (i << (int)TypeShiftAmt)) { + if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, @@ -4171,17 +4211,17 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } // t = vsplti c, result = vsldoi t, t, 1 - if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) { + if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 2 - if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) { + if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 
0xFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 3 - if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) { + if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index b0a013b..902b188 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -174,6 +174,10 @@ namespace llvm { /// operand #3 optional in flag TC_RETURN, + /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls + CR6SET, + CR6UNSET, + /// STD_32 - This is the STD instruction for use with "32-bit" registers. STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 39778a5..cfe71d17 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -29,6 +29,9 @@ def symbolLo64 : Operand<i64> { let PrintMethod = "printSymbolLo"; let EncoderMethod = "getLO16Encoding"; } +def tocentry : Operand<iPTR> { + let MIOperandInfo = (ops i32imm:$imm); +} //===----------------------------------------------------------------------===// // 64-bit transformation functions. @@ -296,12 +299,14 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins), let PPC970_Unit = 1 in { // FXU Operations. +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm), "li $rD, $imm", IntSimple, [(set G8RC:$rD, immSExt16:$imm)]>; def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm), "lis $rD, $imm", IntSimple, [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>; +} // Logical ops. 
def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), @@ -459,7 +464,7 @@ def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS), let Defs = [CARRY] in { def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH), - "sradi $rA, $rS, $SH", IntRotateD, + "sradi $rA, $rS, $SH", IntRotateDI, [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64; } def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS), @@ -482,7 +487,7 @@ def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), let isCommutable = 1 in { def RLDIMI : MDForm_1<30, 3, (outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB), - "rldimi $rA, $rS, $SH, $MB", IntRotateD, + "rldimi $rA, $rS, $SH, $MB", IntRotateDI, []>, isPPC64, RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">; } @@ -494,11 +499,11 @@ def RLDCL : MDForm_1<30, 0, []>, isPPC64; def RLDICL : MDForm_1<30, 0, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MB), - "rldicl $rA, $rS, $SH, $MB", IntRotateD, + "rldicl $rA, $rS, $SH, $MB", IntRotateDI, []>, isPPC64; def RLDICR : MDForm_1<30, 1, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME), - "rldicr $rA, $rS, $SH, $ME", IntRotateD, + "rldicr $rA, $rS, $SH, $ME", IntRotateDI, []>, isPPC64; def RLWINM8 : MForm_2<21, @@ -541,19 +546,19 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), let mayLoad = 1 in def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, ptr_rc:$rA), - "lhau $rD, $disp($rA)", LdStLoad, + "lhau $rD, $disp($rA)", LdStLHAU, []>, RegConstraint<"$rA = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! 
def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhaux $rD, $addr", LdStLoad, + "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LWAUX : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lwaux $rD, $addr", LdStLoad, + "lwaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } @@ -584,31 +589,31 @@ def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), // Update forms. let mayLoad = 1 in { def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStLoad, + "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStLoad, + "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStLoad, + "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lbzux $rD, $addr", LdStLoad, + "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LHZUX8 : XForm_1<31, 331, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhzux $rD, $addr", LdStLoad, + "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lwzux $rD, $addr", LdStLoad, + "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; } @@ -624,6 +629,14 @@ def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), "", [(set 
G8RC:$rD, (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64; +def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), + "", + [(set G8RC:$rD, + (PPCtoc_entry tjumptable:$disp, G8RC:$reg))]>, isPPC64; +def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), + "", + [(set G8RC:$rD, + (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64; let hasSideEffects = 1 in { let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo. @@ -642,13 +655,13 @@ def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), let mayLoad = 1 in def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr), - "ldu $rD, $addr", LdStLD, + "ldu $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "ldux $rD, $addr", LdStLoad, + "ldux $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } @@ -695,14 +708,14 @@ let PPC970_Unit = 2 in { def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStore, + "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStore, + "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, @@ -710,7 +723,7 @@ def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStore, + "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, 
iaddroff:$ptroff))]>, @@ -718,7 +731,7 @@ def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, s16immX4:$ptroff, ptr_rc:$ptrreg), - "stdu $rS, $ptroff($ptrreg)", LdStSTD, + "stdu $rS, $ptroff($ptrreg)", LdStSTDU, [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">, @@ -727,7 +740,7 @@ def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStore, + "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -736,7 +749,7 @@ def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStore, + "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -745,7 +758,7 @@ def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStore, + "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -754,7 +767,7 @@ def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stdux $rS, $ptroff, $ptrreg", LdStStore, + "stdux $rS, $ptroff, $ptrreg", LdStSTDU, [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 47f09dc..d2df664 
100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -54,7 +54,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( const TargetMachine *TM, const ScheduleDAG *DAG) const { unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective(); - if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) { + if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 || + Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) { const InstrItineraryData *II = TM->getInstrItineraryData(); return new PPCScoreboardHazardRecognizer(II, DAG); } @@ -70,7 +71,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); // Most subtargets use a PPC970 recognizer. - if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2) { + if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 && + Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) { const TargetInstrInfo *TII = TM.getInstrInfo(); assert(TII && "No InstrInfo?"); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index f57f0c9..a503908 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -123,9 +123,11 @@ def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, + SDNPInGlue, SDNPOutGlue]>; def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, + SDNPInGlue, SDNPOutGlue]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCbctrl_Darwin : 
SDNode<"PPCISD::BCTRL_Darwin", SDTNone, @@ -153,6 +155,12 @@ def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, [SDNPHasChain, SDNPMayStore]>; +// Instructions to set/unset CR bit 6 for SVR4 vararg calls +def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + // Instructions to support atomic operations def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx, [SDNPHasChain, SDNPMayLoad]>; @@ -330,9 +338,6 @@ def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg); let EncoderMethod = "getMemRIXEncoding"; } -def tocentry : Operand<iPTR> { - let MIOperandInfo = (ops i32imm:$imm); -} // PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg // that doesn't matter. @@ -673,7 +678,7 @@ def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src), [(set GPRC:$rD, (load iaddr:$src))]>; def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src), - "lfs $rD, $src", LdStLFDU, + "lfs $rD, $src", LdStLFD, [(set F4RC:$rD, (load iaddr:$src))]>; def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), "lfd $rD, $src", LdStLFD, @@ -683,32 +688,32 @@ def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), // Unindexed (r+i) Loads with Update (preinc). 
let mayLoad = 1 in { def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStLoad, + "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhau $rD, $addr", LdStLoad, + "lhau $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStLoad, + "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStLoad, + "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lfs $rD, $addr", LdStLFDU, + "lfsu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lfd $rD, $addr", LdStLFD, + "lfdu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; @@ -716,37 +721,37 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), // Indexed (r+r) Loads with Update (preinc). 
def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lbzux $rD, $addr", LdStLoad, + "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhaux $rD, $addr", LdStLoad, + "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LHZUX : XForm_1<31, 331, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhzux $rD, $addr", LdStLoad, + "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lwzux $rD, $addr", LdStLoad, + "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lfsux $rD, $addr", LdStLoad, + "lfsux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lfdux $rD, $addr", LdStLoad, + "lfdux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; } @@ -778,10 +783,10 @@ def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src), [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>; def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src), - "lfsx $frD, $src", LdStLFDU, + "lfsx $frD, $src", LdStLFD, [(set F4RC:$frD, (load xaddr:$src))]>; def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), - "lfdx $frD, $src", LdStLFDU, + "lfdx $frD, $src", LdStLFD, [(set F8RC:$frD, (load xaddr:$src))]>; } @@ -801,10 +806,10 @@ def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src), "stw $rS, $src", LdStStore, [(store GPRC:$rS, iaddr:$src)]>; def STFS : DForm_1<52, (outs), (ins F4RC:$rS, 
memri:$dst), - "stfs $rS, $dst", LdStUX, + "stfs $rS, $dst", LdStSTFD, [(store F4RC:$rS, iaddr:$dst)]>; def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), - "stfd $rS, $dst", LdStUX, + "stfd $rS, $dst", LdStSTFD, [(store F8RC:$rS, iaddr:$dst)]>; } @@ -812,33 +817,33 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), let PPC970_Unit = 2 in { def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStore, + "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStore, + "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStore, + "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfsu $rS, $ptroff($ptrreg)", LdStStore, + "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfdu $rS, $ptroff($ptrreg)", LdStStore, + "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; @@ 
-863,7 +868,7 @@ def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStore, + "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -872,7 +877,7 @@ def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStore, + "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -881,7 +886,7 @@ def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStore, + "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, @@ -889,7 +894,7 @@ def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfsux $rS, $ptroff, $ptrreg", LdStStore, + "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, @@ -897,7 +902,7 @@ def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res), (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfdux $rS, $ptroff, $ptrreg", LdStStore, + "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, @@ -913,14 +918,14 @@ def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst), 
PPC970_DGroup_Cracked; def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst), - "stfiwx $frS, $dst", LdStUX, + "stfiwx $frS, $dst", LdStSTFD, [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>; def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst), - "stfsx $frS, $dst", LdStUX, + "stfsx $frS, $dst", LdStSTFD, [(store F4RC:$frS, xaddr:$dst)]>; def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst), - "stfdx $frS, $dst", LdStUX, + "stfdx $frS, $dst", LdStSTFD, [(store F8RC:$frS, xaddr:$dst)]>; } @@ -964,7 +969,7 @@ def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>; } -let isReMaterializable = 1 in { +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm), "li $rD, $imm", IntSimple, [(set GPRC:$rD, immSExt16:$imm)]>; @@ -1143,6 +1148,16 @@ def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins), "crxor $dst, $dst, $dst", BrCR, []>; +let Defs = [CR1EQ], CRD = 6 in { +def CR6SET : XLForm_1_ext<19, 289, (outs), (ins), + "creqv 6, 6, 6", BrCR, + [(PPCcr6set)]>; + +def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), + "crxor 6, 6, 6", BrCR, + [(PPCcr6unset)]>; +} + // XFX-Form instructions. Instructions that deal with SPRs. 
// let Uses = [CTR] in { @@ -1233,7 +1248,7 @@ let Uses = [RM] in { PPC970_DGroup_Single, PPC970_Unit_FPU; def FADDrtz: AForm_2<63, 21, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fadd $FRT, $FRA, $FRB", FPGeneral, + "fadd $FRT, $FRA, $FRB", FPAddSub, [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>, PPC970_DGroup_Single, PPC970_Unit_FPU; } @@ -1364,7 +1379,7 @@ def FSELS : AForm_1<63, 23, let Uses = [RM] in { def FADD : AForm_2<63, 21, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fadd $FRT, $FRA, $FRB", FPGeneral, + "fadd $FRT, $FRA, $FRB", FPAddSub, [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>; def FADDS : AForm_2<59, 21, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), @@ -1388,7 +1403,7 @@ let Uses = [RM] in { [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>; def FSUB : AForm_2<63, 20, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fsub $FRT, $FRA, $FRB", FPGeneral, + "fsub $FRT, $FRA, $FRB", FPAddSub, [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>; def FSUBS : AForm_2<59, 20, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index 6a6ccb9..660c0c3 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -40,6 +40,7 @@ def IntMulHWU : InstrItinClass; def IntMulLI : InstrItinClass; def IntRFID : InstrItinClass; def IntRotateD : InstrItinClass; +def IntRotateDI : InstrItinClass; def IntRotate : InstrItinClass; def IntShift : InstrItinClass; def IntTrapD : InstrItinClass; @@ -52,15 +53,18 @@ def LdStDCBA : InstrItinClass; def LdStDCBF : InstrItinClass; def LdStDCBI : InstrItinClass; def LdStLoad : InstrItinClass; +def LdStLoadUpd : InstrItinClass; def LdStStore : InstrItinClass; +def LdStStoreUpd : InstrItinClass; def LdStDSS : InstrItinClass; def LdStICBI : InstrItinClass; -def LdStUX : InstrItinClass; def LdStLD : InstrItinClass; +def LdStLDU : InstrItinClass; def LdStLDARX : InstrItinClass; def LdStLFD : InstrItinClass; def LdStLFDU : 
InstrItinClass; def LdStLHA : InstrItinClass; +def LdStLHAU : InstrItinClass; def LdStLMW : InstrItinClass; def LdStLVecX : InstrItinClass; def LdStLWA : InstrItinClass; @@ -69,6 +73,9 @@ def LdStSLBIA : InstrItinClass; def LdStSLBIE : InstrItinClass; def LdStSTD : InstrItinClass; def LdStSTDCX : InstrItinClass; +def LdStSTDU : InstrItinClass; +def LdStSTFD : InstrItinClass; +def LdStSTFDU : InstrItinClass; def LdStSTVEBX : InstrItinClass; def LdStSTWCX : InstrItinClass; def LdStSync : InstrItinClass; @@ -86,6 +93,7 @@ def SprMTSRIN : InstrItinClass; def SprRFI : InstrItinClass; def SprSC : InstrItinClass; def FPGeneral : InstrItinClass; +def FPAddSub : InstrItinClass; def FPCompare : InstrItinClass; def FPDivD : InstrItinClass; def FPDivS : InstrItinClass; @@ -110,6 +118,8 @@ include "PPCScheduleG4.td" include "PPCScheduleG4Plus.td" include "PPCScheduleG5.td" include "PPCScheduleA2.td" +include "PPCScheduleE500mc.td" +include "PPCScheduleE5500.td" //===----------------------------------------------------------------------===// // Instruction to itinerary class map - When add new opcodes to the supported @@ -171,7 +181,7 @@ include "PPCScheduleA2.td" // extsh IntSimple // extsw IntSimple // fabs FPGeneral -// fadd FPGeneral +// fadd FPAddSub // fadds FPGeneral // fcfid FPGeneral // fcmpo FPCompare @@ -201,35 +211,35 @@ include "PPCScheduleA2.td" // fsel FPGeneral // fsqrt FPSqrt // fsqrts FPSqrt -// fsub FPGeneral +// fsub FPAddSub // fsubs FPGeneral // icbi LdStICBI // isync SprISYNC // lbz LdStLoad -// lbzu LdStLoad -// lbzux LdStUX +// lbzu LdStLoadUpd +// lbzux LdStLoadUpd // lbzx LdStLoad // ld LdStLD // ldarx LdStLDARX -// ldu LdStLD -// ldux LdStLD +// ldu LdStLDU +// ldux LdStLDU // ldx LdStLD // lfd LdStLFD // lfdu LdStLFDU // lfdux LdStLFDU -// lfdx LdStLFDU -// lfs LdStLFDU +// lfdx LdStLFD +// lfs LdStLFD // lfsu LdStLFDU // lfsux LdStLFDU -// lfsx LdStLFDU +// lfsx LdStLFD // lha LdStLHA -// lhau LdStLHA -// lhaux LdStLHA +// lhau LdStLHAU +// lhaux 
LdStLHAU // lhax LdStLHA // lhbrx LdStLoad // lhz LdStLoad -// lhzu LdStLoad -// lhzux LdStUX +// lhzu LdStLoadUpd +// lhzux LdStLoadUpd // lhzx LdStLoad // lmw LdStLMW // lswi LdStLMW @@ -243,12 +253,12 @@ include "PPCScheduleA2.td" // lvxl LdStLVecX // lwa LdStLWA // lwarx LdStLWARX -// lwaux LdStLHA +// lwaux LdStLHAU // lwax LdStLHA // lwbrx LdStLoad // lwz LdStLoad -// lwzu LdStLoad -// lwzux LdStUX +// lwzu LdStLoadUpd +// lwzux LdStLoadUpd // lwzx LdStLoad // mcrf BrMCR // mcrfs FPGeneral @@ -292,10 +302,10 @@ include "PPCScheduleA2.td" // rfid IntRFID // rldcl IntRotateD // rldcr IntRotateD -// rldic IntRotateD -// rldicl IntRotateD -// rldicr IntRotateD -// rldimi IntRotateD +// rldic IntRotateDI +// rldicl IntRotateDI +// rldicr IntRotateDI +// rldimi IntRotateDI // rlwimi IntRotate // rlwinm IntGeneral // rlwnm IntGeneral @@ -305,33 +315,33 @@ include "PPCScheduleA2.td" // sld IntRotateD // slw IntGeneral // srad IntRotateD -// sradi IntRotateD +// sradi IntRotateDI // sraw IntShift // srawi IntShift // srd IntRotateD // srw IntGeneral // stb LdStStore -// stbu LdStStore -// stbux LdStStore +// stbu LdStStoreUpd +// stbux LdStStoreUpd // stbx LdStStore // std LdStSTD // stdcx. LdStSTDCX -// stdu LdStSTD -// stdux LdStSTD +// stdu LdStSTDU +// stdux LdStSTDU // stdx LdStSTD -// stfd LdStUX -// stfdu LdStUX -// stfdux LdStUX -// stfdx LdStUX -// stfiwx LdStUX -// stfs LdStUX -// stfsu LdStUX -// stfsux LdStUX -// stfsx LdStUX +// stfd LdStSTFD +// stfdu LdStSTFDU +// stfdux LdStSTFDU +// stfdx LdStSTFD +// stfiwx LdStSTFD +// stfs LdStSTFD +// stfsu LdStSTFDU +// stfsux LdStSTFDU +// stfsx LdStSTFD // sth LdStStore // sthbrx LdStStore -// sthu LdStStore -// sthux LdStStore +// sthu LdStStoreUpd +// sthux LdStStoreUpd // sthx LdStStore // stmw LdStLMW // stswi LdStLMW @@ -344,8 +354,8 @@ include "PPCScheduleA2.td" // stw LdStStore // stwbrx LdStStore // stwcx. 
LdStSTWCX -// stwu LdStStore -// stwux LdStStore +// stwu LdStStoreUpd +// stwux LdStStoreUpd // stwx LdStStore // subf IntGeneral // subfc IntGeneral diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td index cd0fb70..37b6eac 100644 --- a/lib/Target/PowerPC/PPCSchedule440.td +++ b/lib/Target/PowerPC/PPCSchedule440.td @@ -288,6 +288,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<2, [LWB]>], [9, 5], [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoadUpd , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [9, 5], + [GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStStore , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -297,6 +306,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<2, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -306,7 +324,7 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, - InstrItinData<LdStUX , [InstrStage<1, [IFTH1, IFTH2]>, + InstrItinData<LdStSTFD , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, InstrStage<1, [LRACC]>, @@ -315,6 +333,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5, 5], [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + 
InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5, 5], + [NoBypass, GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStLFD , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -342,6 +369,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStLMW , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -371,6 +407,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<2, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDU , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1]>, @@ -537,6 +582,19 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [FWB]>], [10, 4, 4], [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [FRACC]>, + InstrStage<1, [FEXE1]>, + InstrStage<1, [FEXE2]>, + InstrStage<1, [FEXE3]>, + InstrStage<1, [FEXE4]>, + InstrStage<1, [FEXE5]>, + InstrStage<1, [FEXE6]>, + InstrStage<1, [FWB]>], + [10, 4, 4], + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, InstrItinData<FPCompare , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td index 
4d4a5d0..ba63b5c 100644 --- a/lib/Target/PowerPC/PPCScheduleA2.td +++ b/lib/Target/PowerPC/PPCScheduleA2.td @@ -181,6 +181,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [10, 7, 7], [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotateDI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, InstrItinData<IntShift , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -302,7 +313,18 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7], [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLD , [InstrStage<4, + InstrItinData<LdStLoadUpd , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLDU , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7]>, @@ -324,6 +346,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [13, 7], [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, 
InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStICBI , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -335,7 +368,7 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7], [NoBypass, GPR_Bypass]>, - InstrItinData<LdStUX , [InstrStage<4, + InstrItinData<LdStSTFD , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7]>, @@ -346,6 +379,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7, 7], [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [NoBypass, FPR_Bypass, FPR_Bypass]>, InstrItinData<LdStLFD , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -379,6 +423,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, 
InstrStage<1, [XEX6]>], + [14, 7], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStLMW , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -412,6 +467,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [13, 7], [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTDU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStSTDCX , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -593,6 +659,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], [15, 7, 7], [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [15, 7, 7], + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, InstrItinData<FPCompare , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, diff --git a/lib/Target/PowerPC/PPCScheduleE500mc.td b/lib/Target/PowerPC/PPCScheduleE500mc.td new file mode 100644 index 0000000..9bb779a --- /dev/null +++ b/lib/Target/PowerPC/PPCScheduleE500mc.td @@ -0,0 +1,265 @@ +//===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// 
+// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the Freescale e500mc 32-bit +// Power processor. +// +// All information is derived from the "e500mc Core Reference Manual", +// Freescale Document Number E500MCRM, Rev. 1, 03/2012. +// +//===----------------------------------------------------------------------===// +// Relevant functional units in the Freescale e500mc core: +// +// * Decode & Dispatch +// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue +// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ). +def DIS0 : FuncUnit; // Dispatch stage - insn 1 +def DIS1 : FuncUnit; // Dispatch stage - insn 2 + +// * Execute +// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX. +// Some instructions can only execute in SFX0 but not SFX1. +// The CFX has a bypass path, allowing non-divide instructions to execute +// while a divide instruction is executed. 
+def SFX0 : FuncUnit; // Simple unit 0 +def SFX1 : FuncUnit; // Simple unit 1 +def BU : FuncUnit; // Branch unit +def CFX_DivBypass + : FuncUnit; // CFX divide bypass path +def CFX_0 : FuncUnit; // CFX pipeline +def LSU_0 : FuncUnit; // LSU pipeline +def FPU_0 : FuncUnit; // FPU pipeline + +def PPCE500mcItineraries : ProcessorItineraries< + [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0], + [CR_Bypass, GPR_Bypass, FPR_Bypass], [ + InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 1, 1], // Latency = 1 or 2 + [CR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<14, [CFX_DivBypass]>], + [17, 1, 1], // Latency=4..35, Repeat= 4..35 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<8, [FPU_0]>], + [11], // Latency = 8 + [FPR_Bypass]>, + InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<8, [FPU_0]>], + [11, 1, 1], // Latency = 8 + [NoBypass, NoBypass, NoBypass]>, + InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotate , [InstrStage<1, 
[DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0]>], + [5, 1], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [4, 1], // Latency = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [4, 1, 1], // Latency = 1 + [CR_Bypass, CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [4, 1], // Latency = 1 + [CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [CR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, 
SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 1, 1], // Latency = 4 + [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 1, 1], // Latency = 4 + [FPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 1], // Latency = r+3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<3, [LSU_0]>], + [6, 1, 1], // Latency = 3, Repeat rate = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>]>, + InstrItinData<SprMFSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [7, 1], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMTMSR , 
[InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0, SFX1]>], + [5, 1], // Latency = 2, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMTSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0]>], + [5, 1], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0], 0>]>, + InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<5, [SFX0]>], + [8, 1], + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [CR_Bypass, GPR_Bypass]>, + InstrItinData<SprMTSRIN , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0]>], + [4, 1], + [NoBypass, GPR_Bypass]>, + InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [FPU_0]>], + [11, 1, 1], // Latency = 8, Repeat rate = 2 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [FPU_0]>], + [13, 1, 1], // Latency = 10, Repeat rate = 4 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [FPU_0]>], + [11, 1, 1], // Latency = 8, Repeat rate = 2 + [CR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<68, [FPU_0]>], + [71, 1, 1], // Latency = 68, Repeat rate = 68 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>, + 
InstrStage<38, [FPU_0]>], + [41, 1, 1], // Latency = 38, Repeat rate = 38 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [FPU_0]>], + [13, 1, 1, 1], // Latency = 10, Repeat rate = 4 + [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<38, [FPU_0]>], + [41, 1], // Latency = 38, Repeat rate = 38 + [FPR_Bypass, FPR_Bypass]> +]>; + +// ===---------------------------------------------------------------------===// +// e500mc machine model for scheduling and other instruction cost heuristics. + +def PPCE500mcModel : SchedMachineModel { + let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. + let MinLatency = -1; // OperandCycles are interpreted as MinLatency. + let LoadLatency = 5; // Optimistic load latency assuming bypass. + // This is overridden by OperandCycles if the + // Itineraries are queried instead. + + let Itineraries = PPCE500mcItineraries; +} diff --git a/lib/Target/PowerPC/PPCScheduleE5500.td b/lib/Target/PowerPC/PPCScheduleE5500.td new file mode 100644 index 0000000..d7e11ac --- /dev/null +++ b/lib/Target/PowerPC/PPCScheduleE5500.td @@ -0,0 +1,309 @@ +//===-- PPCScheduleE5500.td - e5500 Scheduling Defs --------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the Freescale e5500 64-bit +// Power processor. +// +// All information is derived from the "e5500 Core Reference Manual", +// Freescale Document Number e5500RM, Rev. 1, 03/2012.
+// +//===----------------------------------------------------------------------===// +// Relevant functional units in the Freescale e5500 core +// (These are the same as for the e500mc) +// +// * Decode & Dispatch +// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue +// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ). +// def DIS0 : FuncUnit; +// def DIS1 : FuncUnit; + +// * Execute +// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX. +// The CFX has a bypass path, allowing non-divide instructions to execute +// while a divide instruction is being executed. +// def SFX0 : FuncUnit; // Simple unit 0 +// def SFX1 : FuncUnit; // Simple unit 1 +// def BU : FuncUnit; // Branch unit +// def CFX_DivBypass +// : FuncUnit; // CFX divide bypass path +// def CFX_0 : FuncUnit; // CFX pipeline stage 0 + +def CFX_1 : FuncUnit; // CFX pipeline stage 1 + +// def LSU_0 : FuncUnit; // LSU pipeline +// def FPU_0 : FuncUnit; // FPU pipeline + + +def PPCE5500Itineraries : ProcessorItineraries< + [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1, + LSU_0, FPU_0], + [CR_Bypass, GPR_Bypass, FPR_Bypass], [ + InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [6, 2, 2], // Latency = 1 or 2 + [CR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<26, [CFX_DivBypass]>], + [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<16, [CFX_DivBypass]>], + [20, 
2, 2], // Latency= 4..16, Repeat rate= 4..16 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11], // Latency = 7, Repeat rate = 1 + [FPR_Bypass]>, + InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<7, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 7 + [NoBypass, NoBypass, NoBypass]>, + InstrItinData<IntMulHD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<2, [CFX_1]>], + [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<1, [CFX_1]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<1, [CFX_1]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<2, [CFX_1]>], + [8, 2, 2], // Latency = 4 or 5, Repeat = 2 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotateD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0, SFX1]>], + [6, 2, 2], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotateDI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0, SFX1]>], + [6, 2, 2], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0]>], 
+ [6, 2], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [5, 2], // Latency = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [5, 2, 2], // Latency = 1 + [CR_Bypass, CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [5, 2], // Latency = 1 + [CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [5, 2, 2], // Latency = 1 + [CR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLDARX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<3, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + 
InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [FPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [FPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [LSU_0]>], + [8, 2], // Latency = r+3, Repeat rate = r+3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<3, [LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 3 + 
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>]>, + InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [CFX_0]>], + [6, 2], // Latency = 2, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0], 0>]>, + InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<5, [CFX_0]>], + [9, 2], // Latency = 5, Repeat rate = 5 + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [8, 2], // Latency = 4, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [5], // Latency = 1, Repeat rate = 1 + [GPR_Bypass]>, + InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [CFX_0]>], + [8, 2], // Latency = 4, Repeat rate = 4 + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5], // Latency = 1, Repeat rate = 1 + [GPR_Bypass]>, + InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + 
InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [CR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<31, [FPU_0]>], + [39, 2, 2], // Latency = 35, Repeat rate = 31 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<16, [FPU_0]>], + [24, 2, 2], // Latency = 20, Repeat rate = 16 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2, 2], // Latency = 7, Repeat rate = 1 + [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [FPU_0]>], + [12, 2], // Latency = 8, Repeat rate = 2 + [FPR_Bypass, FPR_Bypass]> +]>; + +// ===---------------------------------------------------------------------===// +// e5500 machine model for scheduling and other instruction cost heuristics. + +def PPCE5500Model : SchedMachineModel { + let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. + let MinLatency = -1; // OperandCycles are interpreted as MinLatency. + let LoadLatency = 6; // Optimistic load latency assuming bypass. + // This is overridden by OperandCycles if the + // Itineraries are queried instead.
+ + let Itineraries = PPCE5500Itineraries; +} diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td index 61e89ed..72a0a39 100644 --- a/lib/Target/PowerPC/PPCScheduleG3.td +++ b/lib/Target/PowerPC/PPCScheduleG3.td @@ -34,12 +34,16 @@ def G3Itineraries : ProcessorItineraries< InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>, InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>, @@ -58,6 +62,7 @@ def G3Itineraries : ProcessorItineraries< InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>, InstrItinData<SprSC , [InstrStage<2, [SRU]>]>, InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>, + InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>, InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>, InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>, InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td index e19ddfa..fc9120d 100644 --- a/lib/Target/PowerPC/PPCScheduleG4.td +++ b/lib/Target/PowerPC/PPCScheduleG4.td @@ -33,13 +33,17 @@ def G4Itineraries : ProcessorItineraries< InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<2, 
[SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>, InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, @@ -60,6 +64,7 @@ def G4Itineraries : ProcessorItineraries< InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>, InstrItinData<SprSC , [InstrStage<2, [SRU]>]>, InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>, + InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>, InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>, InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>, InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td index e7446cb..a4e82ce 100644 --- a/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -36,19 +36,24 @@ def G4PlusItineraries : ProcessorItineraries< InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>, - InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<3, [SLU]>]>, + 
InstrItinData<LdStSTFDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>, InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>, @@ -66,6 +71,7 @@ def G4PlusItineraries : ProcessorItineraries< InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>, InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>, + InstrItinData<FPAddSub , [InstrStage<5, [FPU1]>]>, InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>, InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>, InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td index 1371499..7c02ea0 100644 --- a/lib/Target/PowerPC/PPCScheduleG5.td +++ b/lib/Target/PowerPC/PPCScheduleG5.td @@ -27,6 +27,7 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>, InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>, InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>, + InstrItinData<IntRotateDI , [InstrStage<2, [IU1, IU2]>]>, InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>, InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>, InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>, @@ -37,15 +38,20 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>, InstrItinData<LdStDCBF , 
[InstrStage<3, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>, - InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<4, [SLU]>]>, + InstrItinData<LdStSTFDU , [InstrStage<4, [SLU]>]>, InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<5, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>, InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>, @@ -53,6 +59,7 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>, InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>, InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>, InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>, @@ -69,6 +76,7 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>, InstrItinData<SprSC , [InstrStage<1, [IU2]>]>, InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>, + InstrItinData<FPAddSub , [InstrStage<6, [FPU1, FPU2]>]>, InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>, InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>, InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>, diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 0207c83..b8b1614 100644 --- 
a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -41,6 +41,8 @@ namespace PPC { DIR_750, DIR_970, DIR_A2, + DIR_E500mc, + DIR_E5500, DIR_PWR6, DIR_PWR7, DIR_64 |
