aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/PowerPC
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp1
-rw-r--r--lib/Target/PowerPC/PPC.td10
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp48
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp9
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp74
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h4
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td59
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp6
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td93
-rw-r--r--lib/Target/PowerPC/PPCSchedule.td88
-rw-r--r--lib/Target/PowerPC/PPCSchedule440.td60
-rw-r--r--lib/Target/PowerPC/PPCScheduleA2.td81
-rw-r--r--lib/Target/PowerPC/PPCScheduleE500mc.td265
-rw-r--r--lib/Target/PowerPC/PPCScheduleE5500.td309
-rw-r--r--lib/Target/PowerPC/PPCScheduleG3.td7
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4.td7
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4Plus.td8
-rw-r--r--lib/Target/PowerPC/PPCScheduleG5.td10
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h2
20 files changed, 1005 insertions, 138 deletions
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index d175e3e..413142e 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -137,7 +137,7 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
char Value = MI->getOperand(OpNo).getImm();
- Value = (Value << (32-5)) >> (32-5);
+ Value = SignExtend32<5>(Value);
O << (int)Value;
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 245b457..b9ea8b5 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -64,7 +64,6 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
ZeroDirective = "\t.space\t";
Data64bitsDirective = is64Bit ? "\t.quad\t" : 0;
- LCOMMDirectiveType = LCOMM::NoAlignment;
AssemblerDialect = 0; // Old-Style mnemonics.
}
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index b7f1688..cb15dad 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -35,6 +35,10 @@ def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">;
+def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective",
+ "PPC::DIR_E500mc", "">;
+def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective",
+ "PPC::DIR_E5500", "">;
def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
@@ -94,6 +98,12 @@ def : Processor<"g5", G5Itineraries,
[Directive970, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
+def : ProcessorModel<"e500mc", PPCE500mcModel,
+ [DirectiveE500mc, FeatureMFOCRF,
+ FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
+def : ProcessorModel<"e5500", PPCE5500Model,
+ [DirectiveE5500, FeatureMFOCRF, Feature64Bit,
+ FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
FeatureMFOCRF, FeatureFSqrt,
FeatureSTFIWX, FeatureISEL,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index f76b89c..6e0e8bb 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -109,6 +109,8 @@ namespace {
bool doFinalization(Module &M);
virtual void EmitFunctionEntryLabel();
+
+ void EmitFunctionBodyEnd();
};
/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
@@ -345,23 +347,32 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitLabel(PICBase);
return;
}
+ case PPC::LDtocJTI:
+ case PPC::LDtocCPT:
case PPC::LDtoc: {
// Transform %X3 = LDtoc <ga:@min1>, %X2
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
-
+
// Change the opcode to LD, and the global address operand to be a
// reference to the TOC entry we will synthesize later.
TmpInst.setOpcode(PPC::LD);
const MachineOperand &MO = MI->getOperand(1);
- assert(MO.isGlobal());
-
- // Map symbol -> label of TOC entry.
- MCSymbol *&TOCEntry = TOC[Mang->getSymbol(MO.getGlobal())];
+
+ // Map symbol -> label of TOC entry
+ assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
+ MCSymbol *MOSymbol = 0;
+ if (MO.isGlobal())
+ MOSymbol = Mang->getSymbol(MO.getGlobal());
+ else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isJTI())
+ MOSymbol = GetJTISymbol(MO.getIndex());
+ MCSymbol *&TOCEntry = TOC[MOSymbol];
if (TOCEntry == 0)
TOCEntry = GetTempSymbol("C", TOCLabelID++);
-
+
const MCExpr *Exp =
- MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
+ MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC_ENTRY,
OutContext);
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
OutStreamer.EmitInstruction(TmpInst);
@@ -406,9 +417,9 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName()));
MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.@tocbase"));
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
- Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/);
+ 8/*size*/, 0/*addrspace*/);
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, OutContext),
- Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/);
+ 8/*size*/, 0/*addrspace*/);
OutStreamer.SwitchSection(Current);
MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
@@ -441,6 +452,23 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
return AsmPrinter::doFinalization(M);
}
+/// EmitFunctionBodyEnd - Print the traceback table before the .size
+/// directive.
+///
+void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() {
+ // Only the 64-bit target requires a traceback table. For now,
+ // we only emit the word of zeroes that GDB requires to find
+ // the end of the function, and zeroes for the eight-byte
+ // mandatory fields.
+ // FIXME: We should fill in the eight-byte mandatory fields as described in
+ // the PPC64 ELF ABI (this is a low-priority item because GDB does not
+ // currently make use of these fields).
+ if (Subtarget.isPPC64()) {
+ OutStreamer.EmitIntValue(0, 4/*size*/);
+ OutStreamer.EmitIntValue(0, 8/*size*/);
+ }
+}
+
void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
static const char *const CPUDirectives[] = {
"",
@@ -453,6 +481,8 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
"ppc750",
"ppc970",
"ppcA2",
+ "ppce500mc",
+ "ppce5500",
"power6",
"power7",
"ppc64"
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index a00f686..e8f4d16 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -975,6 +975,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
case ISD::AND: {
unsigned Imm, Imm2, SH, MB, ME;
+ uint64_t Imm64;
// If this is an and of a value rotated between 0 and 31 bits and then and'd
// with a mask, emit rlwinm
@@ -993,6 +994,14 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
+ // If this is a 64-bit zero-extension mask, emit rldicl.
+ if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
+ isMask_64(Imm64)) {
+ SDValue Val = N->getOperand(0);
+ MB = 64 - CountTrailingOnes_64(Imm64);
+ SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB) };
+ return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops, 3);
+ }
// AND X, 0 -> 0, not "rlwinm 32".
if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
ReplaceUses(SDValue(N, 0), N->getOperand(1));
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 61d44c5..dbb3b14 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -449,6 +449,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setSchedulingPreference(Sched::Hybrid);
computeRegisterProperties();
+
+ // The Freescale cores do better with aggressive inlining of memcpy and
+ // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
+ if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
+ Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
+ maxStoresPerMemset = 32;
+ maxStoresPerMemsetOptSize = 16;
+ maxStoresPerMemcpy = 32;
+ maxStoresPerMemcpyOptSize = 8;
+ maxStoresPerMemmove = 32;
+ maxStoresPerMemmoveOptSize = 8;
+
+ setPrefFunctionAlignment(4);
+ benefitFromCodePlacementOpt = true;
+ }
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
@@ -517,6 +532,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
case PPCISD::MTFSF: return "PPCISD::MTFSF";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
+ case PPCISD::CR6SET: return "PPCISD::CR6SET";
+ case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
}
}
@@ -811,14 +828,13 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
}
// Properly sign extend the value.
- int ShAmt = (4-ByteSize)*8;
- int MaskVal = ((int)Value << ShAmt) >> ShAmt;
+ int MaskVal = SignExtend32(Value, ByteSize * 8);
// If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
if (MaskVal == 0) return SDValue();
// Finally, if this value fits in a 5 bit sext field, return it
- if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
+ if (SignExtend32<5>(MaskVal) == MaskVal)
return DAG.getTargetConstant(MaskVal, MVT::i32);
return SDValue();
}
@@ -1204,6 +1220,14 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
const Constant *C = CP->getConstVal();
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the constant pool entry is stored in the TOC.
+ if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
+ return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
SDValue CPIHi =
@@ -1217,6 +1241,14 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the jump table is stored in the TOC.
+ if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
@@ -1441,7 +1473,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
MachinePointerInfo(),
MVT::i32, false, false, 0);
- return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
+ return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
false, false, false, 0);
}
@@ -2408,7 +2440,7 @@ static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
int Addr = C->getZExtValue();
if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
- (Addr << 6 >> 6) != Addr)
+ SignExtend32<26>(Addr) != Addr)
return 0; // Top 6 bits have to be sext of immediate.
return DAG.getConstant((int)C->getZExtValue() >> 2,
@@ -2819,6 +2851,10 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
isTailCall, RegsToPass, Ops, NodeTys,
PPCSubTarget);
+ // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
+ if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
+ Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
+
// When performing tail call optimization the callee pops its arguments off
// the stack. Account for this here so these bytes can be pushed back on in
// PPCRegisterInfo::eliminateCallFramePseudoInstr.
@@ -3116,14 +3152,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
- // Set CR6 to true if this is a vararg call with floating args passed in
- // registers.
- if (isVarArg) {
- SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET,
- dl, MVT::i32), 0);
- RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
- }
-
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
@@ -3133,6 +3161,18 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
InFlag = Chain.getValue(1);
}
+ // Set CR bit 6 to true if this is a vararg call with floating args passed in
+ // registers.
+ if (isVarArg) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, InFlag };
+
+ Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
+ dl, VTs, Ops, InFlag.getNode() ? 2 : 1);
+
+ InFlag = Chain.getValue(1);
+ }
+
if (isTailCall)
PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
false, TailCallArguments);
@@ -4126,7 +4166,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
unsigned TypeShiftAmt = i & (SplatBitSize-1);
// vsplti + shl self.
- if (SextVal == (i << (int)TypeShiftAmt)) {
+ if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
@@ -4171,17 +4211,17 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
// t = vsplti c, result = vsldoi t, t, 1
- if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) {
+ if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 2
- if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) {
+ if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 3
- if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
+ if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index b0a013b..902b188 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -174,6 +174,10 @@ namespace llvm {
/// operand #3 optional in flag
TC_RETURN,
+ /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
+ CR6SET,
+ CR6UNSET,
+
/// STD_32 - This is the STD instruction for use with "32-bit" registers.
STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 39778a5..cfe71d17 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -29,6 +29,9 @@ def symbolLo64 : Operand<i64> {
let PrintMethod = "printSymbolLo";
let EncoderMethod = "getLO16Encoding";
}
+def tocentry : Operand<iPTR> {
+ let MIOperandInfo = (ops i32imm:$imm);
+}
//===----------------------------------------------------------------------===//
// 64-bit transformation functions.
@@ -296,12 +299,14 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins),
let PPC970_Unit = 1 in { // FXU Operations.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm),
"li $rD, $imm", IntSimple,
[(set G8RC:$rD, immSExt16:$imm)]>;
def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm),
"lis $rD, $imm", IntSimple,
[(set G8RC:$rD, imm16ShiftedSExt:$imm)]>;
+}
// Logical ops.
def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
@@ -459,7 +464,7 @@ def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
let Defs = [CARRY] in {
def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
- "sradi $rA, $rS, $SH", IntRotateD,
+ "sradi $rA, $rS, $SH", IntRotateDI,
[(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64;
}
def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
@@ -482,7 +487,7 @@ def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
let isCommutable = 1 in {
def RLDIMI : MDForm_1<30, 3,
(outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB),
- "rldimi $rA, $rS, $SH, $MB", IntRotateD,
+ "rldimi $rA, $rS, $SH, $MB", IntRotateDI,
[]>, isPPC64, RegConstraint<"$rSi = $rA">,
NoEncode<"$rSi">;
}
@@ -494,11 +499,11 @@ def RLDCL : MDForm_1<30, 0,
[]>, isPPC64;
def RLDICL : MDForm_1<30, 0,
(outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MB),
- "rldicl $rA, $rS, $SH, $MB", IntRotateD,
+ "rldicl $rA, $rS, $SH, $MB", IntRotateDI,
[]>, isPPC64;
def RLDICR : MDForm_1<30, 1,
(outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME),
- "rldicr $rA, $rS, $SH, $ME", IntRotateD,
+ "rldicr $rA, $rS, $SH, $ME", IntRotateDI,
[]>, isPPC64;
def RLWINM8 : MForm_2<21,
@@ -541,19 +546,19 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
let mayLoad = 1 in
def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
ptr_rc:$rA),
- "lhau $rD, $disp($rA)", LdStLoad,
+ "lhau $rD, $disp($rA)", LdStLHAU,
[]>, RegConstraint<"$rA = $ea_result">,
NoEncode<"$ea_result">;
// NO LWAU!
def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lhaux $rD, $addr", LdStLoad,
+ "lhaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LWAUX : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lwaux $rD, $addr", LdStLoad,
+ "lwaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
@@ -584,31 +589,31 @@ def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src),
// Update forms.
let mayLoad = 1 in {
def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lbzu $rD, $addr", LdStLoad,
+ "lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lhzu $rD, $addr", LdStLoad,
+ "lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lwzu $rD, $addr", LdStLoad,
+ "lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lbzux $rD, $addr", LdStLoad,
+ "lbzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LHZUX8 : XForm_1<31, 331, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lhzux $rD, $addr", LdStLoad,
+ "lhzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lwzux $rD, $addr", LdStLoad,
+ "lwzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
}
@@ -624,6 +629,14 @@ def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
"",
[(set G8RC:$rD,
(PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
+def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+ "",
+ [(set G8RC:$rD,
+ (PPCtoc_entry tjumptable:$disp, G8RC:$reg))]>, isPPC64;
+def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+ "",
+ [(set G8RC:$rD,
+ (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64;
let hasSideEffects = 1 in {
let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
@@ -642,13 +655,13 @@ def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
let mayLoad = 1 in
def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr),
- "ldu $rD, $addr", LdStLD,
+ "ldu $rD, $addr", LdStLDU,
[]>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
NoEncode<"$ea_result">;
def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "ldux $rD, $addr", LdStLoad,
+ "ldux $rD, $addr", LdStLDU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
@@ -695,14 +708,14 @@ let PPC970_Unit = 2 in {
def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStStore,
+ "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStStore,
+ "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
@@ -710,7 +723,7 @@ def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStStore,
+ "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
@@ -718,7 +731,7 @@ def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
s16immX4:$ptroff, ptr_rc:$ptrreg),
- "stdu $rS, $ptroff($ptrreg)", LdStSTD,
+ "stdu $rS, $ptroff($ptrreg)", LdStSTDU,
[(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
@@ -727,7 +740,7 @@ def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res),
(ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stbux $rS, $ptroff, $ptrreg", LdStStore,
+ "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti8 G8RC:$rS,
ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
@@ -736,7 +749,7 @@ def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res),
def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res),
(ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "sthux $rS, $ptroff, $ptrreg", LdStStore,
+ "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti16 G8RC:$rS,
ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
@@ -745,7 +758,7 @@ def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res),
def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res),
(ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stwux $rS, $ptroff, $ptrreg", LdStStore,
+ "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti32 G8RC:$rS,
ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
@@ -754,7 +767,7 @@ def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res),
def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res),
(ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stdux $rS, $ptroff, $ptrreg", LdStStore,
+ "stdux $rS, $ptroff, $ptrreg", LdStSTDU,
[(set ptr_rc:$ea_res,
(pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 47f09dc..d2df664 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -54,7 +54,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
const TargetMachine *TM,
const ScheduleDAG *DAG) const {
unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
- if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) {
+ if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
+ Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
const InstrItineraryData *II = TM->getInstrItineraryData();
return new PPCScoreboardHazardRecognizer(II, DAG);
}
@@ -70,7 +71,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
// Most subtargets use a PPC970 recognizer.
- if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2) {
+ if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
+ Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
const TargetInstrInfo *TII = TM.getInstrInfo();
assert(TII && "No InstrInfo?");
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index f57f0c9..a503908 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -123,9 +123,11 @@ def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>;
def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+ [SDNPHasChain, SDNPSideEffect,
+ SDNPInGlue, SDNPOutGlue]>;
def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+ [SDNPHasChain, SDNPSideEffect,
+ SDNPInGlue, SDNPOutGlue]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone,
@@ -153,6 +155,12 @@ def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
[SDNPHasChain, SDNPMayStore]>;
+// Instructions to set/unset CR bit 6 for SVR4 vararg calls
+def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
// Instructions to support atomic operations
def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
[SDNPHasChain, SDNPMayLoad]>;
@@ -330,9 +338,6 @@ def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
let EncoderMethod = "getMemRIXEncoding";
}
-def tocentry : Operand<iPTR> {
- let MIOperandInfo = (ops i32imm:$imm);
-}
// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg
// that doesn't matter.
@@ -673,7 +678,7 @@ def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src),
[(set GPRC:$rD, (load iaddr:$src))]>;
def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
- "lfs $rD, $src", LdStLFDU,
+ "lfs $rD, $src", LdStLFD,
[(set F4RC:$rD, (load iaddr:$src))]>;
def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
"lfd $rD, $src", LdStLFD,
@@ -683,32 +688,32 @@ def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
// Unindexed (r+i) Loads with Update (preinc).
let mayLoad = 1 in {
def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lbzu $rD, $addr", LdStLoad,
+ "lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lhau $rD, $addr", LdStLoad,
+ "lhau $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lhzu $rD, $addr", LdStLoad,
+ "lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lwzu $rD, $addr", LdStLoad,
+ "lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lfs $rD, $addr", LdStLFDU,
+ "lfsu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lfd $rD, $addr", LdStLFD,
+ "lfdu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
@@ -716,37 +721,37 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
// Indexed (r+r) Loads with Update (preinc).
def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lbzux $rD, $addr", LdStLoad,
+ "lbzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lhaux $rD, $addr", LdStLoad,
+ "lhaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LHZUX : XForm_1<31, 331, (outs GPRC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lhzux $rD, $addr", LdStLoad,
+ "lhzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lwzux $rD, $addr", LdStLoad,
+ "lwzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lfsux $rD, $addr", LdStLoad,
+ "lfsux $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
- "lfdux $rD, $addr", LdStLoad,
+ "lfdux $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
}
@@ -778,10 +783,10 @@ def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src),
[(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>;
def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
- "lfsx $frD, $src", LdStLFDU,
+ "lfsx $frD, $src", LdStLFD,
[(set F4RC:$frD, (load xaddr:$src))]>;
def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
- "lfdx $frD, $src", LdStLFDU,
+ "lfdx $frD, $src", LdStLFD,
[(set F8RC:$frD, (load xaddr:$src))]>;
}
@@ -801,10 +806,10 @@ def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src),
"stw $rS, $src", LdStStore,
[(store GPRC:$rS, iaddr:$src)]>;
def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
- "stfs $rS, $dst", LdStUX,
+ "stfs $rS, $dst", LdStSTFD,
[(store F4RC:$rS, iaddr:$dst)]>;
def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
- "stfd $rS, $dst", LdStUX,
+ "stfd $rS, $dst", LdStSTFD,
[(store F8RC:$rS, iaddr:$dst)]>;
}
@@ -812,33 +817,33 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
let PPC970_Unit = 2 in {
def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStStore,
+ "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStStore,
+ "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStStore,
+ "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfsu $rS, $ptroff($ptrreg)", LdStStore,
+ "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU,
[(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfdu $rS, $ptroff($ptrreg)", LdStStore,
+ "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU,
[(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
@@ -863,7 +868,7 @@ def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res),
(ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stbux $rS, $ptroff, $ptrreg", LdStStore,
+ "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti8 GPRC:$rS,
ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
@@ -872,7 +877,7 @@ def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res),
def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res),
(ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "sthux $rS, $ptroff, $ptrreg", LdStStore,
+ "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_truncsti16 GPRC:$rS,
ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
@@ -881,7 +886,7 @@ def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res),
def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res),
(ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stwux $rS, $ptroff, $ptrreg", LdStStore,
+ "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
[(set ptr_rc:$ea_res,
(pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
@@ -889,7 +894,7 @@ def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res),
def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res),
(ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stfsux $rS, $ptroff, $ptrreg", LdStStore,
+ "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU,
[(set ptr_rc:$ea_res,
(pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
@@ -897,7 +902,7 @@ def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res),
def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res),
(ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stfdux $rS, $ptroff, $ptrreg", LdStStore,
+ "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU,
[(set ptr_rc:$ea_res,
(pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
@@ -913,14 +918,14 @@ def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
PPC970_DGroup_Cracked;
def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst),
- "stfiwx $frS, $dst", LdStUX,
+ "stfiwx $frS, $dst", LdStSTFD,
[(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>;
def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst),
- "stfsx $frS, $dst", LdStUX,
+ "stfsx $frS, $dst", LdStSTFD,
[(store F4RC:$frS, xaddr:$dst)]>;
def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
- "stfdx $frS, $dst", LdStUX,
+ "stfdx $frS, $dst", LdStSTFD,
[(store F8RC:$frS, xaddr:$dst)]>;
}
@@ -964,7 +969,7 @@ def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
[(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>;
}
-let isReMaterializable = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
"li $rD, $imm", IntSimple,
[(set GPRC:$rD, immSExt16:$imm)]>;
@@ -1143,6 +1148,16 @@ def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins),
"crxor $dst, $dst, $dst", BrCR,
[]>;
+let Defs = [CR1EQ], CRD = 6 in {
+def CR6SET : XLForm_1_ext<19, 289, (outs), (ins),
+ "creqv 6, 6, 6", BrCR,
+ [(PPCcr6set)]>;
+
+def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
+ "crxor 6, 6, 6", BrCR,
+ [(PPCcr6unset)]>;
+}
+
// XFX-Form instructions. Instructions that deal with SPRs.
//
let Uses = [CTR] in {
@@ -1233,7 +1248,7 @@ let Uses = [RM] in {
PPC970_DGroup_Single, PPC970_Unit_FPU;
def FADDrtz: AForm_2<63, 21,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
- "fadd $FRT, $FRA, $FRB", FPGeneral,
+ "fadd $FRT, $FRA, $FRB", FPAddSub,
[(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
@@ -1364,7 +1379,7 @@ def FSELS : AForm_1<63, 23,
let Uses = [RM] in {
def FADD : AForm_2<63, 21,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
- "fadd $FRT, $FRA, $FRB", FPGeneral,
+ "fadd $FRT, $FRA, $FRB", FPAddSub,
[(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>;
def FADDS : AForm_2<59, 21,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
@@ -1388,7 +1403,7 @@ let Uses = [RM] in {
[(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>;
def FSUB : AForm_2<63, 20,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
- "fsub $FRT, $FRA, $FRB", FPGeneral,
+ "fsub $FRT, $FRA, $FRB", FPAddSub,
[(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>;
def FSUBS : AForm_2<59, 20,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index 6a6ccb9..660c0c3 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -40,6 +40,7 @@ def IntMulHWU : InstrItinClass;
def IntMulLI : InstrItinClass;
def IntRFID : InstrItinClass;
def IntRotateD : InstrItinClass;
+def IntRotateDI : InstrItinClass;
def IntRotate : InstrItinClass;
def IntShift : InstrItinClass;
def IntTrapD : InstrItinClass;
@@ -52,15 +53,18 @@ def LdStDCBA : InstrItinClass;
def LdStDCBF : InstrItinClass;
def LdStDCBI : InstrItinClass;
def LdStLoad : InstrItinClass;
+def LdStLoadUpd : InstrItinClass;
def LdStStore : InstrItinClass;
+def LdStStoreUpd : InstrItinClass;
def LdStDSS : InstrItinClass;
def LdStICBI : InstrItinClass;
-def LdStUX : InstrItinClass;
def LdStLD : InstrItinClass;
+def LdStLDU : InstrItinClass;
def LdStLDARX : InstrItinClass;
def LdStLFD : InstrItinClass;
def LdStLFDU : InstrItinClass;
def LdStLHA : InstrItinClass;
+def LdStLHAU : InstrItinClass;
def LdStLMW : InstrItinClass;
def LdStLVecX : InstrItinClass;
def LdStLWA : InstrItinClass;
@@ -69,6 +73,9 @@ def LdStSLBIA : InstrItinClass;
def LdStSLBIE : InstrItinClass;
def LdStSTD : InstrItinClass;
def LdStSTDCX : InstrItinClass;
+def LdStSTDU : InstrItinClass;
+def LdStSTFD : InstrItinClass;
+def LdStSTFDU : InstrItinClass;
def LdStSTVEBX : InstrItinClass;
def LdStSTWCX : InstrItinClass;
def LdStSync : InstrItinClass;
@@ -86,6 +93,7 @@ def SprMTSRIN : InstrItinClass;
def SprRFI : InstrItinClass;
def SprSC : InstrItinClass;
def FPGeneral : InstrItinClass;
+def FPAddSub : InstrItinClass;
def FPCompare : InstrItinClass;
def FPDivD : InstrItinClass;
def FPDivS : InstrItinClass;
@@ -110,6 +118,8 @@ include "PPCScheduleG4.td"
include "PPCScheduleG4Plus.td"
include "PPCScheduleG5.td"
include "PPCScheduleA2.td"
+include "PPCScheduleE500mc.td"
+include "PPCScheduleE5500.td"
//===----------------------------------------------------------------------===//
// Instruction to itinerary class map - When add new opcodes to the supported
@@ -171,7 +181,7 @@ include "PPCScheduleA2.td"
// extsh IntSimple
// extsw IntSimple
// fabs FPGeneral
-// fadd FPGeneral
+// fadd FPAddSub
// fadds FPGeneral
// fcfid FPGeneral
// fcmpo FPCompare
@@ -201,35 +211,35 @@ include "PPCScheduleA2.td"
// fsel FPGeneral
// fsqrt FPSqrt
// fsqrts FPSqrt
-// fsub FPGeneral
+// fsub FPAddSub
// fsubs FPGeneral
// icbi LdStICBI
// isync SprISYNC
// lbz LdStLoad
-// lbzu LdStLoad
-// lbzux LdStUX
+// lbzu LdStLoadUpd
+// lbzux LdStLoadUpd
// lbzx LdStLoad
// ld LdStLD
// ldarx LdStLDARX
-// ldu LdStLD
-// ldux LdStLD
+// ldu LdStLDU
+// ldux LdStLDU
// ldx LdStLD
// lfd LdStLFD
// lfdu LdStLFDU
// lfdux LdStLFDU
-// lfdx LdStLFDU
-// lfs LdStLFDU
+// lfdx LdStLFD
+// lfs LdStLFD
// lfsu LdStLFDU
// lfsux LdStLFDU
-// lfsx LdStLFDU
+// lfsx LdStLFD
// lha LdStLHA
-// lhau LdStLHA
-// lhaux LdStLHA
+// lhau LdStLHAU
+// lhaux LdStLHAU
// lhax LdStLHA
// lhbrx LdStLoad
// lhz LdStLoad
-// lhzu LdStLoad
-// lhzux LdStUX
+// lhzu LdStLoadUpd
+// lhzux LdStLoadUpd
// lhzx LdStLoad
// lmw LdStLMW
// lswi LdStLMW
@@ -243,12 +253,12 @@ include "PPCScheduleA2.td"
// lvxl LdStLVecX
// lwa LdStLWA
// lwarx LdStLWARX
-// lwaux LdStLHA
+// lwaux LdStLHAU
// lwax LdStLHA
// lwbrx LdStLoad
// lwz LdStLoad
-// lwzu LdStLoad
-// lwzux LdStUX
+// lwzu LdStLoadUpd
+// lwzux LdStLoadUpd
// lwzx LdStLoad
// mcrf BrMCR
// mcrfs FPGeneral
@@ -292,10 +302,10 @@ include "PPCScheduleA2.td"
// rfid IntRFID
// rldcl IntRotateD
// rldcr IntRotateD
-// rldic IntRotateD
-// rldicl IntRotateD
-// rldicr IntRotateD
-// rldimi IntRotateD
+// rldic IntRotateDI
+// rldicl IntRotateDI
+// rldicr IntRotateDI
+// rldimi IntRotateDI
// rlwimi IntRotate
// rlwinm IntGeneral
// rlwnm IntGeneral
@@ -305,33 +315,33 @@ include "PPCScheduleA2.td"
// sld IntRotateD
// slw IntGeneral
// srad IntRotateD
-// sradi IntRotateD
+// sradi IntRotateDI
// sraw IntShift
// srawi IntShift
// srd IntRotateD
// srw IntGeneral
// stb LdStStore
-// stbu LdStStore
-// stbux LdStStore
+// stbu LdStStoreUpd
+// stbux LdStStoreUpd
// stbx LdStStore
// std LdStSTD
// stdcx. LdStSTDCX
-// stdu LdStSTD
-// stdux LdStSTD
+// stdu LdStSTDU
+// stdux LdStSTDU
// stdx LdStSTD
-// stfd LdStUX
-// stfdu LdStUX
-// stfdux LdStUX
-// stfdx LdStUX
-// stfiwx LdStUX
-// stfs LdStUX
-// stfsu LdStUX
-// stfsux LdStUX
-// stfsx LdStUX
+// stfd LdStSTFD
+// stfdu LdStSTFDU
+// stfdux LdStSTFDU
+// stfdx LdStSTFD
+// stfiwx LdStSTFD
+// stfs LdStSTFD
+// stfsu LdStSTFDU
+// stfsux LdStSTFDU
+// stfsx LdStSTFD
// sth LdStStore
// sthbrx LdStStore
-// sthu LdStStore
-// sthux LdStStore
+// sthu LdStStoreUpd
+// sthux LdStStoreUpd
// sthx LdStStore
// stmw LdStLMW
// stswi LdStLMW
@@ -344,8 +354,8 @@ include "PPCScheduleA2.td"
// stw LdStStore
// stwbrx LdStStore
// stwcx. LdStSTWCX
-// stwu LdStStore
-// stwux LdStStore
+// stwu LdStStoreUpd
+// stwux LdStStoreUpd
// stwx LdStStore
// subf IntGeneral
// subfc IntGeneral
diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td
index cd0fb70..37b6eac 100644
--- a/lib/Target/PowerPC/PPCSchedule440.td
+++ b/lib/Target/PowerPC/PPCSchedule440.td
@@ -288,6 +288,15 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<2, [LWB]>],
[9, 5],
[GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [9, 5],
+ [GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStStore , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
@@ -297,6 +306,15 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<2, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
@@ -306,7 +324,7 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<1, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
- InstrItinData<LdStUX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrItinData<LdStSTFD , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [LRACC]>,
@@ -315,6 +333,15 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<1, [LWB]>],
[8, 5, 5],
[NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTFDU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStLFD , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
@@ -342,6 +369,15 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<1, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLHAU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
InstrItinData<LdStLMW , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
@@ -371,6 +407,15 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<2, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1]>,
@@ -537,6 +582,19 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<1, [FWB]>],
[10, 4, 4],
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
InstrItinData<FPCompare , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td
index 4d4a5d0..ba63b5c 100644
--- a/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/lib/Target/PowerPC/PPCScheduleA2.td
@@ -181,6 +181,17 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[10, 7, 7],
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotateDI , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntShift , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -302,7 +313,18 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[14, 7],
[GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLD , [InstrStage<4,
+ InstrItinData<LdStLoadUpd , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLDU , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
IU4_4, IU4_5, IU4_6, IU4_7]>,
@@ -324,6 +346,17 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[13, 7],
[GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 7],
+ [GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStICBI , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -335,7 +368,7 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[14, 7],
[NoBypass, GPR_Bypass]>,
- InstrItinData<LdStUX , [InstrStage<4,
+ InstrItinData<LdStSTFD , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
IU4_4, IU4_5, IU4_6, IU4_7]>,
@@ -346,6 +379,17 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[14, 7, 7],
[NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<LdStSTFDU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
InstrItinData<LdStLFD , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -379,6 +423,17 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[14, 7],
[NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLHAU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [NoBypass, GPR_Bypass]>,
InstrItinData<LdStLMW , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -412,6 +467,17 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[13, 7],
[GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 7],
+ [GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStSTDCX , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -593,6 +659,17 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
[15, 7, 7],
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPAddSub , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
+ InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
+ InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
+ InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
+ [15, 7, 7],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
InstrItinData<FPCompare , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
diff --git a/lib/Target/PowerPC/PPCScheduleE500mc.td b/lib/Target/PowerPC/PPCScheduleE500mc.td
new file mode 100644
index 0000000..9bb779a
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -0,0 +1,265 @@
+//===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Freescale e500mc 32-bit
+// Power processor.
+//
+// All information is derived from the "e500mc Core Reference Manual",
+// Freescale Document Number E500MCRM, Rev. 1, 03/2012.
+//
+//===----------------------------------------------------------------------===//
+// Relevant functional units in the Freescale e500mc core:
+//
+// * Decode & Dispatch
+// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
+// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
+def DIS0 : FuncUnit; // Dispatch stage - insn 1
+def DIS1 : FuncUnit; // Dispatch stage - insn 2
+
+// * Execute
+// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
+// Some instructions can only execute in SFX0 but not SFX1.
+// The CFX has a bypass path, allowing non-divide instructions to execute
+// while a divide instruction is being executed.
+def SFX0 : FuncUnit; // Simple unit 0
+def SFX1 : FuncUnit; // Simple unit 1
+def BU : FuncUnit; // Branch unit
+def CFX_DivBypass
+ : FuncUnit; // CFX divide bypass path
+def CFX_0 : FuncUnit; // CFX pipeline
+def LSU_0 : FuncUnit; // LSU pipeline
+def FPU_0 : FuncUnit; // FPU pipeline
+
+def PPCE500mcItineraries : ProcessorItineraries<
+ [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0],
+ [CR_Bypass, GPR_Bypass, FPR_Bypass], [
+ InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [5, 1, 1], // Latency = 1 or 2
+ [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<14, [CFX_DivBypass]>],
+ [17, 1, 1], // Latency=4..35, Repeat= 4..35
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<8, [FPU_0]>],
+ [11], // Latency = 8
+ [FPR_Bypass]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<8, [FPU_0]>],
+ [11, 1, 1], // Latency = 8
+ [NoBypass, NoBypass, NoBypass]>,
+ InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [SFX0]>],
+ [5, 1], // Latency = 2, Repeat rate = 2
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [BU]>],
+ [4, 1], // Latency = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [BU]>],
+ [4, 1, 1], // Latency = 1
+ [CR_Bypass, CR_Bypass, CR_Bypass]>,
+ InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [BU]>],
+ [4, 1], // Latency = 1
+ [CR_Bypass, CR_Bypass]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [CR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 1, 1], // Latency = 4
+ [FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 1, 1], // Latency = 4
+ [FPR_Bypass, GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 1], // Latency = r+3
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<3, [LSU_0]>],
+ [6, 1, 1], // Latency = 3, Repeat rate = 3
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>]>,
+ InstrItinData<SprMFSR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [SFX0]>],
+ [7, 1],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [SFX0, SFX1]>],
+ [5, 1], // Latency = 2, Repeat rate = 4
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTSR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0]>],
+ [5, 1],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0], 0>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<5, [SFX0]>],
+ [8, 1],
+ [GPR_Bypass, CR_Bypass]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [SFX0]>],
+ [7, 1], // Latency = 4, Repeat rate = 4
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1], // Latency = 1, Repeat rate = 1
+ [GPR_Bypass, CR_Bypass]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [SFX0]>],
+ [7, 1], // Latency = 4, Repeat rate = 4
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [4, 1], // Latency = 1, Repeat rate = 1
+ [CR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTSRIN , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0]>],
+ [4, 1],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [FPU_0]>],
+ [11, 1, 1], // Latency = 8, Repeat rate = 2
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [FPU_0]>],
+ [13, 1, 1], // Latency = 10, Repeat rate = 4
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [FPU_0]>],
+ [11, 1, 1], // Latency = 8, Repeat rate = 2
+ [CR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<68, [FPU_0]>],
+ [71, 1, 1], // Latency = 68, Repeat rate = 68
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<38, [FPU_0]>],
+ [41, 1, 1], // Latency = 38, Repeat rate = 38
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [FPU_0]>],
+ [13, 1, 1, 1], // Latency = 10, Repeat rate = 4
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<38, [FPU_0]>],
+ [41, 1], // Latency = 38, Repeat rate = 38
+ [FPR_Bypass, FPR_Bypass]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// e500mc machine model for scheduling and other instruction cost heuristics.
+
+def PPCE500mcModel : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 5; // Optimistic load latency assuming bypass.
+ // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+
+ let Itineraries = PPCE500mcItineraries;
+}
diff --git a/lib/Target/PowerPC/PPCScheduleE5500.td b/lib/Target/PowerPC/PPCScheduleE5500.td
new file mode 100644
index 0000000..d7e11ac
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -0,0 +1,309 @@
+//===-- PPCScheduleE5500.td - e5500 Scheduling Defs --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Freescale e5500 64-bit
+// Power processor.
+//
+// All information is derived from the "e5500 Core Reference Manual",
+// Freescale Document Number e5500RM, Rev. 1, 03/2012.
+//
+//===----------------------------------------------------------------------===//
+// Relevant functional units in the Freescale e5500 core
+// (These are the same as for the e500mc)
+//
+// * Decode & Dispatch
+// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
+// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
+// def DIS0 : FuncUnit;
+// def DIS1 : FuncUnit;
+
+// * Execute
+// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
+// The CFX has a bypass path, allowing non-divide instructions to execute
+// while a divide instruction is being executed.
+// def SFX0 : FuncUnit; // Simple unit 0
+// def SFX1 : FuncUnit; // Simple unit 1
+// def BU : FuncUnit; // Branch unit
+// def CFX_DivBypass
+// : FuncUnit; // CFX divide bypass path
+// def CFX_0 : FuncUnit; // CFX pipeline stage 0
+
+def CFX_1 : FuncUnit; // CFX pipeline stage 1 (the only unit defined in this file; the commented-out units are presumably shared with the e500mc schedule -- confirm)
+
+// def LSU_0 : FuncUnit; // LSU pipeline
+// def FPU_0 : FuncUnit; // FPU pipeline
+
+
+def PPCE5500Itineraries : ProcessorItineraries< // Per-itinerary-class stages, operand cycles and bypasses for the e5500.
+ [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1,
+ LSU_0, FPU_0],
+ [CR_Bypass, GPR_Bypass, FPR_Bypass], [
+ InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [6, 2, 2], // Latency = 1 or 2
+ [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntDivD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<26, [CFX_DivBypass]>],
+ [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<16, [CFX_DivBypass]>],
+ [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [FPU_0]>],
+ [11], // Latency = 7, Repeat rate = 1
+ [FPR_Bypass]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<7, [FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 7
+ [NoBypass, NoBypass, NoBypass]>,
+ InstrItinData<IntMulHD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<2, [CFX_1]>],
+ [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<1, [CFX_1]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<1, [CFX_1]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0], 0>,
+ InstrStage<2, [CFX_1]>],
+ [8, 2, 2], // Latency = 4 or 5, Repeat = 2
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotateD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [SFX0, SFX1]>],
+ [6, 2, 2], // Latency = 2, Repeat rate = 2
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotateDI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [5, 2, 2], // Latency = 1, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [SFX0, SFX1]>],
+ [6, 2, 2], // Latency = 2, Repeat rate = 2
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [SFX0]>],
+ [6, 2], // Latency = 2, Repeat rate = 2
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [BU]>],
+ [5, 2], // Latency = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [BU]>],
+ [5, 2, 2], // Latency = 1
+ [CR_Bypass, CR_Bypass, CR_Bypass]>,
+ InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [BU]>],
+ [5, 2], // Latency = 1
+ [CR_Bypass, CR_Bypass]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0]>],
+ [5, 2, 2], // Latency = 1; NOTE(review): 3 operand cycles but only 2 bypasses listed below -- confirm intended
+ [CR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLDARX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<3, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLDU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [FPR_Bypass, GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops; NOTE(review): no SFX stage here, unlike every other 2-micro-op entry in this table -- confirm
+ InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [FPR_Bypass, GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [GPR_Bypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [LSU_0]>],
+ [8, 2], // Latency = r+3, Repeat rate = r+3
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<3, [LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 3
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDCX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDU , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [CFX_0]>],
+ [6, 2], // Latency = 2, Repeat rate = 4; NOTE(review): the CFX_0 stage above is 2 cycles, not 4 -- confirm
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [LSU_0], 0>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<5, [CFX_0]>],
+ [9, 2], // Latency = 5, Repeat rate = 5
+ [GPR_Bypass, CR_Bypass]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [SFX0]>],
+ [8, 2], // Latency = 4, Repeat rate = 4
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [CFX_0]>],
+ [5], // Latency = 1, Repeat rate = 1
+ [GPR_Bypass]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<4, [CFX_0]>],
+ [8, 2], // Latency = 4, Repeat rate = 4
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [SFX0, SFX1]>],
+ [5], // Latency = 1, Repeat rate = 1
+ [GPR_Bypass]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [CR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<31, [FPU_0]>],
+ [39, 2, 2], // Latency = 35, Repeat rate = 31
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<16, [FPU_0]>],
+ [24, 2, 2], // Latency = 20, Repeat rate = 16
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<1, [FPU_0]>],
+ [11, 2, 2, 2], // Latency = 7, Repeat rate = 1
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>,
+ InstrStage<2, [FPU_0]>],
+ [12, 2], // Latency = 8, Repeat rate = 2
+ [FPR_Bypass, FPR_Bypass]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// e5500 machine model for scheduling and other instruction cost heuristics.
+
+def PPCE5500Model : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 6; // Optimistic load latency assuming bypass.
+ // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+
+ let Itineraries = PPCE5500Itineraries;
+}
diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td
index 61e89ed..72a0a39 100644
--- a/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/lib/Target/PowerPC/PPCScheduleG3.td
@@ -34,12 +34,16 @@ def G3Itineraries : ProcessorItineraries<
InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>,
@@ -58,6 +62,7 @@ def G3Itineraries : ProcessorItineraries<
InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>,
InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td
index e19ddfa..fc9120d 100644
--- a/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/lib/Target/PowerPC/PPCScheduleG4.td
@@ -33,13 +33,17 @@ def G4Itineraries : ProcessorItineraries<
InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>,
InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
@@ -60,6 +64,7 @@ def G4Itineraries : ProcessorItineraries<
InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>,
InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td
index e7446cb..a4e82ce 100644
--- a/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -36,19 +36,24 @@ def G4PlusItineraries : ProcessorItineraries<
InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>,
- InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTFD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTFDU , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>,
InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>,
InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLHAU , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>,
InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
@@ -66,6 +71,7 @@ def G4PlusItineraries : ProcessorItineraries<
InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPAddSub , [InstrStage<5, [FPU1]>]>,
InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>,
InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>,
InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
index 1371499..7c02ea0 100644
--- a/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -27,6 +27,7 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>,
InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>,
InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntRotateDI , [InstrStage<2, [IU1, IU2]>]>,
InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>,
InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>,
InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>,
@@ -37,15 +38,20 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>,
InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>,
- InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStSTFD , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStSTFDU , [InstrStage<4, [SLU]>]>,
InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLDU , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>,
InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>,
InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLHAU , [InstrStage<5, [SLU]>]>,
InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>,
InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>,
@@ -53,6 +59,7 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work
InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>,
InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>,
InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>,
@@ -69,6 +76,7 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>,
InstrItinData<SprSC , [InstrStage<1, [IU2]>]>,
InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPAddSub , [InstrStage<6, [FPU1, FPU2]>]>,
InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>,
InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>,
InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>,
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 0207c83..b8b1614 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -41,6 +41,8 @@ namespace PPC {
DIR_750,
DIR_970,
DIR_A2,
+ DIR_E500mc,
+ DIR_E5500,
DIR_PWR6,
DIR_PWR7,
DIR_64