Diffstat (limited to 'lib/Target/PowerPC')
37 files changed, 3344 insertions, 1264 deletions
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index a8f7509..fe83fe1 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" @@ -174,6 +175,7 @@ struct PPCOperand; class PPCAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; + const MCInstrInfo &MII; bool IsPPC64; MCAsmParser &getParser() const { return Parser; } @@ -218,8 +220,9 @@ class PPCAsmParser : public MCTargetAsmParser { public: - PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) - : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { + PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, + const MCInstrInfo &_MII) + : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(_MII) { // Check for 64-bit vs. 32-bit pointer mode. Triple TheTriple(STI.getTargetTriple()); IsPPC64 = (TheTriple.getArch() == Triple::ppc64 || @@ -235,6 +238,10 @@ public: virtual bool ParseDirective(AsmToken DirectiveID); unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind); + + virtual const MCExpr *applyModifierToExpr(const MCExpr *E, + MCSymbolRefExpr::VariantKind, + MCContext &Ctx); }; /// PPCOperand - Instances of this class represent a parsed PowerPC machine @@ -900,19 +907,19 @@ MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) { RegNo = PPC::VRSAVE; IntVal = 256; return false; - } else if (Name.substr(0, 1).equals_lower("r") && + } else if (Name.startswith_lower("r") && !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { RegNo = isPPC64()? 
XRegs[IntVal] : RRegs[IntVal]; return false; - } else if (Name.substr(0, 1).equals_lower("f") && + } else if (Name.startswith_lower("f") && !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { RegNo = FRegs[IntVal]; return false; - } else if (Name.substr(0, 1).equals_lower("v") && + } else if (Name.startswith_lower("v") && !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { RegNo = VRegs[IntVal]; return false; - } else if (Name.substr(0, 2).equals_lower("cr") && + } else if (Name.startswith_lower("cr") && !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) { RegNo = CRRegs[IntVal]; return false; @@ -1353,6 +1360,8 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, switch (Kind) { case MCK_0: ImmVal = 0; break; case MCK_1: ImmVal = 1; break; + case MCK_2: ImmVal = 2; break; + case MCK_3: ImmVal = 3; break; default: return Match_InvalidOperand; } @@ -1363,3 +1372,26 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, return Match_InvalidOperand; } +const MCExpr * +PPCAsmParser::applyModifierToExpr(const MCExpr *E, + MCSymbolRefExpr::VariantKind Variant, + MCContext &Ctx) { + switch (Variant) { + case MCSymbolRefExpr::VK_PPC_LO: + return PPCMCExpr::Create(PPCMCExpr::VK_PPC_LO, E, false, Ctx); + case MCSymbolRefExpr::VK_PPC_HI: + return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HI, E, false, Ctx); + case MCSymbolRefExpr::VK_PPC_HA: + return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HA, E, false, Ctx); + case MCSymbolRefExpr::VK_PPC_HIGHER: + return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHER, E, false, Ctx); + case MCSymbolRefExpr::VK_PPC_HIGHERA: + return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHERA, E, false, Ctx); + case MCSymbolRefExpr::VK_PPC_HIGHEST: + return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHEST, E, false, Ctx); + case MCSymbolRefExpr::VK_PPC_HIGHESTA: + return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx); + default: + return 0; + } +} diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 08d7665..8281b5c 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -18,9 +18,17 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOpcodes.h" using namespace llvm; +// FIXME: Once the integrated assembler supports full register names, tie this +// to the verbose-asm setting. +static cl::opt<bool> +FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false), + cl::desc("Use full register names when printing assembly")); + #include "PPCGenAsmWriter.inc" void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { @@ -78,6 +86,17 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, } } + // For fast-isel, a COPY_TO_REGCLASS may survive this long. This is + // used when converting a 32-bit float to a 64-bit float as part of + // conversion to an integer (see PPCFastISel.cpp:SelectFPToI()), + // as otherwise we have problems with incorrect register classes + // in machine instruction verification. For now, just avoid trying + // to print it as such an instruction has no effect (a 32-bit float + // in a register is already in 64-bit form, just with lower + // precision). FIXME: Is there a better solution? 
+ if (MI->getOpcode() == TargetOpcode::COPY_TO_REGCLASS) + return; + printInstruction(MI, O); printAnnotation(O, Annot); } @@ -285,6 +304,9 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo, /// stripRegisterPrefix - This method strips the character prefix from a /// register name so that only the number is left. Used by for linux asm. static const char *stripRegisterPrefix(const char *RegName) { + if (FullRegNames) + return RegName; + switch (RegName[0]) { case 'r': case 'f': diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt index 45be471..3efa5ec 100644 --- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_library(LLVMPowerPCDesc PPCMCCodeEmitter.cpp PPCMCExpr.cpp PPCPredicates.cpp + PPCMachObjectWriter.cpp PPCELFObjectWriter.cpp ) diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index b2a8701..0d42081 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -16,9 +16,9 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" -#include "llvm/Object/MachOFormat.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -69,19 +69,6 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { } namespace { -class PPCMachObjectWriter : public MCMachObjectTargetWriter { -public: - PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, - uint32_t CPUSubtype) - : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {} - - void RecordRelocation(MachObjectWriter *Writer, - const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, uint64_t &FixedValue) { - llvm_unreachable("Relocation emission for MachO/PPC unimplemented!"); - } -}; class PPCAsmBackend : public MCAsmBackend { const Target &TheTarget; @@ -145,14 +132,17 @@ public: } bool writeNopData(uint64_t Count, MCObjectWriter *OW) const { - // Can't emit NOP with size not multiple of 32-bits - if (Count % 4 != 0) - return false; - uint64_t NumNops = Count / 4; for (uint64_t i = 0; i != NumNops; ++i) OW->Write32(0x60000000); + switch (Count % 4) { + default: break; // No leftover bytes to write + case 1: OW->Write8(0); break; + case 2: OW->Write16(0); break; + case 3: OW->Write16(0); OW->Write8(0); break; + } + return true; } @@ -174,12 +164,11 @@ namespace { MCObjectWriter *createObjectWriter(raw_ostream &OS) const { bool is64 = getPointerSize() == 8; - return createMachObjectWriter(new PPCMachObjectWriter( - /*Is64Bit=*/is64, - (is64 ? object::mach::CTM_PowerPC64 : - object::mach::CTM_PowerPC), - object::mach::CSPPC_ALL), - OS, /*IsLittleEndian=*/false); + return createPPCMachObjectWriter( + OS, + /*Is64Bit=*/is64, + (is64 ? 
MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC), + MachO::CPU_SUBTYPE_POWERPC_ALL); } virtual bool doesSectionRequireSymbols(const MCSection &Section) const { @@ -206,10 +195,9 @@ namespace { } // end anonymous namespace - - - -MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU) { +MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { if (Triple(TT).isOSDarwin()) return new DarwinPPCAsmBackend(T); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 6822507..f3dddce 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -22,7 +22,6 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { } IsLittleEndian = false; - PCSymbol = "."; CommentString = ";"; ExceptionsType = ExceptionHandling::DwarfCFI; @@ -47,15 +46,14 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { CommentString = "#"; GlobalPrefix = ""; PrivateGlobalPrefix = ".L"; - WeakRefDirective = "\t.weak\t"; - + // Uses '.section' before '.bss' directive UsesELFSectionDirectiveForBSS = true; // Debug Information SupportsDebugInformation = true; - PCSymbol = "."; + DollarIsPC = true; // Set up DWARF directives HasLEB128 = true; // Target asm supports leb128 directives (little-endian) diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h index 7b4ed9f..1530e77 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h @@ -15,6 +15,7 @@ #define PPCTARGETASMINFO_H #include "llvm/MC/MCAsmInfoDarwin.h" +#include "llvm/MC/MCAsmInfoELF.h" namespace llvm { @@ -24,7 +25,7 @@ namespace llvm { explicit PPCMCAsmInfoDarwin(bool is64Bit); }; - class PPCLinuxMCAsmInfo : public MCAsmInfo { + class PPCLinuxMCAsmInfo : public MCAsmInfoELF { virtual void anchor(); public: explicit PPCLinuxMCAsmInfo(bool is64Bit); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 59ba9c4..346a9be 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOpcodes.h" using namespace llvm; STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); @@ -76,11 +77,17 @@ public: SmallVectorImpl<MCFixup> &Fixups) const; void EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { + // For fast-isel, a float COPY_TO_REGCLASS can survive this long. + // It's just a nop to keep the register classes happy, so don't + // generate anything. + unsigned Opcode = MI.getOpcode(); + if (Opcode == TargetOpcode::COPY_TO_REGCLASS) + return; + uint64_t Bits = getBinaryCodeForInstr(MI, Fixups); // BL8_NOP etc. all have a size of 8 because of the following 'nop'. unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value! 
- unsigned Opcode = MI.getOpcode(); if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP || Opcode == PPC::BL8_NOP_TLS) Size = 8; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index 9529267..d7e8402 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -54,7 +54,7 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout) const { MCValue Value; - if (!getSubExpr()->EvaluateAsRelocatable(Value, *Layout)) + if (!Layout || !getSubExpr()->EvaluateAsRelocatable(Value, *Layout)) return false; if (Value.isAbsolute()) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 5f7a39a..f18d095 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -14,13 +14,16 @@ #include "PPCMCTargetDesc.h" #include "InstPrinter/PPCInstPrinter.h" #include "PPCMCAsmInfo.h" +#include "PPCTargetStreamer.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC @@ -34,6 +37,9 @@ using namespace llvm; +// Pin the vtable to this file. +PPCTargetStreamer::~PPCTargetStreamer() {} + static MCInstrInfo *createPPCMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitPPCMCInstrInfo(X); @@ -101,6 +107,29 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } +namespace { +class PPCTargetAsmStreamer : public PPCTargetStreamer { + formatted_raw_ostream &OS; + +public: + PPCTargetAsmStreamer(formatted_raw_ostream &OS) : OS(OS) {} + virtual void emitTCEntry(const MCSymbol &S) { + OS << "\t.tc "; + OS << S.getName(); + OS << "[TC],"; + OS << S.getName(); + OS << '\n'; + } +}; + +class PPCTargetELFStreamer : public PPCTargetStreamer { + virtual void emitTCEntry(const MCSymbol &S) { + // Creates a R_PPC64_TOC relocation + Streamer->EmitSymbolValue(&S, 8); + } +}; +} + // This is duplicated code. Refactor this. 
static MCStreamer *createMCStreamer(const Target &T, StringRef TT, MCContext &Ctx, MCAsmBackend &MAB, @@ -111,7 +140,20 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, if (Triple(TT).isOSDarwin()) return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll); - return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); + PPCTargetStreamer *S = new PPCTargetELFStreamer(); + return createELFStreamer(Ctx, S, MAB, OS, Emitter, RelaxAll, NoExecStack); +} + +static MCStreamer * +createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, + bool isVerboseAsm, bool useLoc, bool useCFI, + bool useDwarfDirectory, MCInstPrinter *InstPrint, + MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) { + PPCTargetStreamer *S = new PPCTargetAsmStreamer(OS); + + return llvm::createAsmStreamer(Ctx, S, OS, isVerboseAsm, useLoc, useCFI, + useDwarfDirectory, InstPrint, CE, TAB, + ShowInst); } static MCInstPrinter *createPPCMCInstPrinter(const Target &T, @@ -171,6 +213,11 @@ extern "C" void LLVMInitializePowerPCTargetMC() { TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer); TargetRegistry::RegisterMCObjectStreamer(ThePPC64LETarget, createMCStreamer); + // Register the asm streamer. + TargetRegistry::RegisterAsmStreamer(ThePPC32Target, createMCAsmStreamer); + TargetRegistry::RegisterAsmStreamer(ThePPC64Target, createMCAsmStreamer); + TargetRegistry::RegisterAsmStreamer(ThePPC64LETarget, createMCAsmStreamer); + // Register the MCInstPrinter. TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter); TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 9f29132..0b0ca24 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -40,12 +40,17 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx); -MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU); +MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); /// createPPCELFObjectWriter - Construct an PPC ELF object writer. MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS, bool Is64Bit, uint8_t OSABI); +/// createPPCELFObjectWriter - Construct a PPC Mach-O object writer. +MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit, + uint32_t CPUType, + uint32_t CPUSubtype); } // End llvm namespace // Generated files will use "namespace PPC". To avoid symbol clash, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp new file mode 100644 index 0000000..bbafe2e --- /dev/null +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -0,0 +1,389 @@ +//===-- PPCMachObjectWriter.cpp - PPC Mach-O Writer -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/PPCMCTargetDesc.h" +#include "MCTargetDesc/PPCFixupKinds.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MachO.h" + +using namespace llvm; + +namespace { +class PPCMachObjectWriter : public MCMachObjectTargetWriter { + bool RecordScatteredRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + unsigned Log2Size, uint64_t &FixedValue); + + void RecordPPCRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue); + +public: + PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) + : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, + /*UseAggressiveSymbolFolding=*/Is64Bit) {} + + void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, + const MCAsmLayout &Layout, const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + if (Writer->is64Bit()) { + report_fatal_error("Relocation emission for MachO/PPC64 unimplemented."); + } else + RecordPPCRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, + FixedValue); + } +}; +} + +/// computes the log2 of the size of the relocation, +/// used for relocation_info::r_length. +static unsigned getFixupKindLog2Size(unsigned Kind) { + switch (Kind) { + default: + report_fatal_error("log2size(FixupKind): Unhandled fixup kind!"); + case FK_PCRel_1: + case FK_Data_1: + return 0; + case FK_PCRel_2: + case FK_Data_2: + return 1; + case FK_PCRel_4: + case PPC::fixup_ppc_brcond14: + case PPC::fixup_ppc_half16: + case PPC::fixup_ppc_br24: + case FK_Data_4: + return 2; + case FK_PCRel_8: + case FK_Data_8: + return 3; + } + return 0; +} + +/// Translates generic PPC fixup kind to Mach-O/PPC relocation type enum. +/// Outline based on PPCELFObjectWriter::getRelocTypeInner(). +static unsigned getRelocType(const MCValue &Target, + const MCFixupKind FixupKind, // from + // Fixup.getKind() + const bool IsPCRel) { + const MCSymbolRefExpr::VariantKind Modifier = + Target.isAbsolute() ? 
MCSymbolRefExpr::VK_None + : Target.getSymA()->getKind(); + // determine the type of the relocation + unsigned Type = MachO::GENERIC_RELOC_VANILLA; + if (IsPCRel) { // relative to PC + switch ((unsigned)FixupKind) { + default: + report_fatal_error("Unimplemented fixup kind (relative)"); + case PPC::fixup_ppc_br24: + Type = MachO::PPC_RELOC_BR24; // R_PPC_REL24 + break; + case PPC::fixup_ppc_brcond14: + Type = MachO::PPC_RELOC_BR14; + break; + case PPC::fixup_ppc_half16: + switch (Modifier) { + default: + llvm_unreachable("Unsupported modifier for half16 fixup"); + case MCSymbolRefExpr::VK_PPC_HA: + Type = MachO::PPC_RELOC_HA16; + break; + case MCSymbolRefExpr::VK_PPC_LO: + Type = MachO::PPC_RELOC_LO16; + break; + case MCSymbolRefExpr::VK_PPC_HI: + Type = MachO::PPC_RELOC_HI16; + break; + } + break; + } + } else { + switch ((unsigned)FixupKind) { + default: + report_fatal_error("Unimplemented fixup kind (absolute)!"); + case PPC::fixup_ppc_half16: + switch (Modifier) { + default: + llvm_unreachable("Unsupported modifier for half16 fixup"); + case MCSymbolRefExpr::VK_PPC_HA: + Type = MachO::PPC_RELOC_HA16_SECTDIFF; + break; + case MCSymbolRefExpr::VK_PPC_LO: + Type = MachO::PPC_RELOC_LO16_SECTDIFF; + break; + case MCSymbolRefExpr::VK_PPC_HI: + Type = MachO::PPC_RELOC_HI16_SECTDIFF; + break; + } + break; + case FK_Data_4: + break; + case FK_Data_2: + break; + } + } + return Type; +} + +static void makeRelocationInfo(MachO::any_relocation_info &MRE, + const uint32_t FixupOffset, const uint32_t Index, + const unsigned IsPCRel, const unsigned Log2Size, + const unsigned IsExtern, const unsigned Type) { + MRE.r_word0 = FixupOffset; + // The bitfield offsets that work (as determined by trial-and-error) + // are different than what is documented in the mach-o manuals. + // This appears to be an endianness issue; reversing the order of the + // documented bitfields in <llvm/Support/MachO.h> fixes this (but + // breaks x86/ARM assembly). + MRE.r_word1 = ((Index << 8) | // was << 0 + (IsPCRel << 7) | // was << 24 + (Log2Size << 5) | // was << 25 + (IsExtern << 4) | // was << 27 + (Type << 0)); // was << 28 +} + +static void +makeScatteredRelocationInfo(MachO::any_relocation_info &MRE, + const uint32_t Addr, const unsigned Type, + const unsigned Log2Size, const unsigned IsPCRel, + const uint32_t Value2) { + // For notes on bitfield positions and endianness, see: + // https://developer.apple.com/library/mac/documentation/developertools/conceptual/MachORuntime/Reference/reference.html#//apple_ref/doc/uid/20001298-scattered_relocation_entry + MRE.r_word0 = ((Addr << 0) | (Type << 24) | (Log2Size << 28) | + (IsPCRel << 30) | MachO::R_SCATTERED); + MRE.r_word1 = Value2; +} + +/// Compute fixup offset (address). +static uint32_t getFixupOffset(const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup) { + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); + // On Mach-O, ppc_fixup_half16 relocations must refer to the + // start of the instruction, not the second halfword, as ELF does + if (unsigned(Fixup.getKind()) == PPC::fixup_ppc_half16) + FixupOffset &= ~uint32_t(3); + return FixupOffset; +} + +/// \return false if falling back to using non-scattered relocation, +/// otherwise true for normal scattered relocation. 
+/// based on X86MachObjectWriter::RecordScatteredRelocation +/// and ARMMachObjectWriter::RecordScatteredRelocation +bool PPCMachObjectWriter::RecordScatteredRelocation( + MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, + unsigned Log2Size, uint64_t &FixedValue) { + // caller already computes these, can we just pass and reuse? + const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup); + const MCFixupKind FK = Fixup.getKind(); + const unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, FK); + const unsigned Type = getRelocType(Target, FK, IsPCRel); + + // Is this a local or SECTDIFF relocation entry? + // SECTDIFF relocation entries have symbol subtractions, + // and require two entries, the first for the add-symbol value, + // the second for the subtract-symbol value. + + // See <reloc.h>. + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData *A_SD = &Asm.getSymbolData(*A); + + if (!A_SD->getFragment()) + report_fatal_error("symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + + uint32_t Value = Writer->getSymbolAddress(A_SD, Layout); + uint64_t SecAddr = + Writer->getSectionAddress(A_SD->getFragment()->getParent()); + FixedValue += SecAddr; + uint32_t Value2 = 0; + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + + if (!B_SD->getFragment()) + report_fatal_error("symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + + // FIXME: is Type correct? see include/llvm/Support/MachO.h + Value2 = Writer->getSymbolAddress(B_SD, Layout); + FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); + } + // FIXME: does FixedValue get used?? + + // Relocations are written out in reverse order, so the PAIR comes first. + if (Type == MachO::PPC_RELOC_SECTDIFF || + Type == MachO::PPC_RELOC_HI16_SECTDIFF || + Type == MachO::PPC_RELOC_LO16_SECTDIFF || + Type == MachO::PPC_RELOC_HA16_SECTDIFF || + Type == MachO::PPC_RELOC_LO14_SECTDIFF || + Type == MachO::PPC_RELOC_LOCAL_SECTDIFF) { + // X86 had this piece, but ARM does not + // If the offset is too large to fit in a scattered relocation, + // we're hosed. It's an unfortunate limitation of the MachO format. + if (FixupOffset > 0xffffff) { + char Buffer[32]; + format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer)); + Asm.getContext().FatalError(Fixup.getLoc(), + Twine("Section too large, can't encode " + "r_address (") + + Buffer + ") into 24 bits of scattered " + "relocation entry."); + llvm_unreachable("fatal error returned?!"); + } + + // Is this supposed to follow MCTarget/PPCAsmBackend.cpp:adjustFixupValue()? + // see PPCMCExpr::EvaluateAsRelocatableImpl() + uint32_t other_half = 0; + switch (Type) { + case MachO::PPC_RELOC_LO16_SECTDIFF: + other_half = (FixedValue >> 16) & 0xffff; + // applyFixupOffset longer extracts the high part because it now assumes + // this was already done. + // It looks like this is not true for the FixedValue needed with Mach-O + // relocs. + // So we need to adjust FixedValue again here. + FixedValue &= 0xffff; + break; + case MachO::PPC_RELOC_HA16_SECTDIFF: + other_half = FixedValue & 0xffff; + FixedValue = + ((FixedValue >> 16) + ((FixedValue & 0x8000) ? 
1 : 0)) & 0xffff; + break; + case MachO::PPC_RELOC_HI16_SECTDIFF: + other_half = FixedValue & 0xffff; + FixedValue = (FixedValue >> 16) & 0xffff; + break; + default: + llvm_unreachable("Invalid PPC scattered relocation type."); + break; + } + + MachO::any_relocation_info MRE; + makeScatteredRelocationInfo(MRE, other_half, MachO::GENERIC_RELOC_PAIR, + Log2Size, IsPCRel, Value2); + Writer->addRelocation(Fragment->getParent(), MRE); + } else { + // If the offset is more than 24-bits, it won't fit in a scattered + // relocation offset field, so we fall back to using a non-scattered + // relocation. This is a bit risky, as if the offset reaches out of + // the block and the linker is doing scattered loading on this + // symbol, things can go badly. + // + // Required for 'as' compatibility. + if (FixupOffset > 0xffffff) + return false; + } + MachO::any_relocation_info MRE; + makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value); + Writer->addRelocation(Fragment->getParent(), MRE); + return true; +} + +// see PPCELFObjectWriter for a general outline of cases +void PPCMachObjectWriter::RecordPPCRelocation( + MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + const MCFixupKind FK = Fixup.getKind(); // unsigned + const unsigned Log2Size = getFixupKindLog2Size(FK); + const bool IsPCRel = Writer->isFixupKindPCRel(Asm, FK); + const unsigned RelocType = getRelocType(Target, FK, IsPCRel); + + // If this is a difference or a defined symbol plus an offset, then we need a + // scattered relocation entry. Differences always require scattered + // relocations. + if (Target.getSymB() && + // Q: are branch targets ever scattered? + RelocType != MachO::PPC_RELOC_BR24 && + RelocType != MachO::PPC_RELOC_BR14) { + RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, + Log2Size, FixedValue); + return; + } + + // this doesn't seem right for RIT_PPC_BR24 + // Get the symbol data, if any. + MCSymbolData *SD = 0; + if (Target.getSymA()) + SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + + // See <reloc.h>. + const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup); + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = RelocType; + + if (Target.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + // + // FIXME: Currently, these are never generated (see code below). I cannot + // find a case where they are actually emitted. + report_fatal_error("FIXME: relocations to absolute targets " + "not yet implemented"); + // the above line stolen from ARM, not sure + } else { + // Resolve constant variables. + if (SD->getSymbol().isVariable()) { + int64_t Res; + if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( + Res, Layout, Writer->getSectionAddressMap())) { + FixedValue = Res; + return; + } + } + + // Check whether we need an external or internal relocation. + if (Writer->doesSymbolRequireExternRelocation(SD)) { + IsExtern = 1; + Index = SD->getIndex(); + // For external relocations, make sure to offset the fixup value to + // compensate for the addend of the symbol address, if it was + // undefined. This occurs with weak definitions, for example. + if (!SD->Symbol->isUndefined()) + FixedValue -= Layout.getSymbolOffset(SD); + } else { + // The index is the section ordinal (1-based). 
+ const MCSectionData &SymSD = + Asm.getSectionData(SD->getSymbol().getSection()); + Index = SymSD.getOrdinal() + 1; + FixedValue += Writer->getSectionAddress(&SymSD); + } + if (IsPCRel) + FixedValue -= Writer->getSectionAddress(Fragment->getParent()); + } + + // struct relocation_info (8 bytes) + MachO::any_relocation_info MRE; + makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, IsExtern, + Type); + Writer->addRelocation(Fragment->getParent(), MRE); +} + +MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit, + uint32_t CPUType, + uint32_t CPUSubtype) { + return createMachObjectWriter( + new PPCMachObjectWriter(Is64Bit, CPUType, CPUSubtype), OS, + /*IsLittleEndian=*/false); +} diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 806822c..54e3d40 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -57,6 +57,8 @@ def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true", "Enable the MFOCRF instruction">; def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", "Enable the fsqrt instruction">; +def FeatureFCPSGN : SubtargetFeature<"fcpsgn", "HasFCPSGN", "true", + "Enable the fcpsgn instruction">; def FeatureFRE : SubtargetFeature<"fre", "HasFRE", "true", "Enable the fre instruction">; def FeatureFRES : SubtargetFeature<"fres", "HasFRES", "true", @@ -85,6 +87,13 @@ def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", "Enable Book E instructions">; def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", "Enable QPX instructions">; +def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true", + "Enable VSX instructions">; + +def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true", + "Treat mftb as deprecated">; +def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true", + "Treat vector data stream cache control instructions as deprecated">; // Note: Future features to add when support is extended to more // recent ISA levels: @@ -146,10 +155,10 @@ include "PPCInstrInfo.td" def : Processor<"generic", G3Itineraries, [Directive32]>; def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL, FeatureFRES, FeatureFRSQRTE, - FeatureBookE]>; + FeatureBookE, DeprecatedMFTB]>; def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL, FeatureFRES, FeatureFRSQRTE, - FeatureBookE]>; + FeatureBookE, DeprecatedMFTB]>; def : Processor<"601", G3Itineraries, [Directive601]>; def : Processor<"602", G3Itineraries, [Directive602]>; def : Processor<"603", G3Itineraries, [Directive603, @@ -185,29 +194,32 @@ def : ProcessorModel<"g5", G5Model, [Directive970, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, FeatureFRES, FeatureFRSQRTE, - Feature64Bit /*, Feature64BitRegs */]>; + Feature64Bit /*, Feature64BitRegs */, + DeprecatedMFTB, DeprecatedDST]>; def : ProcessorModel<"e500mc", PPCE500mcModel, [DirectiveE500mc, FeatureMFOCRF, - FeatureSTFIWX, FeatureBookE, FeatureISEL]>; + FeatureSTFIWX, FeatureBookE, FeatureISEL, + DeprecatedMFTB]>; def : ProcessorModel<"e5500", PPCE5500Model, [DirectiveE5500, FeatureMFOCRF, Feature64Bit, - FeatureSTFIWX, FeatureBookE, FeatureISEL]>; + FeatureSTFIWX, FeatureBookE, FeatureISEL, + DeprecatedMFTB]>; def : ProcessorModel<"a2", PPCA2Model, [DirectiveA2, FeatureBookE, FeatureMFOCRF, - FeatureFSqrt, FeatureFRE, FeatureFRES, + FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, FeaturePOPCNTD, FeatureLDBRX, 
Feature64Bit - /*, Feature64BitRegs */]>; + /*, Feature64BitRegs */, DeprecatedMFTB]>; def : ProcessorModel<"a2q", PPCA2Model, [DirectiveA2, FeatureBookE, FeatureMFOCRF, - FeatureFSqrt, FeatureFRE, FeatureFRES, + FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, FeaturePOPCNTD, FeatureLDBRX, Feature64Bit - /*, Feature64BitRegs */, FeatureQPX]>; + /*, Feature64BitRegs */, FeatureQPX, DeprecatedMFTB]>; def : ProcessorModel<"pwr3", G5Model, [DirectivePwr3, FeatureAltivec, FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF, @@ -220,32 +232,37 @@ def : ProcessorModel<"pwr5", G5Model, [DirectivePwr5, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, - FeatureSTFIWX, Feature64Bit]>; + FeatureSTFIWX, Feature64Bit, + DeprecatedMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr5x", G5Model, [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, - FeatureSTFIWX, FeatureFPRND, Feature64Bit]>; + FeatureSTFIWX, FeatureFPRND, Feature64Bit, + DeprecatedMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr6", G5Model, [DirectivePwr6, FeatureAltivec, - FeatureMFOCRF, FeatureFSqrt, FeatureFRE, + FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, - FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>; + FeatureFPRND, Feature64Bit /*, Feature64BitRegs */, + DeprecatedMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr6x", G5Model, [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, - FeatureFSqrt, FeatureFRE, FeatureFRES, + FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, - FeatureFPRND, Feature64Bit]>; + FeatureFPRND, Feature64Bit, + DeprecatedMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr7", G5Model, [DirectivePwr7, FeatureAltivec, - FeatureMFOCRF, FeatureFSqrt, FeatureFRE, + FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, FeaturePOPCNTD, FeatureLDBRX, - Feature64Bit /*, Feature64BitRegs */]>; + Feature64Bit /*, Feature64BitRegs */, + DeprecatedMFTB, DeprecatedDST]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : ProcessorModel<"ppc64", G5Model, [Directive64, FeatureAltivec, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index bbfad87..ada34ed 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -23,6 +23,7 @@ #include "MCTargetDesc/PPCMCExpr.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" +#include "PPCTargetStreamer.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -202,7 +203,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, .getGVStubEntry(SymToPrint); if (StubSym.getPointer() == 0) StubSym = MachineModuleInfoImpl:: - StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage()); + StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); } else if (GV->isDeclaration() || GV->hasCommonLinkage() || GV->hasAvailableExternallyLinkage()) { SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); @@ -212,12 +213,12 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, 
unsigned OpNo, getHiddenGVStubEntry(SymToPrint); if (StubSym.getPointer() == 0) StubSym = MachineModuleInfoImpl:: - StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage()); + StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); } else { - SymToPrint = Mang->getSymbol(GV); + SymToPrint = getSymbol(GV); } } else { - SymToPrint = Mang->getSymbol(GV); + SymToPrint = getSymbol(GV); } O << *SymToPrint; @@ -363,7 +364,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { assert(MO.isGlobal() || MO.isCPI() || MO.isJTI()); MCSymbol *MOSymbol = 0; if (MO.isGlobal()) - MOSymbol = Mang->getSymbol(MO.getGlobal()); + MOSymbol = getSymbol(MO.getGlobal()); else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); else if (MO.isJTI()) @@ -402,7 +403,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); const GlobalValue *RealGValue = GAlias ? GAlias->resolveAliasedGlobal(false) : GValue; - MOSymbol = Mang->getSymbol(RealGValue); + MOSymbol = getSymbol(RealGValue); const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); IsExternal = GVar && !GVar->hasInitializer(); IsCommon = GVar && RealGValue->hasCommonLinkage(); @@ -413,7 +414,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { else if (MO.isJTI()) MOSymbol = GetJTISymbol(MO.getIndex()); - if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI()) + if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI() || + TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = @@ -438,18 +440,22 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (MO.isJTI()) MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex())); - else if (MO.isCPI()) + else if (MO.isCPI()) { MOSymbol = GetCPISymbol(MO.getIndex()); + if (TM.getCodeModel() == CodeModel::Large) + MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); + } else if (MO.isGlobal()) { const GlobalValue *GValue = MO.getGlobal(); const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); const GlobalValue *RealGValue = GAlias ? GAlias->resolveAliasedGlobal(false) : GValue; - MOSymbol = Mang->getSymbol(RealGValue); + MOSymbol = getSymbol(RealGValue); const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || - RealGValue->hasAvailableExternallyLinkage()) + RealGValue->hasAvailableExternallyLinkage() || + TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); } @@ -479,14 +485,14 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); const GlobalValue *RealGValue = GAlias ? 
GAlias->resolveAliasedGlobal(false) : GValue; - MOSymbol = Mang->getSymbol(RealGValue); + MOSymbol = getSymbol(RealGValue); const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); IsExternal = GVar && !GVar->hasInitializer(); IsFunction = !GVar; } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); - if (IsFunction || IsExternal) + if (IsFunction || IsExternal || TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = @@ -502,7 +508,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = Mang->getSymbol(GValue); + MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTprel = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA, OutContext); @@ -520,7 +526,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { TmpInst.setOpcode(PPC::LD); const MachineOperand &MO = MI->getOperand(1); const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = Mang->getSymbol(GValue); + MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *Exp = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO, OutContext); @@ -534,7 +540,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = Mang->getSymbol(GValue); + MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTlsGD = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA, OutContext); @@ -550,7 +556,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = Mang->getSymbol(GValue); + MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTlsGD = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO, OutContext); @@ -571,7 +577,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = Mang->getSymbol(GValue); + MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD, OutContext); @@ -586,7 +592,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = Mang->getSymbol(GValue); + MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTlsLD = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA, OutContext); @@ -602,7 +608,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = Mang->getSymbol(GValue); + MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTlsLD = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO, OutContext); @@ -623,7 +629,7 @@ 
void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = Mang->getSymbol(GValue); + MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD, OutContext); @@ -638,7 +644,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = Mang->getSymbol(GValue); + MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymDtprel = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA, OutContext); @@ -654,7 +660,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = Mang->getSymbol(GValue); + MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymDtprel = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO, OutContext); @@ -704,6 +710,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { break; case PPC::LD: case PPC::STD: + case PPC::LWA_32: case PPC::LWA: { // Verify alignment is legal, so we don't create relocations // that can't be supported. @@ -765,6 +772,9 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { bool isPPC64 = TD->getPointerSizeInBits() == 64; + PPCTargetStreamer &TS = + static_cast<PPCTargetStreamer &>(OutStreamer.getTargetStreamer()); + if (isPPC64 && !TOC.empty()) { const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC, @@ -775,7 +785,7 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { E = TOC.end(); I != E; ++I) { OutStreamer.EmitLabel(I->second); MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName()); - OutStreamer.EmitTCEntry(*S); + TS.emitTCEntry(*S); } } @@ -1051,7 +1061,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); MachineModuleInfoImpl::StubValueTy &StubSym = MMIMacho.getGVStubEntry(NLPSym); - StubSym = MachineModuleInfoImpl::StubValueTy(Mang->getSymbol(*I), true); + StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(*I), true); } } } diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 4e30c537..4224ae2 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -253,12 +253,19 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { case Intrinsic::sin: case Intrinsic::cos: return true; + case Intrinsic::copysign: + if (CI->getArgOperand(0)->getType()->getScalarType()-> + isPPC_FP128Ty()) + return true; + else + continue; // ISD::FCOPYSIGN is never a library call. 
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; case Intrinsic::floor: Opcode = ISD::FFLOOR; break; case Intrinsic::ceil: Opcode = ISD::FCEIL; break; case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + case Intrinsic::round: Opcode = ISD::FROUND; break; } } @@ -283,8 +290,9 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { default: return true; case LibFunc::copysign: case LibFunc::copysignf: - case LibFunc::copysignl: continue; // ISD::FCOPYSIGN is never a library call. + case LibFunc::copysignl: + return true; case LibFunc::fabs: case LibFunc::fabsf: case LibFunc::fabsl: @@ -309,6 +317,10 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { case LibFunc::rintf: case LibFunc::rintl: Opcode = ISD::FRINT; break; + case LibFunc::round: + case LibFunc::roundf: + case LibFunc::roundl: + Opcode = ISD::FROUND; break; case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index a584188..e8e7f4c 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -37,6 +37,37 @@ def RetCC_PPC : CallingConv<[ ]>; +// Note that we don't currently have calling conventions for 64-bit +// PowerPC, but handle all the complexities of the ABI in the lowering +// logic. FIXME: See if the logic can be simplified with use of CCs. +// This may require some extensions to current table generation. + +// Simple calling convention for 64-bit ELF PowerPC fast isel. +// Only handle ints and floats. All ints are promoted to i64. +// Vector types and quadword ints are not handled. +def CC_PPC64_ELF_FIS : CallingConv<[ + CCIfType<[i8], CCPromoteToType<i64>>, + CCIfType<[i16], CCPromoteToType<i64>>, + CCIfType<[i32], CCPromoteToType<i64>>, + CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>, + CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>> +]>; + +// Simple return-value convention for 64-bit ELF PowerPC fast isel. +// All small ints are promoted to i64. Vector types, quadword ints, +// and multiple register returns are "supported" to avoid compile +// errors, but none are handled by the fast selector. +def RetCC_PPC64_ELF_FIS : CallingConv<[ + CCIfType<[i8], CCPromoteToType<i64>>, + CCIfType<[i16], CCPromoteToType<i64>>, + CCIfType<[i32], CCPromoteToType<i64>>, + CCIfType<[i64], CCAssignToReg<[X3, X4]>>, + CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, + CCIfType<[f32], CCAssignToReg<[F1, F2]>>, + CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>, + CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>> +]>; + //===----------------------------------------------------------------------===// // PowerPC System V Release 4 32-bit ABI //===----------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 8cbf1fb..09117e7 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -37,6 +37,25 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +//===----------------------------------------------------------------------===// +// +// TBD: +// FastLowerArguments: Handle simple cases. +// PPCMaterializeGV: Handle TLS. +// SelectCall: Handle function pointers. +// SelectCall: Handle multi-register return values. 
+// SelectCall: Optimize away nops for local calls. +// processCallArgs: Handle bit-converted arguments. +// finishCall: Handle multi-register return values. +// PPCComputeAddress: Handle parameter references as FrameIndex's. +// PPCEmitCmp: Handle immediate as operand 1. +// SelectCall: Handle small byval arguments. +// SelectIntrinsicCall: Implement. +// SelectSelect: Implement. +// Consider factoring isTypeLegal into the base class. +// Implement switches and jump tables. +// +//===----------------------------------------------------------------------===// using namespace llvm; namespace { @@ -52,7 +71,7 @@ typedef struct Address { int FI; } Base; - int Offset; + long Offset; // Innocuous defaults for our address. Address() @@ -89,15 +108,76 @@ class PPCFastISel : public FastISel { virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, const LoadInst *LI); virtual bool FastLowerArguments(); + virtual unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm); + virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm); + virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill); + virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill); + + // Instruction selection routines. + private: + bool SelectLoad(const Instruction *I); + bool SelectStore(const Instruction *I); + bool SelectBranch(const Instruction *I); + bool SelectIndirectBr(const Instruction *I); + bool SelectCmp(const Instruction *I); + bool SelectFPExt(const Instruction *I); + bool SelectFPTrunc(const Instruction *I); + bool SelectIToFP(const Instruction *I, bool IsSigned); + bool SelectFPToI(const Instruction *I, bool IsSigned); + bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode); + bool SelectCall(const Instruction *I); + bool SelectRet(const Instruction *I); + bool SelectTrunc(const Instruction *I); + bool SelectIntExt(const Instruction *I); // Utility routines. private: + bool isTypeLegal(Type *Ty, MVT &VT); + bool isLoadTypeLegal(Type *Ty, MVT &VT); + bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, + bool isZExt, unsigned DestReg); + bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, + const TargetRegisterClass *RC, bool IsZExt = true, + unsigned FP64LoadOpc = PPC::LFD); + bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr); + bool PPCComputeAddress(const Value *Obj, Address &Addr); + void PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset, + unsigned &IndexReg); + bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, + unsigned DestReg, bool IsZExt); unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT); + unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT); unsigned PPCMaterializeInt(const Constant *C, MVT VT); unsigned PPCMaterialize32BitInt(int64_t Imm, const TargetRegisterClass *RC); unsigned PPCMaterialize64BitInt(int64_t Imm, const TargetRegisterClass *RC); + unsigned PPCMoveToIntReg(const Instruction *I, MVT VT, + unsigned SrcReg, bool IsSigned); + unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned); + + // Call handling routines. 
+ private: + bool processCallArgs(SmallVectorImpl<Value*> &Args, + SmallVectorImpl<unsigned> &ArgRegs, + SmallVectorImpl<MVT> &ArgVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags, + SmallVectorImpl<unsigned> &RegArgs, + CallingConv::ID CC, + unsigned &NumBytes, + bool IsVarArg); + void finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, + const Instruction *I, CallingConv::ID CC, + unsigned &NumBytes, bool IsVarArg); + CCAssignFn *usePPC32CCs(unsigned Flag); private: #include "PPCGenFastISel.inc" @@ -106,10 +186,1601 @@ class PPCFastISel : public FastISel { } // end anonymous namespace +#include "PPCGenCallingConv.inc" + +// Function whose sole purpose is to kill compiler warnings +// stemming from unused functions included from PPCGenCallingConv.inc. +CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) { + if (Flag == 1) + return CC_PPC32_SVR4; + else if (Flag == 2) + return CC_PPC32_SVR4_ByVal; + else if (Flag == 3) + return CC_PPC32_SVR4_VarArg; + else + return RetCC_PPC; +} + +static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) { + switch (Pred) { + // These are not representable with any single compare. + case CmpInst::FCMP_FALSE: + case CmpInst::FCMP_UEQ: + case CmpInst::FCMP_UGT: + case CmpInst::FCMP_UGE: + case CmpInst::FCMP_ULT: + case CmpInst::FCMP_ULE: + case CmpInst::FCMP_UNE: + case CmpInst::FCMP_TRUE: + default: + return Optional<PPC::Predicate>(); + + case CmpInst::FCMP_OEQ: + case CmpInst::ICMP_EQ: + return PPC::PRED_EQ; + + case CmpInst::FCMP_OGT: + case CmpInst::ICMP_UGT: + case CmpInst::ICMP_SGT: + return PPC::PRED_GT; + + case CmpInst::FCMP_OGE: + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_SGE: + return PPC::PRED_GE; + + case CmpInst::FCMP_OLT: + case CmpInst::ICMP_ULT: + case CmpInst::ICMP_SLT: + return PPC::PRED_LT; + + case CmpInst::FCMP_OLE: + case CmpInst::ICMP_ULE: + case CmpInst::ICMP_SLE: + return PPC::PRED_LE; + + case CmpInst::FCMP_ONE: + case CmpInst::ICMP_NE: + return PPC::PRED_NE; + + case CmpInst::FCMP_ORD: + return PPC::PRED_NU; + + case CmpInst::FCMP_UNO: + return PPC::PRED_UN; + } +} + +// Determine whether the type Ty is simple enough to be handled by +// fast-isel, and return its equivalent machine type in VT. +// FIXME: Copied directly from ARM -- factor into base class? +bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) { + EVT Evt = TLI.getValueType(Ty, true); + + // Only handle simple types. + if (Evt == MVT::Other || !Evt.isSimple()) return false; + VT = Evt.getSimpleVT(); + + // Handle all legal types, i.e. a register that will directly hold this + // value. + return TLI.isTypeLegal(VT); +} + +// Determine whether the type Ty is simple enough to be handled by +// fast-isel as a load target, and return its equivalent machine type in VT. +bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { + if (isTypeLegal(Ty, VT)) return true; + + // If this is a type than can be sign or zero-extended to a basic operation + // go ahead and accept it now. + if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) { + return true; + } + + return false; +} + +// Given a value Obj, create an Address object Addr that represents its +// address. Return false if we can't handle it. +bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) { + const User *U = NULL; + unsigned Opcode = Instruction::UserOp1; + if (const Instruction *I = dyn_cast<Instruction>(Obj)) { + // Don't walk into other basic blocks unless the object is an alloca from + // another block, otherwise it may not have a virtual register assigned. 
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || + FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { + Opcode = I->getOpcode(); + U = I; + } + } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { + Opcode = C->getOpcode(); + U = C; + } + + switch (Opcode) { + default: + break; + case Instruction::BitCast: + // Look through bitcasts. + return PPCComputeAddress(U->getOperand(0), Addr); + case Instruction::IntToPtr: + // Look past no-op inttoptrs. + if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + return PPCComputeAddress(U->getOperand(0), Addr); + break; + case Instruction::PtrToInt: + // Look past no-op ptrtoints. + if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + return PPCComputeAddress(U->getOperand(0), Addr); + break; + case Instruction::GetElementPtr: { + Address SavedAddr = Addr; + long TmpOffset = Addr.Offset; + + // Iterate through the GEP folding the constants into offsets where + // we can. + gep_type_iterator GTI = gep_type_begin(U); + for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end(); + II != IE; ++II, ++GTI) { + const Value *Op = *II; + if (StructType *STy = dyn_cast<StructType>(*GTI)) { + const StructLayout *SL = TD.getStructLayout(STy); + unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); + TmpOffset += SL->getElementOffset(Idx); + } else { + uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); + for (;;) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { + // Constant-offset addressing. + TmpOffset += CI->getSExtValue() * S; + break; + } + if (canFoldAddIntoGEP(U, Op)) { + // A compatible add with a constant operand. Fold the constant. + ConstantInt *CI = + cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); + TmpOffset += CI->getSExtValue() * S; + // Iterate on the other operand. + Op = cast<AddOperator>(Op)->getOperand(0); + continue; + } + // Unsupported + goto unsupported_gep; + } + } + } + + // Try to grab the base operand now. + Addr.Offset = TmpOffset; + if (PPCComputeAddress(U->getOperand(0), Addr)) return true; + + // We failed, restore everything and try the other options. + Addr = SavedAddr; + + unsupported_gep: + break; + } + case Instruction::Alloca: { + const AllocaInst *AI = cast<AllocaInst>(Obj); + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) { + Addr.BaseType = Address::FrameIndexBase; + Addr.Base.FI = SI->second; + return true; + } + break; + } + } + + // FIXME: References to parameters fall through to the behavior + // below. They should be able to reference a frame index since + // they are stored to the stack, so we can get "ld rx, offset(r1)" + // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will + // just contain the parameter. Try to handle this with a FI. + + // Try to get this in a register if nothing else has worked. + if (Addr.Base.Reg == 0) + Addr.Base.Reg = getRegForValue(Obj); + + // Prevent assignment of base register to X0, which is inappropriate + // for loads and stores alike. + if (Addr.Base.Reg != 0) + MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass); + + return Addr.Base.Reg != 0; +} + +// Fix up some addresses that can't be used directly. For example, if +// an offset won't fit in an instruction field, we may need to move it +// into an index register. 
+void PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset, + unsigned &IndexReg) { + + // Check whether the offset fits in the instruction field. + if (!isInt<16>(Addr.Offset)) + UseOffset = false; + + // If this is a stack pointer and the offset needs to be simplified then + // put the alloca address into a register, set the base type back to + // register and continue. This should almost never happen. + if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) { + unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8), + ResultReg).addFrameIndex(Addr.Base.FI).addImm(0); + Addr.Base.Reg = ResultReg; + Addr.BaseType = Address::RegBase; + } + + if (!UseOffset) { + IntegerType *OffsetTy = ((VT == MVT::i32) ? Type::getInt32Ty(*Context) + : Type::getInt64Ty(*Context)); + const ConstantInt *Offset = + ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset)); + IndexReg = PPCMaterializeInt(Offset, MVT::i64); + assert(IndexReg && "Unexpected error in PPCMaterializeInt!"); + } +} + +// Emit a load instruction if possible, returning true if we succeeded, +// otherwise false. See commentary below for how the register class of +// the load is determined. +bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, + const TargetRegisterClass *RC, + bool IsZExt, unsigned FP64LoadOpc) { + unsigned Opc; + bool UseOffset = true; + + // If ResultReg is given, it determines the register class of the load. + // Otherwise, RC is the register class to use. If the result of the + // load isn't anticipated in this block, both may be zero, in which + // case we must make a conservative guess. In particular, don't assign + // R0 or X0 to the result register, as the result may be used in a load, + // store, add-immediate, or isel that won't permit this. (Though + // perhaps the spill and reload of live-exit values would handle this?) + const TargetRegisterClass *UseRC = + (ResultReg ? MRI.getRegClass(ResultReg) : + (RC ? RC : + (VT == MVT::f64 ? &PPC::F8RCRegClass : + (VT == MVT::f32 ? &PPC::F4RCRegClass : + (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass : + &PPC::GPRC_and_GPRC_NOR0RegClass))))); + + bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass); + + switch (VT.SimpleTy) { + default: // e.g., vector types not handled + return false; + case MVT::i8: + Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8; + break; + case MVT::i16: + Opc = (IsZExt ? + (Is32BitInt ? PPC::LHZ : PPC::LHZ8) : + (Is32BitInt ? PPC::LHA : PPC::LHA8)); + break; + case MVT::i32: + Opc = (IsZExt ? + (Is32BitInt ? PPC::LWZ : PPC::LWZ8) : + (Is32BitInt ? PPC::LWA_32 : PPC::LWA)); + if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0)) + UseOffset = false; + break; + case MVT::i64: + Opc = PPC::LD; + assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) && + "64-bit load with 32-bit target??"); + UseOffset = ((Addr.Offset & 3) == 0); + break; + case MVT::f32: + Opc = PPC::LFS; + break; + case MVT::f64: + Opc = FP64LoadOpc; + break; + } + + // If necessary, materialize the offset into a register and use + // the indexed form. Also handle stack pointers with special needs. + unsigned IndexReg = 0; + PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg); + if (ResultReg == 0) + ResultReg = createResultReg(UseRC); + + // Note: If we still have a frame index here, we know the offset is + // in range, as otherwise PPCSimplifyAddress would have converted it + // into a RegBase. 
+ if (Addr.BaseType == Address::FrameIndexBase) { + + MachineMemOperand *MMO = + FuncInfo.MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset), + MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI), + MFI.getObjectAlignment(Addr.Base.FI)); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO); + + // Base reg with offset in range. + } else if (UseOffset) { + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + .addImm(Addr.Offset).addReg(Addr.Base.Reg); + + // Indexed form. + } else { + // Get the RR opcode corresponding to the RI one. FIXME: It would be + // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it + // is hard to get at. + switch (Opc) { + default: llvm_unreachable("Unexpected opcode!"); + case PPC::LBZ: Opc = PPC::LBZX; break; + case PPC::LBZ8: Opc = PPC::LBZX8; break; + case PPC::LHZ: Opc = PPC::LHZX; break; + case PPC::LHZ8: Opc = PPC::LHZX8; break; + case PPC::LHA: Opc = PPC::LHAX; break; + case PPC::LHA8: Opc = PPC::LHAX8; break; + case PPC::LWZ: Opc = PPC::LWZX; break; + case PPC::LWZ8: Opc = PPC::LWZX8; break; + case PPC::LWA: Opc = PPC::LWAX; break; + case PPC::LWA_32: Opc = PPC::LWAX_32; break; + case PPC::LD: Opc = PPC::LDX; break; + case PPC::LFS: Opc = PPC::LFSX; break; + case PPC::LFD: Opc = PPC::LFDX; break; + } + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + .addReg(Addr.Base.Reg).addReg(IndexReg); + } + + return true; +} + +// Attempt to fast-select a load instruction. +bool PPCFastISel::SelectLoad(const Instruction *I) { + // FIXME: No atomic loads are supported. + if (cast<LoadInst>(I)->isAtomic()) + return false; + + // Verify we have a legal type before going any further. + MVT VT; + if (!isLoadTypeLegal(I->getType(), VT)) + return false; + + // See if we can handle this address. + Address Addr; + if (!PPCComputeAddress(I->getOperand(0), Addr)) + return false; + + // Look at the currently assigned register for this instruction + // to determine the required register class. This is necessary + // to constrain RA from using R0/X0 when this is not legal. + unsigned AssignedReg = FuncInfo.ValueMap[I]; + const TargetRegisterClass *RC = + AssignedReg ? MRI.getRegClass(AssignedReg) : 0; + + unsigned ResultReg = 0; + if (!PPCEmitLoad(VT, ResultReg, Addr, RC)) + return false; + UpdateValueMap(I, ResultReg); + return true; +} + +// Emit a store instruction to store SrcReg at Addr. +bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { + assert(SrcReg && "Nothing to store!"); + unsigned Opc; + bool UseOffset = true; + + const TargetRegisterClass *RC = MRI.getRegClass(SrcReg); + bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass); + + switch (VT.SimpleTy) { + default: // e.g., vector types not handled + return false; + case MVT::i8: + Opc = Is32BitInt ? PPC::STB : PPC::STB8; + break; + case MVT::i16: + Opc = Is32BitInt ? PPC::STH : PPC::STH8; + break; + case MVT::i32: + assert(Is32BitInt && "Not GPRC for i32??"); + Opc = PPC::STW; + break; + case MVT::i64: + Opc = PPC::STD; + UseOffset = ((Addr.Offset & 3) == 0); + break; + case MVT::f32: + Opc = PPC::STFS; + break; + case MVT::f64: + Opc = PPC::STFD; + break; + } + + // If necessary, materialize the offset into a register and use + // the indexed form. Also handle stack pointers with special needs. 
+ unsigned IndexReg = 0; + PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg); + + // Note: If we still have a frame index here, we know the offset is + // in range, as otherwise PPCSimplifyAddress would have converted it + // into a RegBase. + if (Addr.BaseType == Address::FrameIndexBase) { + MachineMemOperand *MMO = + FuncInfo.MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset), + MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI), + MFI.getObjectAlignment(Addr.Base.FI)); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)).addReg(SrcReg) + .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO); + + // Base reg with offset in range. + } else if (UseOffset) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) + .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg); + + // Indexed form. + else { + // Get the RR opcode corresponding to the RI one. FIXME: It would be + // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it + // is hard to get at. + switch (Opc) { + default: llvm_unreachable("Unexpected opcode!"); + case PPC::STB: Opc = PPC::STBX; break; + case PPC::STH : Opc = PPC::STHX; break; + case PPC::STW : Opc = PPC::STWX; break; + case PPC::STB8: Opc = PPC::STBX8; break; + case PPC::STH8: Opc = PPC::STHX8; break; + case PPC::STW8: Opc = PPC::STWX8; break; + case PPC::STD: Opc = PPC::STDX; break; + case PPC::STFS: Opc = PPC::STFSX; break; + case PPC::STFD: Opc = PPC::STFDX; break; + } + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) + .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg); + } + + return true; +} + +// Attempt to fast-select a store instruction. +bool PPCFastISel::SelectStore(const Instruction *I) { + Value *Op0 = I->getOperand(0); + unsigned SrcReg = 0; + + // FIXME: No atomics loads are supported. + if (cast<StoreInst>(I)->isAtomic()) + return false; + + // Verify we have a legal type before going any further. + MVT VT; + if (!isLoadTypeLegal(Op0->getType(), VT)) + return false; + + // Get the value to be stored into a register. + SrcReg = getRegForValue(Op0); + if (SrcReg == 0) + return false; + + // See if we can handle this address. + Address Addr; + if (!PPCComputeAddress(I->getOperand(1), Addr)) + return false; + + if (!PPCEmitStore(VT, SrcReg, Addr)) + return false; + + return true; +} + +// Attempt to fast-select a branch instruction. +bool PPCFastISel::SelectBranch(const Instruction *I) { + const BranchInst *BI = cast<BranchInst>(I); + MachineBasicBlock *BrBB = FuncInfo.MBB; + MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; + MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; + + // For now, just try the simplest case where it's fed by a compare. + if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { + Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate()); + if (!OptPPCPred) + return false; + + PPC::Predicate PPCPred = OptPPCPred.getValue(); + + // Take advantage of fall-through opportunities. 
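+    // (If the true successor is also the layout successor, swap the targets
+    // and invert the predicate so the conditional branch goes to the false
+    // block and the true block is reached by falling through.)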
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { + std::swap(TBB, FBB); + PPCPred = PPC::InvertPredicate(PPCPred); + } + + unsigned CondReg = createResultReg(&PPC::CRRCRegClass); + + if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(), + CondReg)) + return false; + + BuildMI(*BrBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCC)) + .addImm(PPCPred).addReg(CondReg).addMBB(TBB); + FastEmitBranch(FBB, DL); + FuncInfo.MBB->addSuccessor(TBB); + return true; + + } else if (const ConstantInt *CI = + dyn_cast<ConstantInt>(BI->getCondition())) { + uint64_t Imm = CI->getZExtValue(); + MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; + FastEmitBranch(Target, DL); + return true; + } + + // FIXME: ARM looks for a case where the block containing the compare + // has been split from the block containing the branch. If this happens, + // there is a vreg available containing the result of the compare. I'm + // not sure we can do much, as we've lost the predicate information with + // the compare instruction -- we have a 4-bit CR but don't know which bit + // to test here. + return false; +} + +// Attempt to emit a compare of the two source values. Signed and unsigned +// comparisons are supported. Return false if we can't handle it. +bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, + bool IsZExt, unsigned DestReg) { + Type *Ty = SrcValue1->getType(); + EVT SrcEVT = TLI.getValueType(Ty, true); + if (!SrcEVT.isSimple()) + return false; + MVT SrcVT = SrcEVT.getSimpleVT(); + + // See if operand 2 is an immediate encodeable in the compare. + // FIXME: Operands are not in canonical order at -O0, so an immediate + // operand in position 1 is a lost opportunity for now. We are + // similar to ARM in this regard. + long Imm = 0; + bool UseImm = false; + + // Only 16-bit integer constants can be represented in compares for + // PowerPC. Others will be materialized into a register. + if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) { + if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 || + SrcVT == MVT::i8 || SrcVT == MVT::i1) { + const APInt &CIVal = ConstInt->getValue(); + Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue(); + if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm))) + UseImm = true; + } + } + + unsigned CmpOpc; + bool NeedsExt = false; + switch (SrcVT.SimpleTy) { + default: return false; + case MVT::f32: + CmpOpc = PPC::FCMPUS; + break; + case MVT::f64: + CmpOpc = PPC::FCMPUD; + break; + case MVT::i1: + case MVT::i8: + case MVT::i16: + NeedsExt = true; + // Intentional fall-through. + case MVT::i32: + if (!UseImm) + CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW; + else + CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI; + break; + case MVT::i64: + if (!UseImm) + CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD; + else + CmpOpc = IsZExt ? 
PPC::CMPLDI : PPC::CMPDI; + break; + } + + unsigned SrcReg1 = getRegForValue(SrcValue1); + if (SrcReg1 == 0) + return false; + + unsigned SrcReg2 = 0; + if (!UseImm) { + SrcReg2 = getRegForValue(SrcValue2); + if (SrcReg2 == 0) + return false; + } + + if (NeedsExt) { + unsigned ExtReg = createResultReg(&PPC::GPRCRegClass); + if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt)) + return false; + SrcReg1 = ExtReg; + + if (!UseImm) { + unsigned ExtReg = createResultReg(&PPC::GPRCRegClass); + if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt)) + return false; + SrcReg2 = ExtReg; + } + } + + if (!UseImm) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg) + .addReg(SrcReg1).addReg(SrcReg2); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg) + .addReg(SrcReg1).addImm(Imm); + + return true; +} + +// Attempt to fast-select a floating-point extend instruction. +bool PPCFastISel::SelectFPExt(const Instruction *I) { + Value *Src = I->getOperand(0); + EVT SrcVT = TLI.getValueType(Src->getType(), true); + EVT DestVT = TLI.getValueType(I->getType(), true); + + if (SrcVT != MVT::f32 || DestVT != MVT::f64) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (!SrcReg) + return false; + + // No code is generated for a FP extend. + UpdateValueMap(I, SrcReg); + return true; +} + +// Attempt to fast-select a floating-point truncate instruction. +bool PPCFastISel::SelectFPTrunc(const Instruction *I) { + Value *Src = I->getOperand(0); + EVT SrcVT = TLI.getValueType(Src->getType(), true); + EVT DestVT = TLI.getValueType(I->getType(), true); + + if (SrcVT != MVT::f64 || DestVT != MVT::f32) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (!SrcReg) + return false; + + // Round the result to single precision. + unsigned DestReg = createResultReg(&PPC::F4RCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP), DestReg) + .addReg(SrcReg); + + UpdateValueMap(I, DestReg); + return true; +} + +// Move an i32 or i64 value in a GPR to an f64 value in an FPR. +// FIXME: When direct register moves are implemented (see PowerISA 2.08), +// those should be used instead of moving via a stack slot when the +// subtarget permits. +// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte +// stack slot and 4-byte store/load sequence. Or just sext the 4-byte +// case to 8 bytes which produces tighter code but wastes stack space. +unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, + bool IsSigned) { + + // If necessary, extend 32-bit int to 64-bit. + if (SrcVT == MVT::i32) { + unsigned TmpReg = createResultReg(&PPC::G8RCRegClass); + if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned)) + return 0; + SrcReg = TmpReg; + } + + // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary. + Address Addr; + Addr.BaseType = Address::FrameIndexBase; + Addr.Base.FI = MFI.CreateStackObject(8, 8, false); + + // Store the value from the GPR. + if (!PPCEmitStore(MVT::i64, SrcReg, Addr)) + return 0; + + // Load the integer value into an FPR. The kind of load used depends + // on a number of conditions. 
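+  // (LFD reloads the full 64-bit slot; for a 32-bit source, LFIWZX is used
+  // for an unsigned conversion, and LFIWAX for a signed one when the
+  // subtarget provides it.)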
+ unsigned LoadOpc = PPC::LFD; + + if (SrcVT == MVT::i32) { + Addr.Offset = 4; + if (!IsSigned) + LoadOpc = PPC::LFIWZX; + else if (PPCSubTarget.hasLFIWAX()) + LoadOpc = PPC::LFIWAX; + } + + const TargetRegisterClass *RC = &PPC::F8RCRegClass; + unsigned ResultReg = 0; + if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc)) + return 0; + + return ResultReg; +} + +// Attempt to fast-select an integer-to-floating-point conversion. +bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { + MVT DstVT; + Type *DstTy = I->getType(); + if (!isTypeLegal(DstTy, DstVT)) + return false; + + if (DstVT != MVT::f32 && DstVT != MVT::f64) + return false; + + Value *Src = I->getOperand(0); + EVT SrcEVT = TLI.getValueType(Src->getType(), true); + if (!SrcEVT.isSimple()) + return false; + + MVT SrcVT = SrcEVT.getSimpleVT(); + + if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && + SrcVT != MVT::i32 && SrcVT != MVT::i64) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (SrcReg == 0) + return false; + + // We can only lower an unsigned convert if we have the newer + // floating-point conversion operations. + if (!IsSigned && !PPCSubTarget.hasFPCVT()) + return false; + + // FIXME: For now we require the newer floating-point conversion operations + // (which are present only on P7 and A2 server models) when converting + // to single-precision float. Otherwise we have to generate a lot of + // fiddly code to avoid double rounding. If necessary, the fiddly code + // can be found in PPCTargetLowering::LowerINT_TO_FP(). + if (DstVT == MVT::f32 && !PPCSubTarget.hasFPCVT()) + return false; + + // Extend the input if necessary. + if (SrcVT == MVT::i8 || SrcVT == MVT::i16) { + unsigned TmpReg = createResultReg(&PPC::G8RCRegClass); + if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned)) + return false; + SrcVT = MVT::i64; + SrcReg = TmpReg; + } + + // Move the integer value to an FPR. + unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned); + if (FPReg == 0) + return false; + + // Determine the opcode for the conversion. + const TargetRegisterClass *RC = &PPC::F8RCRegClass; + unsigned DestReg = createResultReg(RC); + unsigned Opc; + + if (DstVT == MVT::f32) + Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS; + else + Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU; + + // Generate the convert. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + .addReg(FPReg); + + UpdateValueMap(I, DestReg); + return true; +} + +// Move the floating-point value in SrcReg into an integer destination +// register, and return the register (or zero if we can't handle it). +// FIXME: When direct register moves are implemented (see PowerISA 2.08), +// those should be used instead of moving via a stack slot when the +// subtarget permits. +unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, + unsigned SrcReg, bool IsSigned) { + // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary. + // Note that if have STFIWX available, we could use a 4-byte stack + // slot for i32, but this being fast-isel we'll just go with the + // easiest code gen possible. + Address Addr; + Addr.BaseType = Address::FrameIndexBase; + Addr.Base.FI = MFI.CreateStackObject(8, 8, false); + + // Store the value from the FPR. + if (!PPCEmitStore(MVT::f64, SrcReg, Addr)) + return 0; + + // Reload it into a GPR. If we want an i32, modify the address + // to have a 4-byte offset so we load from the right place. 
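+  // (The value was spilled as an 8-byte doubleword; with the big-endian
+  // layout used here, its low word sits at offset 4 within the slot.)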
+ if (VT == MVT::i32) + Addr.Offset = 4; + + // Look at the currently assigned register for this instruction + // to determine the required register class. + unsigned AssignedReg = FuncInfo.ValueMap[I]; + const TargetRegisterClass *RC = + AssignedReg ? MRI.getRegClass(AssignedReg) : 0; + + unsigned ResultReg = 0; + if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned)) + return 0; + + return ResultReg; +} + +// Attempt to fast-select a floating-point-to-integer conversion. +bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { + MVT DstVT, SrcVT; + Type *DstTy = I->getType(); + if (!isTypeLegal(DstTy, DstVT)) + return false; + + if (DstVT != MVT::i32 && DstVT != MVT::i64) + return false; + + Value *Src = I->getOperand(0); + Type *SrcTy = Src->getType(); + if (!isTypeLegal(SrcTy, SrcVT)) + return false; + + if (SrcVT != MVT::f32 && SrcVT != MVT::f64) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (SrcReg == 0) + return false; + + // Convert f32 to f64 if necessary. This is just a meaningless copy + // to get the register class right. COPY_TO_REGCLASS is needed since + // a COPY from F4RC to F8RC is converted to a F4RC-F4RC copy downstream. + const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg); + if (InRC == &PPC::F4RCRegClass) { + unsigned TmpReg = createResultReg(&PPC::F8RCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg) + .addReg(SrcReg).addImm(PPC::F8RCRegClassID); + SrcReg = TmpReg; + } + + // Determine the opcode for the conversion, which takes place + // entirely within FPRs. + unsigned DestReg = createResultReg(&PPC::F8RCRegClass); + unsigned Opc; + + if (DstVT == MVT::i32) + if (IsSigned) + Opc = PPC::FCTIWZ; + else + Opc = PPCSubTarget.hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ; + else + Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ; + + // Generate the convert. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + .addReg(SrcReg); + + // Now move the integer value from a float register to an integer register. + unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned); + if (IntReg == 0) + return false; + + UpdateValueMap(I, IntReg); + return true; +} + +// Attempt to fast-select a binary integer operation that isn't already +// handled automatically. +bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { + EVT DestVT = TLI.getValueType(I->getType(), true); + + // We can get here in the case when we have a binary operation on a non-legal + // type and the target independent selector doesn't know how to handle it. + if (DestVT != MVT::i16 && DestVT != MVT::i8) + return false; + + // Look at the currently assigned register for this instruction + // to determine the required register class. If there is no register, + // make a conservative choice (don't assign R0). + unsigned AssignedReg = FuncInfo.ValueMap[I]; + const TargetRegisterClass *RC = + (AssignedReg ? MRI.getRegClass(AssignedReg) : + &PPC::GPRC_and_GPRC_NOR0RegClass); + bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass); + + unsigned Opc; + switch (ISDOpcode) { + default: return false; + case ISD::ADD: + Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8; + break; + case ISD::OR: + Opc = IsGPRC ? PPC::OR : PPC::OR8; + break; + case ISD::SUB: + Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8; + break; + } + + unsigned ResultReg = createResultReg(RC ? 
RC : &PPC::G8RCRegClass); + unsigned SrcReg1 = getRegForValue(I->getOperand(0)); + if (SrcReg1 == 0) return false; + + // Handle case of small immediate operand. + if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) { + const APInt &CIVal = ConstInt->getValue(); + int Imm = (int)CIVal.getSExtValue(); + bool UseImm = true; + if (isInt<16>(Imm)) { + switch (Opc) { + default: + llvm_unreachable("Missing case!"); + case PPC::ADD4: + Opc = PPC::ADDI; + MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass); + break; + case PPC::ADD8: + Opc = PPC::ADDI8; + MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass); + break; + case PPC::OR: + Opc = PPC::ORI; + break; + case PPC::OR8: + Opc = PPC::ORI8; + break; + case PPC::SUBF: + if (Imm == -32768) + UseImm = false; + else { + Opc = PPC::ADDI; + MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass); + Imm = -Imm; + } + break; + case PPC::SUBF8: + if (Imm == -32768) + UseImm = false; + else { + Opc = PPC::ADDI8; + MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass); + Imm = -Imm; + } + break; + } + + if (UseImm) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + .addReg(SrcReg1).addImm(Imm); + UpdateValueMap(I, ResultReg); + return true; + } + } + } + + // Reg-reg case. + unsigned SrcReg2 = getRegForValue(I->getOperand(1)); + if (SrcReg2 == 0) return false; + + // Reverse operands for subtract-from. + if (ISDOpcode == ISD::SUB) + std::swap(SrcReg1, SrcReg2); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + .addReg(SrcReg1).addReg(SrcReg2); + UpdateValueMap(I, ResultReg); + return true; +} + +// Handle arguments to a call that we're attempting to fast-select. +// Return false if the arguments are too complex for us at the moment. +bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, + SmallVectorImpl<unsigned> &ArgRegs, + SmallVectorImpl<MVT> &ArgVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags, + SmallVectorImpl<unsigned> &RegArgs, + CallingConv::ID CC, + unsigned &NumBytes, + bool IsVarArg) { + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context); + CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS); + + // Bail out if we can't handle any of the arguments. + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + CCValAssign &VA = ArgLocs[I]; + MVT ArgVT = ArgVTs[VA.getValNo()]; + + // Skip vector arguments for now, as well as long double and + // uint128_t, and anything that isn't passed in a register. + if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || + !VA.isRegLoc() || VA.needsCustom()) + return false; + + // Skip bit-converted arguments for now. + if (VA.getLocInfo() == CCValAssign::BCvt) + return false; + } + + // Get a count of how many bytes are to be pushed onto the stack. + NumBytes = CCInfo.getNextStackOffset(); + + // Issue CALLSEQ_START. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TII.getCallFrameSetupOpcode())) + .addImm(NumBytes); + + // Prepare to assign register arguments. Every argument uses up a + // GPR protocol register even if it's passed in a floating-point + // register. + unsigned NextGPR = PPC::X3; + unsigned NextFPR = PPC::F1; + + // Process arguments. + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + CCValAssign &VA = ArgLocs[I]; + unsigned Arg = ArgRegs[VA.getValNo()]; + MVT ArgVT = ArgVTs[VA.getValNo()]; + + // Handle argument promotion and bitcasts. 
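+    // (Arguments narrower than their assigned location are sign- or
+    // zero-extended into a temporary register before being copied into the
+    // protocol register below.)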
+ switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: + break; + case CCValAssign::SExt: { + MVT DestVT = VA.getLocVT(); + const TargetRegisterClass *RC = + (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned TmpReg = createResultReg(RC); + if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false)) + llvm_unreachable("Failed to emit a sext!"); + ArgVT = DestVT; + Arg = TmpReg; + break; + } + case CCValAssign::AExt: + case CCValAssign::ZExt: { + MVT DestVT = VA.getLocVT(); + const TargetRegisterClass *RC = + (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned TmpReg = createResultReg(RC); + if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true)) + llvm_unreachable("Failed to emit a zext!"); + ArgVT = DestVT; + Arg = TmpReg; + break; + } + case CCValAssign::BCvt: { + // FIXME: Not yet handled. + llvm_unreachable("Should have bailed before getting here!"); + break; + } + } + + // Copy this argument to the appropriate register. + unsigned ArgReg; + if (ArgVT == MVT::f32 || ArgVT == MVT::f64) { + ArgReg = NextFPR++; + ++NextGPR; + } else + ArgReg = NextGPR++; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ArgReg).addReg(Arg); + RegArgs.push_back(ArgReg); + } + + return true; +} + +// For a call that we've determined we can fast-select, finish the +// call sequence and generate a copy to obtain the return value (if any). +void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, + const Instruction *I, CallingConv::ID CC, + unsigned &NumBytes, bool IsVarArg) { + // Issue CallSEQ_END. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TII.getCallFrameDestroyOpcode())) + .addImm(NumBytes).addImm(0); + + // Next, generate a copy to obtain the return value. + // FIXME: No multi-register return values yet, though I don't foresee + // any real difficulties there. + if (RetVT != MVT::isVoid) { + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context); + CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); + CCValAssign &VA = RVLocs[0]; + assert(RVLocs.size() == 1 && "No support for multi-reg return values!"); + assert(VA.isRegLoc() && "Can only return in registers!"); + + MVT DestVT = VA.getValVT(); + MVT CopyVT = DestVT; + + // Ints smaller than a register still arrive in a full 64-bit + // register, so make sure we recognize this. + if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) + CopyVT = MVT::i64; + + unsigned SourcePhysReg = VA.getLocReg(); + unsigned ResultReg = 0; + + if (RetVT == CopyVT) { + const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT); + ResultReg = createResultReg(CpyRC); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(SourcePhysReg); + + // If necessary, round the floating result to single precision. + } else if (CopyVT == MVT::f64) { + ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP), + ResultReg).addReg(SourcePhysReg); + + // If only the low half of a general register is needed, generate + // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be + // used along the fast-isel path (not lowered), and downstream logic + // also doesn't like a direct subreg copy on a physical reg.) 
+ } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) { + ResultReg = createResultReg(&PPC::GPRCRegClass); + // Convert physical register from G8RC to GPRC. + SourcePhysReg -= PPC::X0 - PPC::R0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(SourcePhysReg); + } + + assert(ResultReg && "ResultReg unset!"); + UsedRegs.push_back(SourcePhysReg); + UpdateValueMap(I, ResultReg); + } +} + +// Attempt to fast-select a call instruction. +bool PPCFastISel::SelectCall(const Instruction *I) { + const CallInst *CI = cast<CallInst>(I); + const Value *Callee = CI->getCalledValue(); + + // Can't handle inline asm. + if (isa<InlineAsm>(Callee)) + return false; + + // Allow SelectionDAG isel to handle tail calls. + if (CI->isTailCall()) + return false; + + // Obtain calling convention. + ImmutableCallSite CS(CI); + CallingConv::ID CC = CS.getCallingConv(); + + PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + bool IsVarArg = FTy->isVarArg(); + + // Not ready for varargs yet. + if (IsVarArg) + return false; + + // Handle simple calls for now, with legal return types and + // those that can be extended. + Type *RetTy = I->getType(); + MVT RetVT; + if (RetTy->isVoidTy()) + RetVT = MVT::isVoid; + else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 && + RetVT != MVT::i8) + return false; + + // FIXME: No multi-register return values yet. + if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 && + RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 && + RetVT != MVT::f64) { + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context); + CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); + if (RVLocs.size() > 1) + return false; + } + + // Bail early if more than 8 arguments, as we only currently + // handle arguments passed in registers. + unsigned NumArgs = CS.arg_size(); + if (NumArgs > 8) + return false; + + // Set up the argument vectors. + SmallVector<Value*, 8> Args; + SmallVector<unsigned, 8> ArgRegs; + SmallVector<MVT, 8> ArgVTs; + SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; + + Args.reserve(NumArgs); + ArgRegs.reserve(NumArgs); + ArgVTs.reserve(NumArgs); + ArgFlags.reserve(NumArgs); + + for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end(); + II != IE; ++II) { + // FIXME: ARM does something for intrinsic calls here, check into that. + + unsigned AttrIdx = II - CS.arg_begin() + 1; + + // Only handle easy calls for now. It would be reasonably easy + // to handle <= 8-byte structures passed ByVal in registers, but we + // have to ensure they are right-justified in the register. 
+ if (CS.paramHasAttr(AttrIdx, Attribute::InReg) || + CS.paramHasAttr(AttrIdx, Attribute::StructRet) || + CS.paramHasAttr(AttrIdx, Attribute::Nest) || + CS.paramHasAttr(AttrIdx, Attribute::ByVal)) + return false; + + ISD::ArgFlagsTy Flags; + if (CS.paramHasAttr(AttrIdx, Attribute::SExt)) + Flags.setSExt(); + if (CS.paramHasAttr(AttrIdx, Attribute::ZExt)) + Flags.setZExt(); + + Type *ArgTy = (*II)->getType(); + MVT ArgVT; + if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8) + return false; + + if (ArgVT.isVector()) + return false; + + unsigned Arg = getRegForValue(*II); + if (Arg == 0) + return false; + + unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); + Flags.setOrigAlign(OriginalAlignment); + + Args.push_back(*II); + ArgRegs.push_back(Arg); + ArgVTs.push_back(ArgVT); + ArgFlags.push_back(Flags); + } + + // Process the arguments. + SmallVector<unsigned, 8> RegArgs; + unsigned NumBytes; + + if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, + RegArgs, CC, NumBytes, IsVarArg)) + return false; + + // FIXME: No handling for function pointers yet. This requires + // implementing the function descriptor (OPD) setup. + const GlobalValue *GV = dyn_cast<GlobalValue>(Callee); + if (!GV) + return false; + + // Build direct call with NOP for TOC restore. + // FIXME: We can and should optimize away the NOP for local calls. + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(PPC::BL8_NOP)); + // Add callee. + MIB.addGlobalAddress(GV); + + // Add implicit physical register uses to the call. + for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II) + MIB.addReg(RegArgs[II], RegState::Implicit); + + // Add a register mask with the call-preserved registers. Proper + // defs for return values will be added by setPhysRegsDeadExcept(). + MIB.addRegMask(TRI.getCallPreservedMask(CC)); + + // Finish off the call including any return values. + SmallVector<unsigned, 4> UsedRegs; + finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg); + + // Set all unused physregs defs as dead. + static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); + + return true; +} + +// Attempt to fast-select a return instruction. +bool PPCFastISel::SelectRet(const Instruction *I) { + + if (!FuncInfo.CanLowerReturn) + return false; + + const ReturnInst *Ret = cast<ReturnInst>(I); + const Function &F = *I->getParent()->getParent(); + + // Build a list of return value registers. + SmallVector<unsigned, 4> RetRegs; + CallingConv::ID CC = F.getCallingConv(); + + if (Ret->getNumOperands() > 0) { + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ValLocs; + CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, *Context); + CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS); + const Value *RV = Ret->getOperand(0); + + // FIXME: Only one output register for now. + if (ValLocs.size() > 1) + return false; + + // Special case for returning a constant integer of any size. + // Materialize the constant as an i64 and copy it to the return + // register. This avoids an unnecessary extend or truncate. 
+ if (isa<ConstantInt>(*RV)) { + const Constant *C = cast<Constant>(RV); + unsigned SrcReg = PPCMaterializeInt(C, MVT::i64); + unsigned RetReg = ValLocs[0].getLocReg(); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + RetReg).addReg(SrcReg); + RetRegs.push_back(RetReg); + + } else { + unsigned Reg = getRegForValue(RV); + + if (Reg == 0) + return false; + + // Copy the result values into the output registers. + for (unsigned i = 0; i < ValLocs.size(); ++i) { + + CCValAssign &VA = ValLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + RetRegs.push_back(VA.getLocReg()); + unsigned SrcReg = Reg + VA.getValNo(); + + EVT RVEVT = TLI.getValueType(RV->getType()); + if (!RVEVT.isSimple()) + return false; + MVT RVVT = RVEVT.getSimpleVT(); + MVT DestVT = VA.getLocVT(); + + if (RVVT != DestVT && RVVT != MVT::i8 && + RVVT != MVT::i16 && RVVT != MVT::i32) + return false; + + if (RVVT != DestVT) { + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: + llvm_unreachable("Full value assign but types don't match?"); + case CCValAssign::AExt: + case CCValAssign::ZExt: { + const TargetRegisterClass *RC = + (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned TmpReg = createResultReg(RC); + if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true)) + return false; + SrcReg = TmpReg; + break; + } + case CCValAssign::SExt: { + const TargetRegisterClass *RC = + (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned TmpReg = createResultReg(RC); + if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false)) + return false; + SrcReg = TmpReg; + break; + } + } + } + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), RetRegs[i]) + .addReg(SrcReg); + } + } + } + + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(PPC::BLR)); + + for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) + MIB.addReg(RetRegs[i], RegState::Implicit); + + return true; +} + +// Attempt to emit an integer extend of SrcReg into DestReg. Both +// signed and zero extensions are supported. Return false if we +// can't handle it. +bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, + unsigned DestReg, bool IsZExt) { + if (DestVT != MVT::i32 && DestVT != MVT::i64) + return false; + if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32) + return false; + + // Signed extensions use EXTSB, EXTSH, EXTSW. + if (!IsZExt) { + unsigned Opc; + if (SrcVT == MVT::i8) + Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64; + else if (SrcVT == MVT::i16) + Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64; + else { + assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??"); + Opc = PPC::EXTSW_32_64; + } + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + .addReg(SrcReg); + + // Unsigned 32-bit extensions use RLWINM. + } else if (DestVT == MVT::i32) { + unsigned MB; + if (SrcVT == MVT::i8) + MB = 24; + else { + assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??"); + MB = 16; + } + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLWINM), + DestReg) + .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31); + + // Unsigned 64-bit extensions use RLDICL (with a 32-bit source). 
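+  // (With a shift of zero, RLDICL keeps only the low 64-MB bits, so MB values
+  // of 56, 48, and 32 zero-extend from i8, i16, and i32 respectively.)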
+ } else { + unsigned MB; + if (SrcVT == MVT::i8) + MB = 56; + else if (SrcVT == MVT::i16) + MB = 48; + else + MB = 32; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(PPC::RLDICL_32_64), DestReg) + .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB); + } + + return true; +} + +// Attempt to fast-select an indirect branch instruction. +bool PPCFastISel::SelectIndirectBr(const Instruction *I) { + unsigned AddrReg = getRegForValue(I->getOperand(0)); + if (AddrReg == 0) + return false; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::MTCTR8)) + .addReg(AddrReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCTR8)); + + const IndirectBrInst *IB = cast<IndirectBrInst>(I); + for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i) + FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]); + + return true; +} + +// Attempt to fast-select an integer truncate instruction. +bool PPCFastISel::SelectTrunc(const Instruction *I) { + Value *Src = I->getOperand(0); + EVT SrcVT = TLI.getValueType(Src->getType(), true); + EVT DestVT = TLI.getValueType(I->getType(), true); + + if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16) + return false; + + if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (!SrcReg) + return false; + + // The only interesting case is when we need to switch register classes. + if (SrcVT == MVT::i64) { + unsigned ResultReg = createResultReg(&PPC::GPRCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(SrcReg, 0, PPC::sub_32); + SrcReg = ResultReg; + } + + UpdateValueMap(I, SrcReg); + return true; +} + +// Attempt to fast-select an integer extend instruction. +bool PPCFastISel::SelectIntExt(const Instruction *I) { + Type *DestTy = I->getType(); + Value *Src = I->getOperand(0); + Type *SrcTy = Src->getType(); + + bool IsZExt = isa<ZExtInst>(I); + unsigned SrcReg = getRegForValue(Src); + if (!SrcReg) return false; + + EVT SrcEVT, DestEVT; + SrcEVT = TLI.getValueType(SrcTy, true); + DestEVT = TLI.getValueType(DestTy, true); + if (!SrcEVT.isSimple()) + return false; + if (!DestEVT.isSimple()) + return false; + + MVT SrcVT = SrcEVT.getSimpleVT(); + MVT DestVT = DestEVT.getSimpleVT(); + + // If we know the register class needed for the result of this + // instruction, use it. Otherwise pick the register class of the + // correct size that does not contain X0/R0, since we don't know + // whether downstream uses permit that assignment. + unsigned AssignedReg = FuncInfo.ValueMap[I]; + const TargetRegisterClass *RC = + (AssignedReg ? MRI.getRegClass(AssignedReg) : + (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass : + &PPC::GPRC_and_GPRC_NOR0RegClass)); + unsigned ResultReg = createResultReg(RC); + + if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt)) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + // Attempt to fast-select an instruction that wasn't handled by -// the table-generated machinery. TBD. +// the table-generated machinery. 
bool PPCFastISel::TargetSelectInstruction(const Instruction *I) { - return I && false; + + switch (I->getOpcode()) { + case Instruction::Load: + return SelectLoad(I); + case Instruction::Store: + return SelectStore(I); + case Instruction::Br: + return SelectBranch(I); + case Instruction::IndirectBr: + return SelectIndirectBr(I); + case Instruction::FPExt: + return SelectFPExt(I); + case Instruction::FPTrunc: + return SelectFPTrunc(I); + case Instruction::SIToFP: + return SelectIToFP(I, /*IsSigned*/ true); + case Instruction::UIToFP: + return SelectIToFP(I, /*IsSigned*/ false); + case Instruction::FPToSI: + return SelectFPToI(I, /*IsSigned*/ true); + case Instruction::FPToUI: + return SelectFPToI(I, /*IsSigned*/ false); + case Instruction::Add: + return SelectBinaryIntOp(I, ISD::ADD); + case Instruction::Or: + return SelectBinaryIntOp(I, ISD::OR); + case Instruction::Sub: + return SelectBinaryIntOp(I, ISD::SUB); + case Instruction::Call: + if (dyn_cast<IntrinsicInst>(I)) + return false; + return SelectCall(I); + case Instruction::Ret: + return SelectRet(I); + case Instruction::Trunc: + return SelectTrunc(I); + case Instruction::ZExt: + case Instruction::SExt: + return SelectIntExt(I); + // Here add other flavors of Instruction::XXX that automated + // cases don't catch. For example, switches are terminators + // that aren't yet handled. + default: + break; + } + return false; } // Materialize a floating-point constant into a register, and return @@ -131,21 +1802,94 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align); - // For small code model, generate a LDtocCPT. - if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) + unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD; + unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); + + // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)). + if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocCPT), - DestReg) - .addConstantPoolIndex(Idx).addReg(PPC::X2).addMemOperand(MMO); - else { + TmpReg) + .addConstantPoolIndex(Idx).addReg(PPC::X2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + .addImm(0).addReg(TmpReg).addMemOperand(MMO); + } else { // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)). - unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD; - unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA), TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) - .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO) - .addReg(TmpReg) - .addMemOperand(MMO); + // But for large code model, we must generate a LDtocL followed + // by the LF[SD]. 
+ if (CModel == CodeModel::Large) { + unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL), + TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + .addImm(0).addReg(TmpReg2); + } else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO) + .addReg(TmpReg) + .addMemOperand(MMO); + } + + return DestReg; +} + +// Materialize the address of a global value into a register, and return +// the register number (or zero if we failed to handle it). +unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { + assert(VT == MVT::i64 && "Non-address!"); + const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass; + unsigned DestReg = createResultReg(RC); + + // Global values may be plain old object addresses, TLS object + // addresses, constant pool entries, or jump tables. How we generate + // code for these may depend on small, medium, or large code model. + CodeModel::Model CModel = TM.getCodeModel(); + + // FIXME: Jump tables are not yet required because fast-isel doesn't + // handle switches; if that changes, we need them as well. For now, + // what follows assumes everything's a generic (or TLS) global address. + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + if (!GVar) { + // If GV is an alias, use the aliasee for determining thread-locality. + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) + GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)); + } + + // FIXME: We don't yet handle the complexity of TLS. + bool IsTLS = GVar && GVar->isThreadLocal(); + if (IsTLS) + return 0; + + // For small code model, generate a simple TOC load. + if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtoc), DestReg) + .addGlobalAddress(GV).addReg(PPC::X2); + else { + // If the address is an externally defined symbol, a symbol with + // common or externally available linkage, a function address, or a + // jump table address (not yet needed), or if we are generating code + // for large code model, we generate: + // LDtocL(GV, ADDIStocHA(%X2, GV)) + // Otherwise we generate: + // ADDItocL(ADDIStocHA(%X2, GV), GV) + // Either way, start with the ADDIStocHA: + unsigned HighPartReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA), + HighPartReg).addReg(PPC::X2).addGlobalAddress(GV); + + // !GVar implies a function address. An external variable is one + // without an initializer. + // If/when switches are implemented, jump tables should be handled + // on the "if" path here. + if (CModel == CodeModel::Large || !GVar || !GVar->hasInitializer() || + GVar->hasCommonLinkage() || GVar->hasAvailableExternallyLinkage()) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL), + DestReg).addGlobalAddress(GV).addReg(HighPartReg); + else + // Otherwise generate the ADDItocL. 
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDItocL), + DestReg).addReg(HighPartReg).addGlobalAddress(GV); } return DestReg; @@ -283,23 +2027,112 @@ unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) { if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) return PPCMaterializeFP(CFP, VT); + else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) + return PPCMaterializeGV(GV, VT); else if (isa<ConstantInt>(C)) return PPCMaterializeInt(C, VT); - // TBD: Global values. return 0; } // Materialize the address created by an alloca into a register, and -// return the register number (or zero if we failed to handle it). TBD. +// return the register number (or zero if we failed to handle it). unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { - return AI && 0; + // Don't handle dynamic allocas. + if (!FuncInfo.StaticAllocaMap.count(AI)) return 0; + + MVT VT; + if (!isLoadTypeLegal(AI->getType(), VT)) return 0; + + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + + if (SI != FuncInfo.StaticAllocaMap.end()) { + unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8), + ResultReg).addFrameIndex(SI->second).addImm(0); + return ResultReg; + } + + return 0; } -// Fold loads into extends when possible. TBD. +// Fold loads into extends when possible. +// FIXME: We can have multiple redundant extend/trunc instructions +// following a load. The folding only picks up one. Extend this +// to check subsequent instructions for the same pattern and remove +// them. Thus ResultReg should be the def reg for the last redundant +// instruction in a chain, and all intervening instructions can be +// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll +// to add ELF64-NOT: rldicl to the appropriate tests when this works. bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, const LoadInst *LI) { - return MI && OpNo && LI && false; + // Verify we have a legal type before going any further. + MVT VT; + if (!isLoadTypeLegal(LI->getType(), VT)) + return false; + + // Combine load followed by zero- or sign-extend. + bool IsZExt = false; + switch(MI->getOpcode()) { + default: + return false; + + case PPC::RLDICL: + case PPC::RLDICL_32_64: { + IsZExt = true; + unsigned MB = MI->getOperand(3).getImm(); + if ((VT == MVT::i8 && MB <= 56) || + (VT == MVT::i16 && MB <= 48) || + (VT == MVT::i32 && MB <= 32)) + break; + return false; + } + + case PPC::RLWINM: + case PPC::RLWINM8: { + IsZExt = true; + unsigned MB = MI->getOperand(3).getImm(); + if ((VT == MVT::i8 && MB <= 24) || + (VT == MVT::i16 && MB <= 16)) + break; + return false; + } + + case PPC::EXTSB: + case PPC::EXTSB8: + case PPC::EXTSB8_32_64: + /* There is no sign-extending load-byte instruction. */ + return false; + + case PPC::EXTSH: + case PPC::EXTSH8: + case PPC::EXTSH8_32_64: { + if (VT != MVT::i16 && VT != MVT::i8) + return false; + break; + } + + case PPC::EXTSW: + case PPC::EXTSW_32_64: { + if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8) + return false; + break; + } + } + + // See if we can handle this address. 
+ Address Addr; + if (!PPCComputeAddress(LI->getOperand(0), Addr)) + return false; + + unsigned ResultReg = MI->getOperand(0).getReg(); + + if (!PPCEmitLoad(VT, ResultReg, Addr, 0, IsZExt)) + return false; + + MI->eraseFromParent(); + return true; } // Attempt to lower call arguments in a faster way than done by @@ -312,6 +2145,81 @@ bool PPCFastISel::FastLowerArguments() { return false; } +// Handle materializing integer constants into a register. This is not +// automatically generated for PowerPC, so must be explicitly created here. +unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) { + + if (Opc != ISD::Constant) + return 0; + + if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && + VT != MVT::i8 && VT != MVT::i1) + return 0; + + const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass : + &PPC::GPRCRegClass); + if (VT == MVT::i64) + return PPCMaterialize64BitInt(Imm, RC); + else + return PPCMaterialize32BitInt(Imm, RC); +} + +// Override for ADDI and ADDI8 to set the correct register class +// on RHS operand 0. The automatic infrastructure naively assumes +// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost +// for these cases. At the moment, none of the other automatically +// generated RI instructions require special treatment. However, once +// SelectSelect is implemented, "isel" requires similar handling. +// +// Also be conservative about the output register class. Avoid +// assigning R0 or X0 to the output register for GPRC and G8RC +// register classes, as any such result could be used in ADDI, etc., +// where those regs have another meaning. +unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm) { + if (MachineInstOpcode == PPC::ADDI) + MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass); + else if (MachineInstOpcode == PPC::ADDI8) + MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass); + + const TargetRegisterClass *UseRC = + (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : + (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); + + return FastISel::FastEmitInst_ri(MachineInstOpcode, UseRC, + Op0, Op0IsKill, Imm); +} + +// Override for instructions with one register operand to avoid use of +// R0/X0. The automatic infrastructure isn't aware of the context so +// we must be conservative. +unsigned PPCFastISel::FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass* RC, + unsigned Op0, bool Op0IsKill) { + const TargetRegisterClass *UseRC = + (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : + (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); + + return FastISel::FastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill); +} + +// Override for instructions with two register operands to avoid use +// of R0/X0. The automatic infrastructure isn't aware of the context +// so we must be conservative. +unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass* RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill) { + const TargetRegisterClass *UseRC = + (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : + (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); + + return FastISel::FastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill, + Op1, Op1IsKill); +} + namespace llvm { // Create the fast instruction selector for PowerPC64 ELF. 
FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo, diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 24d3a0b..0ac2ced 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -204,10 +204,9 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, unsigned FrameSize = UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize(); - // Get the alignments provided by the target, and the maximum alignment - // (if any) of the fixed frame objects. - unsigned TargetAlign = getStackAlignment(); - unsigned MaxAlign = MFI->getMaxAlignment(); + // Get stack alignments. The frame must be aligned to the greatest of these: + unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI + unsigned MaxAlign = MFI->getMaxAlignment(); // algmt required by data in frame unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1; const PPCRegisterInfo *RegInfo = @@ -346,12 +345,20 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { bool needsFrameMoves = MMI.hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry(); + // Get processor type. + bool isPPC64 = Subtarget.isPPC64(); + // Get the ABI. + bool isDarwinABI = Subtarget.isDarwinABI(); + bool isSVR4ABI = Subtarget.isSVR4ABI(); + assert((isDarwinABI || isSVR4ABI) && + "Currently only Darwin and SVR4 ABIs are supported for PowerPC."); + // Prepare for frame info. MCSymbol *FrameLabel = 0; // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, // process it. - if (!Subtarget.isSVR4ABI()) + if (!isSVR4ABI) for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { HandleVRSaveUpdate(MBBI, TII); @@ -371,23 +378,52 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { if (MFI->isFrameAddressTaken()) replaceFPWithRealFP(MF); - // Get processor type. - bool isPPC64 = Subtarget.isPPC64(); - // Get operating system - bool isDarwinABI = Subtarget.isDarwinABI(); // Check if the link register (LR) must be saved. PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); bool MustSaveLR = FI->mustSaveLR(); const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); - // Do we have a frame pointer for this function? + // Do we have a frame pointer and/or base pointer for this function? bool HasFP = hasFP(MF); bool HasBP = RegInfo->hasBasePointer(MF); + unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; + unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30; + unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; + unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; + unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0; + unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg + // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) + const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 + : PPC::MFLR ); + const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD + : PPC::STW ); + const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU + : PPC::STWU ); + const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX + : PPC::STWUX); + const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 + : PPC::LIS ); + const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 + : PPC::ORI ); + const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 + : PPC::OR ); + const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? 
PPC::SUBFC8 + : PPC::SUBFC); + const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 + : PPC::SUBFIC); + + // Regarding this assert: Even though LR is saved in the caller's frame (i.e., + // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no + // Red Zone, an asynchronous event (a form of "callee") could claim a frame & + // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. + assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && + "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); + int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI); int FPOffset = 0; if (HasFP) { - if (Subtarget.isSVR4ABI()) { + if (isSVR4ABI) { MachineFrameInfo *FFI = MF.getFrameInfo(); int FPIndex = FI->getFramePointerSaveIndex(); assert(FPIndex && "No Frame Pointer Save Slot!"); @@ -399,7 +435,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { int BPOffset = 0; if (HasBP) { - if (Subtarget.isSVR4ABI()) { + if (isSVR4ABI) { MachineFrameInfo *FFI = MF.getFrameInfo(); int BPIndex = FI->getBasePointerSaveIndex(); assert(BPIndex && "No Base Pointer Save Slot!"); @@ -410,181 +446,116 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { } } - if (isPPC64) { - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0); + // Get stack alignments. + unsigned MaxAlign = MFI->getMaxAlignment(); + if (HasBP && MaxAlign > 1) + assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && + "Invalid alignment!"); + + // Frames of 32KB & larger require special handling because they cannot be + // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. + bool isLargeFrame = !isInt<16>(NegFrameSize); - if (!MustSaveCRs.empty()) { - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), PPC::X12); - for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) - MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill); - } + if (MustSaveLR) + BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); - if (HasFP) - BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) - .addReg(PPC::X31) - .addImm(FPOffset) - .addReg(PPC::X1); - - if (HasBP) - BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) - .addReg(PPC::X30) - .addImm(BPOffset) - .addReg(PPC::X1); - - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) - .addReg(PPC::X0) - .addImm(LROffset) - .addReg(PPC::X1); - - if (!MustSaveCRs.empty()) - BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) - .addReg(PPC::X12, getKillRegState(true)) - .addImm(8) - .addReg(PPC::X1); - } else { - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0); - - if (HasFP) - // FIXME: On PPC32 SVR4, FPOffset is negative and access to negative - // offsets of R1 is not allowed. - BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) - .addReg(PPC::R31) - .addImm(FPOffset) - .addReg(PPC::R1); - - if (HasBP) - // FIXME: On PPC32 SVR4, FPOffset is negative and access to negative - // offsets of R1 is not allowed. 
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) - .addReg(PPC::R30) - .addImm(BPOffset) - .addReg(PPC::R1); - - assert(MustSaveCRs.empty() && - "Prologue CR saving supported only in 64-bit mode"); - - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) - .addReg(PPC::R0) - .addImm(LROffset) - .addReg(PPC::R1); + assert((isPPC64 || MustSaveCRs.empty()) && + "Prologue CR saving supported only in 64-bit mode"); + + if (!MustSaveCRs.empty()) { // will only occur for PPC64 + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg); + for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) + MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill); } - // Skip if a leaf routine. + if (HasFP) + // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. + BuildMI(MBB, MBBI, dl, StoreInst) + .addReg(FPReg) + .addImm(FPOffset) + .addReg(SPReg); + + if (HasBP) + // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. + BuildMI(MBB, MBBI, dl, StoreInst) + .addReg(BPReg) + .addImm(BPOffset) + .addReg(SPReg); + + if (MustSaveLR) + // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe. + BuildMI(MBB, MBBI, dl, StoreInst) + .addReg(ScratchReg) + .addImm(LROffset) + .addReg(SPReg); + + if (!MustSaveCRs.empty()) // will only occur for PPC64 + BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) + .addReg(TempReg, getKillRegState(true)) + .addImm(8) + .addReg(SPReg); + + // Skip the rest if this is a leaf function & all spills fit in the Red Zone. if (!FrameSize) return; - // Get stack alignments. - unsigned MaxAlign = MFI->getMaxAlignment(); - // Adjust stack pointer: r1 += NegFrameSize. // If there is a preferred stack alignment, align R1 now - if (!isPPC64) { - // PPC32. - - if (HasBP) { - // Save a copy of r1 as the base pointer. - BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R30) - .addReg(PPC::R1) - .addReg(PPC::R1); - } - if (HasBP && MaxAlign > 1) { - assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && - "Invalid alignment!"); + if (HasBP) { + // Save a copy of r1 as the base pointer. + BuildMI(MBB, MBBI, dl, OrInst, BPReg) + .addReg(SPReg) + .addReg(SPReg); + } - BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0) - .addReg(PPC::R1) + if (HasBP && MaxAlign > 1) { + if (isPPC64) + BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) + .addReg(SPReg) + .addImm(0) + .addImm(64 - Log2_32(MaxAlign)); + else // PPC32... 
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) + .addReg(SPReg) .addImm(0) .addImm(32 - Log2_32(MaxAlign)) .addImm(31); - if (isInt<16>(NegFrameSize)) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC), PPC::R0) - .addReg(PPC::R0, RegState::Kill) - .addImm(NegFrameSize); - } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R12) - .addImm(NegFrameSize >> 16); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R12) - .addReg(PPC::R12, RegState::Kill) - .addImm(NegFrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFC), PPC::R0) - .addReg(PPC::R0, RegState::Kill) - .addReg(PPC::R12, RegState::Kill); - } - BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1) - .addReg(PPC::R1, RegState::Kill) - .addReg(PPC::R1) - .addReg(PPC::R0); - } else if (isInt<16>(NegFrameSize)) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1) - .addReg(PPC::R1) - .addImm(NegFrameSize) - .addReg(PPC::R1); + if (!isLargeFrame) { + BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) + .addReg(ScratchReg, RegState::Kill) + .addImm(NegFrameSize); } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0) + BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) .addImm(NegFrameSize >> 16); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0) - .addReg(PPC::R0, RegState::Kill) + BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) + .addReg(TempReg, RegState::Kill) .addImm(NegFrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1) - .addReg(PPC::R1, RegState::Kill) - .addReg(PPC::R1) - .addReg(PPC::R0); - } - } else { // PPC64. - if (HasBP) { - // Save a copy of r1 as the base pointer. - BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X30) - .addReg(PPC::X1) - .addReg(PPC::X1); + BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) + .addReg(ScratchReg, RegState::Kill) + .addReg(TempReg, RegState::Kill); } + BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) + .addReg(SPReg, RegState::Kill) + .addReg(SPReg) + .addReg(ScratchReg); - if (HasBP && MaxAlign > 1) { - assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && - "Invalid alignment!"); + } else if (!isLargeFrame) { + BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg) + .addReg(SPReg) + .addImm(NegFrameSize) + .addReg(SPReg); - BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0) - .addReg(PPC::X1) - .addImm(0) - .addImm(64 - Log2_32(MaxAlign)); - if (isInt<16>(NegFrameSize)) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0) - .addReg(PPC::X0, RegState::Kill) - .addImm(NegFrameSize); - } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X12) - .addImm(NegFrameSize >> 16); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X12) - .addReg(PPC::X12, RegState::Kill) - .addImm(NegFrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFC8), PPC::X0) - .addReg(PPC::X0, RegState::Kill) - .addReg(PPC::X12, RegState::Kill); - } - BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1) - .addReg(PPC::X1, RegState::Kill) - .addReg(PPC::X1) - .addReg(PPC::X0); - } else if (isInt<16>(NegFrameSize)) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1) - .addReg(PPC::X1) - .addImm(NegFrameSize) - .addReg(PPC::X1); - } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0) - .addImm(NegFrameSize >> 16); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0) - .addReg(PPC::X0, RegState::Kill) - .addImm(NegFrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1) - .addReg(PPC::X1, RegState::Kill) - .addReg(PPC::X1) - .addReg(PPC::X0); - } + } else { + BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, 
ScratchReg) + .addImm(NegFrameSize >> 16); + BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) + .addReg(ScratchReg, RegState::Kill) + .addImm(NegFrameSize & 0xFFFF); + BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) + .addReg(SPReg, RegState::Kill) + .addReg(SPReg) + .addReg(ScratchReg); } // Add the "machine moves" for the instructions we generated above, but in @@ -600,22 +571,19 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MCCFIInstruction::createDefCfaOffset(FrameLabel, NegFrameSize)); if (HasFP) { - unsigned Reg = isPPC64 ? PPC::X31 : PPC::R31; - Reg = MRI->getDwarfRegNum(Reg, true); + unsigned Reg = MRI->getDwarfRegNum(FPReg, true); MMI.addFrameInst( MCCFIInstruction::createOffset(FrameLabel, Reg, FPOffset)); } if (HasBP) { - unsigned Reg = isPPC64 ? PPC::X30 : PPC::R30; - Reg = MRI->getDwarfRegNum(Reg, true); + unsigned Reg = MRI->getDwarfRegNum(BPReg, true); MMI.addFrameInst( MCCFIInstruction::createOffset(FrameLabel, Reg, BPOffset)); } if (MustSaveLR) { - unsigned Reg = isPPC64 ? PPC::LR8 : PPC::LR; - Reg = MRI->getDwarfRegNum(Reg, true); + unsigned Reg = MRI->getDwarfRegNum(LRReg, true); MMI.addFrameInst( MCCFIInstruction::createOffset(FrameLabel, Reg, LROffset)); } @@ -625,15 +593,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // If there is a frame pointer, copy R1 into R31 if (HasFP) { - if (!isPPC64) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31) - .addReg(PPC::R1) - .addReg(PPC::R1); - } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31) - .addReg(PPC::X1) - .addReg(PPC::X1); - } + BuildMI(MBB, MBBI, dl, OrInst, FPReg) + .addReg(SPReg) + .addReg(SPReg); if (needsFrameMoves) { ReadyLabel = MMI.getContext().CreateTempSymbol(); @@ -641,9 +603,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // Mark effective beginning of when frame pointer is ready. BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel); - unsigned Reg = HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) - : (isPPC64 ? PPC::X1 : PPC::R1); - Reg = MRI->getDwarfRegNum(Reg, true); + unsigned Reg = MRI->getDwarfRegNum(FPReg, true); MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(ReadyLabel, Reg)); } } @@ -664,19 +624,16 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // For SVR4, don't emit a move for the CR spill slot if we haven't // spilled CRs. - if (Subtarget.isSVR4ABI() - && (PPC::CR2 <= Reg && Reg <= PPC::CR4) - && MustSaveCRs.empty()) - continue; + if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) + && MustSaveCRs.empty()) + continue; // For 64-bit SVR4 when we have spilled CRs, the spill location // is SP+8, not a frame-relative slot. - if (Subtarget.isSVR4ABI() - && Subtarget.isPPC64() - && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { + if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { MMI.addFrameInst(MCCFIInstruction::createOffset( Label, MRI->getDwarfRegNum(PPC::CR2, true), 8)); - continue; + continue; } int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); @@ -707,7 +664,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, RetOpcode == PPC::TCRETURNai8) && "Can only insert epilog into returning blocks"); - // Get alignment info so we know how to restore r1 + // Get alignment info so we know how to restore the SP. const MachineFrameInfo *MFI = MF.getFrameInfo(); // Get the number of bytes allocated from the FrameInfo. @@ -715,21 +672,41 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // Get processor type. 
bool isPPC64 = Subtarget.isPPC64(); - // Get operating system + // Get the ABI. bool isDarwinABI = Subtarget.isDarwinABI(); + bool isSVR4ABI = Subtarget.isSVR4ABI(); + // Check if the link register (LR) has been saved. PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); bool MustSaveLR = FI->mustSaveLR(); const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); - // Do we have a frame pointer for this function? + // Do we have a frame pointer and/or base pointer for this function? bool HasFP = hasFP(MF); bool HasBP = RegInfo->hasBasePointer(MF); + unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; + unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30; + unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; + unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0; + unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg + const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 + : PPC::MTLR ); + const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD + : PPC::LWZ ); + const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 + : PPC::LIS ); + const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 + : PPC::ORI ); + const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 + : PPC::ADDI ); + const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 + : PPC::ADD4 ); + int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI); int FPOffset = 0; if (HasFP) { - if (Subtarget.isSVR4ABI()) { + if (isSVR4ABI) { MachineFrameInfo *FFI = MF.getFrameInfo(); int FPIndex = FI->getFramePointerSaveIndex(); assert(FPIndex && "No Frame Pointer Save Slot!"); @@ -741,7 +718,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, int BPOffset = 0; if (HasBP) { - if (Subtarget.isSVR4ABI()) { + if (isSVR4ABI) { MachineFrameInfo *FFI = MF.getFrameInfo(); int BPIndex = FI->getBasePointerSaveIndex(); assert(BPIndex && "No Base Pointer Save Slot!"); @@ -773,106 +750,76 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, FrameSize += StackAdj; } + // Frames of 32KB & larger require special handling because they cannot be + // indexed into with a simple LD/LWZ immediate offset operand. + bool isLargeFrame = !isInt<16>(FrameSize); + if (FrameSize) { - // The loaded (or persistent) stack pointer value is offset by the 'stwu' - // on entry to the function. Add this offset back now. - if (!isPPC64) { - // If this function contained a fastcc call and GuaranteedTailCallOpt is - // enabled (=> hasFastCall()==true) the fastcc call might contain a tail - // call which invalidates the stack pointer value in SP(0). So we use the - // value of R31 in this case. - if (FI->hasFastCall() && isInt<16>(FrameSize)) { - assert(hasFP(MF) && "Expecting a valid the frame pointer."); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1) - .addReg(PPC::R31).addImm(FrameSize); - } else if(FI->hasFastCall()) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0) - .addImm(FrameSize >> 16); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0) - .addReg(PPC::R0, RegState::Kill) - .addImm(FrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4)) - .addReg(PPC::R1) - .addReg(PPC::R31) - .addReg(PPC::R0); - } else if (isInt<16>(FrameSize) && - !HasBP && - !MFI->hasVarSizedObjects()) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1) - .addReg(PPC::R1).addImm(FrameSize); + // In the prologue, the loaded (or persistent) stack pointer value is offset + // by the STDU/STDUX/STWU/STWUX instruction. Add this offset back now. 
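The isLargeFrame paths above and below materialize the frame size with a lis/ori pair when it does not fit the signed 16-bit displacement of the update-form loads and stores. A plain C++ sketch (not LLVM code) of how that split reassembles the original 32-bit value:

#include <cassert>
#include <cstdint>

static int32_t lisOri(int32_t Imm) {
  int32_t Hi = Imm >> 16;       // lis rT, Hi      => rT = Hi << 16
  int32_t Lo = Imm & 0xFFFF;    // ori rT, rT, Lo  => OR in the low 16 bits
  return (int32_t)(((uint32_t)Hi << 16) | (uint32_t)Lo);
}

int main() {
  int32_t NegFrameSize = -100000;          // |FrameSize| >= 32 KB
  assert(lisOri(NegFrameSize) == NegFrameSize);
  assert(lisOri(70000) == 70000);          // also works for positive sizes
}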
+ + // If this function contained a fastcc call and GuaranteedTailCallOpt is + // enabled (=> hasFastCall()==true) the fastcc call might contain a tail + // call which invalidates the stack pointer value in SP(0). So we use the + // value of R31 in this case. + if (FI->hasFastCall()) { + assert(HasFP && "Expecting a valid frame pointer."); + if (!isLargeFrame) { + BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) + .addReg(FPReg).addImm(FrameSize); } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ),PPC::R1) - .addImm(0).addReg(PPC::R1); - } - } else { - if (FI->hasFastCall() && isInt<16>(FrameSize)) { - assert(hasFP(MF) && "Expecting a valid the frame pointer."); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1) - .addReg(PPC::X31).addImm(FrameSize); - } else if(FI->hasFastCall()) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0) + BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) .addImm(FrameSize >> 16); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0) - .addReg(PPC::X0, RegState::Kill) + BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) + .addReg(ScratchReg, RegState::Kill) .addImm(FrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8)) - .addReg(PPC::X1) - .addReg(PPC::X31) - .addReg(PPC::X0); - } else if (isInt<16>(FrameSize) && !HasBP && - !MFI->hasVarSizedObjects()) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1) - .addReg(PPC::X1).addImm(FrameSize); - } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1) - .addImm(0).addReg(PPC::X1); + BuildMI(MBB, MBBI, dl, AddInst) + .addReg(SPReg) + .addReg(FPReg) + .addReg(ScratchReg); } + } else if (!isLargeFrame && !HasBP && !MFI->hasVarSizedObjects()) { + BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) + .addReg(SPReg) + .addImm(FrameSize); + } else { + BuildMI(MBB, MBBI, dl, LoadInst, SPReg) + .addImm(0) + .addReg(SPReg); } - } - if (isPPC64) { - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0) - .addImm(LROffset).addReg(PPC::X1); - - if (!MustSaveCRs.empty()) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), PPC::X12) - .addImm(8).addReg(PPC::X1); + } - if (HasFP) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31) - .addImm(FPOffset).addReg(PPC::X1); + if (MustSaveLR) + BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) + .addImm(LROffset) + .addReg(SPReg); - if (HasBP) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X30) - .addImm(BPOffset).addReg(PPC::X1); + assert((isPPC64 || MustSaveCRs.empty()) && + "Epilogue CR restoring supported only in 64-bit mode"); - if (!MustSaveCRs.empty()) - for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) - BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) - .addReg(PPC::X12, getKillRegState(i == e-1)); + if (!MustSaveCRs.empty()) // will only occur for PPC64 + BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) + .addImm(8) + .addReg(SPReg); - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0); - } else { - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0) - .addImm(LROffset).addReg(PPC::R1); + if (HasFP) + BuildMI(MBB, MBBI, dl, LoadInst, FPReg) + .addImm(FPOffset) + .addReg(SPReg); - assert(MustSaveCRs.empty() && - "Epilogue CR restoring supported only in 64-bit mode"); + if (HasBP) + BuildMI(MBB, MBBI, dl, LoadInst, BPReg) + .addImm(BPOffset) + .addReg(SPReg); - if (HasFP) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31) - .addImm(FPOffset).addReg(PPC::R1); + if (!MustSaveCRs.empty()) // will only occur for PPC64 + for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) + BuildMI(MBB, MBBI, 
dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) + .addReg(TempReg, getKillRegState(i == e-1)); - if (HasBP) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R30) - .addImm(FPOffset).addReg(PPC::R1); - - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0); - } + if (MustSaveLR) + BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg); // Callee pop calling convention. Pop parameter/linkage area. Used for tail // call optimization @@ -880,27 +827,20 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, MF.getFunction()->getCallingConv() == CallingConv::Fast) { PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); unsigned CallerAllocatedAmt = FI->getMinReservedArea(); - unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1; - unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; - unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0; - unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI; - unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4; - unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS; - unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI; if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { - BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg) - .addReg(StackReg).addImm(CallerAllocatedAmt); + BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) + .addReg(SPReg).addImm(CallerAllocatedAmt); } else { - BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) + BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) .addImm(CallerAllocatedAmt >> 16); - BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) - .addReg(TmpReg, RegState::Kill) + BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) + .addReg(ScratchReg, RegState::Kill) .addImm(CallerAllocatedAmt & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(ADDInstr)) - .addReg(StackReg) + BuildMI(MBB, MBBI, dl, AddInst) + .addReg(SPReg) .addReg(FPReg) - .addReg(TmpReg); + .addReg(ScratchReg); } } else if (RetOpcode == PPC::TCRETURNdi) { MBBI = MBB.getLastNonDebugInstr(); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 475bde1..6ba6af6 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -876,8 +876,10 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { // target-specific node if it hasn't already been changed. SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); - if (N->isMachineOpcode()) + if (N->isMachineOpcode()) { + N->setNodeId(-1); return NULL; // Already selected. 
+ } switch (N->getOpcode()) { default: break; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 664dd12..8da5f05 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -149,28 +149,24 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) Subtarget->hasFRSQRTES() && Subtarget->hasFRES())) setOperationAction(ISD::FSQRT, MVT::f32, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + if (Subtarget->hasFCPSGN()) { + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); + } else { + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + } if (Subtarget->hasFPRND()) { setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + setOperationAction(ISD::FROUND, MVT::f64, Legal); setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); - - // frin does not implement "ties to even." Thus, this is safe only in - // fast-math mode. - if (TM.Options.UnsafeFPMath) { - setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); - - // These need to set FE_INEXACT, and use a custom inserter. - setOperationAction(ISD::FRINT, MVT::f64, Legal); - setOperationAction(ISD::FRINT, MVT::f32, Legal); - } + setOperationAction(ISD::FROUND, MVT::f32, Legal); } // PowerPC does not have BSWAP, CTPOP or CTTZ @@ -560,7 +556,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setInsertFencesForAtomic(true); - setSchedulingPreference(Sched::Hybrid); + if (Subtarget->enableMachineScheduler()) + setSchedulingPreference(Sched::Source); + else + setSchedulingPreference(Sched::Hybrid); computeRegisterProperties(); @@ -579,24 +578,47 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) } } +/// getMaxByValAlign - Helper for getByValTypeAlignment to determine +/// the desired ByVal argument alignment. +static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, + unsigned MaxMaxAlign) { + if (MaxAlign == MaxMaxAlign) + return; + if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { + if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256) + MaxAlign = 32; + else if (VTy->getBitWidth() >= 128 && MaxAlign < 16) + MaxAlign = 16; + } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + unsigned EltAlign = 0; + getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign); + if (EltAlign > MaxAlign) + MaxAlign = EltAlign; + } else if (StructType *STy = dyn_cast<StructType>(Ty)) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + unsigned EltAlign = 0; + getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign); + if (EltAlign > MaxAlign) + MaxAlign = EltAlign; + if (MaxAlign == MaxMaxAlign) + break; + } + } +} + /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { - const TargetMachine &TM = getTargetMachine(); // Darwin passes everything on 4 byte boundary. - if (TM.getSubtarget<PPCSubtarget>().isDarwin()) + if (PPCSubTarget.isDarwin()) return 4; // 16byte and wider vectors are passed on 16byte boundary. 
- if (VectorType *VTy = dyn_cast<VectorType>(Ty)) - if (VTy->getBitWidth() >= 128) - return 16; - // The rest is 8 on PPC64 and 4 on PPC32 boundary. - if (PPCSubTarget.isPPC64()) - return 8; - - return 4; + unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4; + if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX()) + getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16); + return Align; } const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { @@ -1386,6 +1408,10 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { + // FIXME: TLS addresses currently use medium model code sequences, + // which is the most useful form. Eventually support for small and + // large models could be added if users need it, at the cost of + // additional complexity. GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); @@ -1814,6 +1840,12 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, #include "PPCGenCallingConv.inc" +// Function whose sole purpose is to kill compiler warnings +// stemming from unused functions included from PPCGenCallingConv.inc. +CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { + return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; +} + bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, @@ -2276,6 +2308,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( InVals.push_back(FIN); continue; } + + unsigned BVAlign = Flags.getByValAlign(); + if (BVAlign > 8) { + ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign; + CurArgOffset = ArgOffset; + } + // All aggregates smaller than 8 bytes must be passed right-justified. if (ObjSize < PtrByteSize) CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize); @@ -3448,7 +3487,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, // from allocating it), resulting in an additional register being // allocated and an unnecessary move instruction being generated. needsTOCRestore = true; - } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) { + } else if ((CallOpc == PPCISD::CALL) && + (!isLocalCall(Callee) || + DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { // Otherwise insert NOP for non-local calls. CallOpc = PPCISD::CALL_NOP; } @@ -3865,6 +3906,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, if (Size == 0) continue; + unsigned BVAlign = Flags.getByValAlign(); + if (BVAlign > 8) { + if (BVAlign % PtrByteSize != 0) + llvm_unreachable( + "ByVal alignment is not a multiple of the pointer size"); + + ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign; + } + // All aggregates smaller than 8 bytes must be passed right-justified. if (Size==1 || Size==2 || Size==4) { EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); @@ -3956,7 +4006,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, continue; } - switch (Arg.getValueType().getSimpleVT().SimpleTy) { + switch (Arg.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unexpected ValueType for argument!"); case MVT::i32: case MVT::i64: @@ -3979,7 +4029,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // must be passed right-justified in the stack doubleword, and // in the GPR, if one is available. 
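The ByVal alignment handling added above advances ArgOffset to the next multiple of BVAlign before the aggregate is placed. A small worked example of that round-up idiom, in plain C++ (not LLVM code):

#include <cassert>

static unsigned roundUp(unsigned ArgOffset, unsigned BVAlign) {
  return ((ArgOffset + BVAlign - 1) / BVAlign) * BVAlign;
}

int main() {
  assert(roundUp(48, 16) == 48);   // already 16-byte aligned, no padding
  assert(roundUp(52, 16) == 64);   // 12 bytes of padding are skipped
}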
SDValue StoreOff; - if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) { + if (Arg.getSimpleValueType().SimpleTy == MVT::f32) { SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); } else @@ -4287,7 +4337,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, continue; } - switch (Arg.getValueType().getSimpleVT().SimpleTy) { + switch (Arg.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unexpected ValueType for argument!"); case MVT::i32: case MVT::i64: @@ -4752,7 +4802,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); SDValue Tmp; - switch (Op.getValueType().getSimpleVT().SimpleTy) { + switch (Op.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : @@ -6676,51 +6726,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Restore FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg); - } else if (MI->getOpcode() == PPC::FRINDrint || - MI->getOpcode() == PPC::FRINSrint) { - bool isf32 = MI->getOpcode() == PPC::FRINSrint; - unsigned Dest = MI->getOperand(0).getReg(); - unsigned Src = MI->getOperand(1).getReg(); - DebugLoc dl = MI->getDebugLoc(); - - MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); - - // Perform the rounding. - BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest) - .addReg(Src); - - // Compare the results. - BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg) - .addReg(Dest).addReg(Src); - - // If the results were not equal, then set the FPSCR XX bit. - MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, midMBB); - F->insert(It, exitMBB); - exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - BuildMI(*BB, MI, dl, TII->get(PPC::BCC)) - .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB); - - BB->addSuccessor(midMBB); - BB->addSuccessor(exitMBB); - - BB = midMBB; - - // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set - // the FI bit here because that will not automatically set XX also, - // and XX is what libm interprets as the FE_INEXACT flag. - BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6); - BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); - - BB->addSuccessor(exitMBB); - - BB = exitMBB; } else { llvm_unreachable("Unexpected instr type to insert"); } @@ -7061,8 +7066,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (RV.getNode() != 0) { DCI.AddToWorklist(RV.getNode()); RV = DAGCombineFastRecip(RV, DCI); - if (RV.getNode() != 0) + if (RV.getNode() != 0) { + // Unfortunately, RV is now NaN if the input was exactly 0. Select out + // this case and force the answer to 0. 
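A plain C++ illustration (not the DAG combine itself) of the failure mode the comment above describes: one Newton-Raphson refinement step for 1/sqrt(x) multiplies terms containing x * est * est, and with x == 0 the estimate is +inf, so 0 * inf produces NaN unless the zero input is selected out.

#include <cmath>
#include <cstdio>

int main() {
  float x = 0.0f;
  float est = 1.0f / std::sqrt(x);                      // estimate: +inf
  float refined = est * (1.5f - 0.5f * x * est * est);  // 0 * inf -> NaN
  std::printf("%f %f\n", est, refined);                 // prints: inf nan
  float result = (x == 0.0f) ? 0.0f : refined;          // the added select
  std::printf("%f\n", result);                          // prints: 0
}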
+ + EVT VT = RV.getValueType(); + + SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType()); + if (VT.isVector()) { + assert(VT.getVectorNumElements() == 4 && "Unknown vector type"); + Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero); + } + + SDValue ZeroCmp = + DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT), + N->getOperand(0), Zero, ISD::SETEQ); + DCI.AddToWorklist(ZeroCmp.getNode()); + DCI.AddToWorklist(RV.getNode()); + + RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT, + ZeroCmp, Zero, RV); return RV; + } } } @@ -7158,7 +7183,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty); if (ISD::isNON_EXTLoad(N) && VT.isVector() && TM.getSubtarget<PPCSubtarget>().hasAltivec() && - DCI.getDAGCombineLevel() == AfterLegalizeTypes && + (VT == MVT::v16i8 || VT == MVT::v8i16 || + VT == MVT::v4i32 || VT == MVT::v4f32) && LD->getAlignment() < ABIAlignment) { // This is a type-legal unaligned Altivec load. SDValue Chain = LD->getChain(); @@ -7302,6 +7328,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } } } + + break; case ISD::BSWAP: // Turn BSWAP (LOAD) -> lhbrx/lwbrx. if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && @@ -7645,7 +7673,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, PPC::GPRCRegClass.contains(R.first)) { const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); return std::make_pair(TRI->getMatchingSuperReg(R.first, - PPC::sub_32, &PPC::GPRCRegClass), + PPC::sub_32, &PPC::G8RCRegClass), &PPC::G8RCRegClass); } @@ -7896,7 +7924,7 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { } Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { - if (DisableILPPref) + if (DisableILPPref || PPCSubTarget.enableMachineScheduler()) return TargetLowering::getSchedulingPreference(N); return Sched::ILP; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index aa5e821..df3af35 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -627,6 +627,8 @@ namespace llvm { SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const; SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const; + + CCAssignFn *useFastISelCCs(unsigned Flag) const; }; namespace PPC { diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index f78bb38..46db4fe 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -270,6 +270,7 @@ def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; +let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking. 
def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM), "mfocrf $rT, $FXM", SprMFCR>, PPC970_DGroup_First, PPC970_Unit_CRU; @@ -506,6 +507,14 @@ defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS), [(set i64:$rA, (sext_inreg i64:$rS, i16))]>; } // Interpretation64Bit +// For fast-isel: +let isCodeGenOnly = 1 in { +def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$rA), (ins gprc:$rS), + "extsb $rA, $rS", IntSimple, []>, isPPC64; +def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$rA), (ins gprc:$rS), + "extsh $rA, $rS", IntSimple, []>, isPPC64; +} // isCodeGenOnly for fast-isel + defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS), "extsw", "$rA, $rS", IntSimple, [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64; @@ -520,16 +529,16 @@ defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH), defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS), "cntlzd", "$rA, $rS", IntGeneral, [(set i64:$rA, (ctlz i64:$rS))]>; -defm POPCNTD : XForm_11r<31, 506, (outs g8rc:$rA), (ins g8rc:$rS), - "popcntd", "$rA, $rS", IntGeneral, - [(set i64:$rA, (ctpop i64:$rS))]>; +def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS), + "popcntd $rA, $rS", IntGeneral, + [(set i64:$rA, (ctpop i64:$rS))]>; // popcntw also does a population count on the high 32 bits (storing the // results in the high 32-bits of the output). We'll ignore that here (which is // safe because we never separately use the high part of the 64-bit registers). -defm POPCNTW : XForm_11r<31, 378, (outs gprc:$rA), (ins gprc:$rS), - "popcntw", "$rA, $rS", IntGeneral, - [(set i32:$rA, (ctpop i32:$rS))]>; +def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS), + "popcntw $rA, $rS", IntGeneral, + [(set i32:$rA, (ctpop i32:$rS))]>; defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), "divd", "$rT, $rA, $rB", IntDivD, @@ -569,6 +578,14 @@ defm RLDICL : MDForm_1r<30, 0, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), "rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI, []>, isPPC64; +// For fast-isel: +let isCodeGenOnly = 1 in +def RLDICL_32_64 : MDForm_1<30, 0, + (outs g8rc:$rA), + (ins gprc:$rS, u6imm:$SH, u6imm:$MBE), + "rldicl $rA, $rS, $SH, $MBE", IntRotateDI, + []>, isPPC64; +// End fast-isel. defm RLDICR : MDForm_1r<30, 1, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), "rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI, @@ -620,6 +637,15 @@ def LWAX : XForm_1<31, 341, (outs g8rc:$rD), (ins memrr:$src), "lwax $rD, $src", LdStLHA, [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; +// For fast-isel: +let isCodeGenOnly = 1, mayLoad = 1 in { +def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src), + "lwa $rD, $src", LdStLWA, []>, isPPC64, + PPC970_DGroup_Cracked; +def LWAX_32 : XForm_1<31, 341, (outs gprc:$rD), (ins memrr:$src), + "lwax $rD, $src", LdStLHA, []>, isPPC64, + PPC970_DGroup_Cracked; +} // end fast-isel isCodeGenOnly // Update forms. 
let mayLoad = 1, neverHasSideEffects = 1 in { @@ -942,6 +968,9 @@ let PPC970_Unit = 3, neverHasSideEffects = 1, defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB), "fcfid", "$frD, $frB", FPGeneral, [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64; +defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB), + "fctid", "$frD, $frB", FPGeneral, + []>, isPPC64; defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB), "fctidz", "$frD, $frB", FPGeneral, [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64; diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index fdea51d..a55abe3 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -229,35 +229,45 @@ let Predicates = [HasAltivec] in { let isCodeGenOnly = 1 in { def DSS : DSS_Form<822, (outs), (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), - "dss $STRM", LdStLoad /*FIXME*/, []>; + "dss $STRM", LdStLoad /*FIXME*/, []>, + Deprecated<DeprecatedDST>; def DSSALL : DSS_Form<822, (outs), (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2), - "dssall", LdStLoad /*FIXME*/, []>; + "dssall", LdStLoad /*FIXME*/, []>, + Deprecated<DeprecatedDST>; def DST : DSS_Form<342, (outs), (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; + "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + Deprecated<DeprecatedDST>; def DSTT : DSS_Form<342, (outs), (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; + "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + Deprecated<DeprecatedDST>; def DSTST : DSS_Form<374, (outs), (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; + "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + Deprecated<DeprecatedDST>; def DSTSTT : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; + "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + Deprecated<DeprecatedDST>; def DST64 : DSS_Form<342, (outs), (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; + "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + Deprecated<DeprecatedDST>; def DSTT64 : DSS_Form<342, (outs), (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; + "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + Deprecated<DeprecatedDST>; def DSTST64 : DSS_Form<374, (outs), (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; + "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + Deprecated<DeprecatedDST>; def DSTSTT64 : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; + "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + Deprecated<DeprecatedDST>; } def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins), diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 42adc02..29233d4 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -398,6 +398,13 @@ class XForm_1a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let RST = 0; } +class XForm_rs<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let A = 0; + let B = 
0; +} + class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> { @@ -438,6 +445,17 @@ class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = 0; } +class XForm_mtmsr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RS; + bits<1> L; + + let Inst{6-10} = RS; + let Inst{15} = L; + let Inst{21-30} = xo; +} + class XForm_16_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : XForm_16<opcode, xo, OOL, IOL, asmstr, itin> { @@ -534,6 +552,21 @@ class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = RC; } +class XForm_0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let RST = 0; + let A = 0; + let B = 0; +} + +class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let RST = 0; + let A = 0; +} + // DCB_Form - Form X instruction, used for dcb* instructions. class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 375daee..315ad04 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -33,7 +33,7 @@ #include "llvm/Support/raw_ostream.h" #define GET_INSTRMAP_INFO -#define GET_INSTRINFO_CTOR +#define GET_INSTRINFO_CTOR_DTOR #include "PPCGenInstrInfo.inc" using namespace llvm; @@ -45,6 +45,9 @@ opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt", cl::desc("Disable compare instruction optimization"), cl::Hidden); +// Pin the vtable to this file. +void PPCInstrInfo::anchor() {} + PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP), TM(tm), RI(*TM.getSubtargetImpl()) {} @@ -985,6 +988,10 @@ bool PPCInstrInfo::SubsumesPredicate( if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR) return false; + // P1 can only subsume P2 if they test the same condition register. 
+ if (Pred1[1].getReg() != Pred2[1].getReg()) + return false; + PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm(); PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm(); diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index bd72a4d..f140c41 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -78,6 +78,7 @@ class PPCInstrInfo : public PPCGenInstrInfo { const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs, bool &NonRI, bool &SpillsVRS) const; + virtual void anchor(); public: explicit PPCInstrInfo(PPCTargetMachine &TM); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 398a11b..2bd3aad 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -785,6 +785,20 @@ multiclass XForm_26r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, } } +multiclass XForm_28r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XForm_28<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR1] in + def o : XForm_28<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + multiclass AForm_1r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list<dag> pattern> { @@ -1678,6 +1692,9 @@ let isCompare = 1, neverHasSideEffects = 1 in { let Uses = [RM] in { let neverHasSideEffects = 1 in { + defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB), + "fctiw", "$frD, $frB", FPGeneral, + []>; defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB), "fctiwz", "$frD, $frB", FPGeneral, [(set f64:$frD, (PPCfctiwz f64:$frB))]>; @@ -1686,23 +1703,13 @@ let Uses = [RM] in { "frsp", "$frD, $frB", FPGeneral, [(set f32:$frD, (fround f64:$frB))]>; - // The frin -> nearbyint mapping is valid only in fast-math mode. let Interpretation64Bit = 1 in defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB), "frin", "$frD, $frB", FPGeneral, - [(set f64:$frD, (fnearbyint f64:$frB))]>; + [(set f64:$frD, (frnd f64:$frB))]>; defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB), "frin", "$frD, $frB", FPGeneral, - [(set f32:$frD, (fnearbyint f32:$frB))]>; - } - - // These pseudos expand to rint but also set FE_INEXACT when the result does - // not equal the argument. - let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR! - def FRINDrint : Pseudo<(outs f8rc:$frD), (ins f8rc:$frB), - "#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>; - def FRINSrint : Pseudo<(outs f4rc:$frD), (ins f4rc:$frB), - "#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>; + [(set f32:$frD, (frnd f32:$frB))]>; } let neverHasSideEffects = 1 in { @@ -1772,6 +1779,14 @@ defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB), "fneg", "$frD, $frB", FPGeneral, [(set f64:$frD, (fneg f64:$frB))]>; +defm FCPSGNS : XForm_28r<63, 8, (outs f4rc:$frD), (ins f4rc:$frA, f4rc:$frB), + "fcpsgn", "$frD, $frA, $frB", FPGeneral, + [(set f32:$frD, (fcopysign f32:$frB, f32:$frA))]>; +let Interpretation64Bit = 1 in +defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB), + "fcpsgn", "$frD, $frA, $frB", FPGeneral, + [(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>; + // Reciprocal estimates. 
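For the FCPSGNS/FCPSGND definitions just above: fcpsgn FRT, FRA, FRB produces FRB's magnitude with FRA's sign, i.e. the sign source comes first. A plain C++ model of that semantics (not the TableGen pattern):

#include <cassert>
#include <cmath>

static double fcpsgn(double FRA, double FRB) {
  return std::copysign(FRB, FRA);   // magnitude of FRB, sign of FRA
}

int main() {
  assert(fcpsgn(-0.0, 3.5) == -3.5);
  assert(fcpsgn(2.0, -7.25) == 7.25);
}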
defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB), "fre", "$frD, $frB", FPGeneral, @@ -1855,7 +1870,7 @@ def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT), "mtspr $SPR, $RT", SprMTSPR>; def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR), - "mftb $RT, $SPR", SprMFTB>; + "mftb $RT, $SPR", SprMFTB>, Deprecated<DeprecatedMFTB>; let Uses = [CTR] in { def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins), @@ -1927,6 +1942,7 @@ def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; +let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking. def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM), "mfocrf $rT, $FXM", SprMFCR>, PPC970_DGroup_First, PPC970_Unit_CRU; @@ -2280,6 +2296,12 @@ def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B), def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B), (FNMSUBS $A, $C, $B)>; +// FCOPYSIGN's operand types need not agree. +def : Pat<(fcopysign f64:$frB, f32:$frA), + (FCPSGND (COPY_TO_REGCLASS $frA, F8RC), $frB)>; +def : Pat<(fcopysign f32:$frB, f64:$frA), + (FCPSGNS (COPY_TO_REGCLASS $frA, F4RC), $frB)>; + include "PPCInstrAltivec.td" include "PPCInstr64Bit.td" @@ -2300,6 +2322,35 @@ def EIEIO : XForm_24_eieio<31, 854, (outs), (ins), def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L), "wait $L", LdStLoad, []>; +def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L), + "mtmsr $RS, $L", SprMTMSR>; + +def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins), + "mfmsr $RT", SprMFMSR, []>; + +def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L), + "mtmsrd $RS, $L", SprMTMSRD>; + +def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB), + "slbie $RB", SprSLBIE, []>; + +def SLBMTE : XForm_26<31, 402, (outs), (ins gprc:$RS, gprc:$RB), + "slbmte $RS, $RB", SprSLBMTE, []>; + +def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB), + "slbmfee $RT, $RB", SprSLBMFEE, []>; + +def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", SprSLBIA, []>; + +def TLBSYNC : XForm_0<31, 566, (outs), (ins), + "tlbsync", SprTLBSYNC, []>; + +def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB), + "tlbiel $RB", SprTLBIEL, []>; + +def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB), + "tlbie $RB,$RS", SprTLBIE, []>; + //===----------------------------------------------------------------------===// // PowerPC Assembler Instruction Aliases // @@ -2368,6 +2419,46 @@ def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>; def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>; def : InstAlias<"subc. 
$rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>; +def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>; +def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>; + +def : InstAlias<"mfsprg $RT, 0", (MFSPR gprc:$RT, 272)>; +def : InstAlias<"mfsprg $RT, 1", (MFSPR gprc:$RT, 273)>; +def : InstAlias<"mfsprg $RT, 2", (MFSPR gprc:$RT, 274)>; +def : InstAlias<"mfsprg $RT, 3", (MFSPR gprc:$RT, 275)>; + +def : InstAlias<"mfsprg0 $RT", (MFSPR gprc:$RT, 272)>; +def : InstAlias<"mfsprg1 $RT", (MFSPR gprc:$RT, 273)>; +def : InstAlias<"mfsprg2 $RT", (MFSPR gprc:$RT, 274)>; +def : InstAlias<"mfsprg3 $RT", (MFSPR gprc:$RT, 275)>; + +def : InstAlias<"mtsprg 0, $RT", (MTSPR 272, gprc:$RT)>; +def : InstAlias<"mtsprg 1, $RT", (MTSPR 273, gprc:$RT)>; +def : InstAlias<"mtsprg 2, $RT", (MTSPR 274, gprc:$RT)>; +def : InstAlias<"mtsprg 3, $RT", (MTSPR 275, gprc:$RT)>; + +def : InstAlias<"mtsprg0 $RT", (MTSPR 272, gprc:$RT)>; +def : InstAlias<"mtsprg1 $RT", (MTSPR 273, gprc:$RT)>; +def : InstAlias<"mtsprg2 $RT", (MTSPR 274, gprc:$RT)>; +def : InstAlias<"mtsprg3 $RT", (MTSPR 275, gprc:$RT)>; + +def : InstAlias<"mtasr $RS", (MTSPR 280, gprc:$RS)>; + +def : InstAlias<"mfdec $RT", (MFSPR gprc:$RT, 22)>; +def : InstAlias<"mtdec $RT", (MTSPR 22, gprc:$RT)>; + +def : InstAlias<"mfpvr $RT", (MFSPR gprc:$RT, 287)>; + +def : InstAlias<"mfsdr1 $RT", (MFSPR gprc:$RT, 25)>; +def : InstAlias<"mtsdr1 $RT", (MTSPR 25, gprc:$RT)>; + +def : InstAlias<"mfsrr0 $RT", (MFSPR gprc:$RT, 26)>; +def : InstAlias<"mfsrr1 $RT", (MFSPR gprc:$RT, 27)>; +def : InstAlias<"mtsrr0 $RT", (MTSPR 26, gprc:$RT)>; +def : InstAlias<"mtsrr1 $RT", (MTSPR 27, gprc:$RT)>; + +def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>; + def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; def EXTLWIo : PPCAsmPseudo<"extlwi. 
$rA, $rS, $n, $b", diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index d69aa4a..f61c8bf 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -69,7 +69,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ if (MO.isGlobal()) { StubSym = MachineModuleInfoImpl:: - StubValueTy(AP.Mang->getSymbol(MO.getGlobal()), + StubValueTy(AP.getSymbol(MO.getGlobal()), !MO.getGlobal()->hasInternalLinkage()); } else { Name.erase(Name.end()-5, Name.end()); @@ -95,7 +95,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ if (StubSym.getPointer() == 0) { assert(MO.isGlobal() && "Extern symbol not handled yet"); StubSym = MachineModuleInfoImpl:: - StubValueTy(AP.Mang->getSymbol(MO.getGlobal()), + StubValueTy(AP.getSymbol(MO.getGlobal()), !MO.getGlobal()->hasInternalLinkage()); } return Sym; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index adba613..19ccbfc 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -69,6 +69,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST) ImmToIdxMap[PPC::STH] = PPC::STHX; ImmToIdxMap[PPC::STW] = PPC::STWX; ImmToIdxMap[PPC::STFS] = PPC::STFSX; ImmToIdxMap[PPC::STFD] = PPC::STFDX; ImmToIdxMap[PPC::ADDI] = PPC::ADD4; + ImmToIdxMap[PPC::LWA_32] = PPC::LWAX_32; // 64-bit ImmToIdxMap[PPC::LHA8] = PPC::LHAX8; ImmToIdxMap[PPC::LBZ8] = PPC::LBZX8; @@ -532,6 +533,7 @@ static bool usesIXAddr(const MachineInstr &MI) { default: return false; case PPC::LWA: + case PPC::LWA_32: case PPC::LD: case PPC::STD: return true; @@ -689,14 +691,6 @@ unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return TFI->hasFP(MF) ? PPC::X31 : PPC::X1; } -unsigned PPCRegisterInfo::getEHExceptionRegister() const { - return !Subtarget.isPPC64() ? PPC::R3 : PPC::X3; -} - -unsigned PPCRegisterInfo::getEHHandlerRegister() const { - return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4; -} - unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const { if (!hasBasePointer(MF)) return getFrameRegister(MF); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index d02af9e..dd3bb40 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -97,10 +97,6 @@ public: bool hasBasePointer(const MachineFunction &MF) const; bool canRealignStack(const MachineFunction &MF) const; bool needsStackRealignment(const MachineFunction &MF) const; - - // Exception handling queries. - unsigned getEHExceptionRegister() const; - unsigned getEHHandlerRegister() const; }; } // end namespace llvm diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index 660c0c3..92ba69c 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -108,6 +108,14 @@ def VecPerm : InstrItinClass; def VecFPRound : InstrItinClass; def VecVSL : InstrItinClass; def VecVSR : InstrItinClass; +def SprMTMSRD : InstrItinClass; +def SprSLIE : InstrItinClass; +def SprSLBIE : InstrItinClass; +def SprSLBMTE : InstrItinClass; +def SprSLBMFEE : InstrItinClass; +def SprSLBIA : InstrItinClass; +def SprTLBIEL : InstrItinClass; +def SprTLBIE : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. 
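Regarding the PPCRegisterInfo.cpp changes above (ImmToIdxMap[PPC::LWA_32] = PPC::LWAX_32 and the new usesIXAddr case): DS-form memory instructions such as ld, std and lwa encode a signed 16-bit displacement whose low two bits must be zero, so any other offset has to go through the indexed (X-form) variant. A rough sketch of that check, in plain C++ (not LLVM code):

#include <cassert>
#include <cstdint>

// A DS-form displacement is usable only if it is a multiple of 4 and fits in
// the sign-extended 16-bit field (effective range -32768 .. 32764).
static bool fitsDSForm(int64_t Offset) {
  return (Offset & 3) == 0 && Offset >= -32768 && Offset <= 32764;
}

int main() {
  assert(fitsDSForm(8));        // lwa/ld can encode this directly
  assert(!fitsDSForm(6));       // misaligned -> needs lwax/ldx instead
  assert(!fitsDSForm(40000));   // out of range -> needs an indexed form
}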
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td index 8d5838e..1612cd2 100644 --- a/lib/Target/PowerPC/PPCScheduleA2.td +++ b/lib/Target/PowerPC/PPCScheduleA2.td @@ -14,39 +14,8 @@ //===----------------------------------------------------------------------===// // Functional units on the PowerPC A2 chip sets // -def IU0to3_0 : FuncUnit; // Fetch unit 1 to 4 slot 1 -def IU0to3_1 : FuncUnit; // Fetch unit 1 to 4 slot 2 -def IU0to3_2 : FuncUnit; // Fetch unit 1 to 4 slot 3 -def IU0to3_3 : FuncUnit; // Fetch unit 1 to 4 slot 4 -def IU4_0 : FuncUnit; // Instruction buffer slot 1 -def IU4_1 : FuncUnit; // Instruction buffer slot 2 -def IU4_2 : FuncUnit; // Instruction buffer slot 3 -def IU4_3 : FuncUnit; // Instruction buffer slot 4 -def IU4_4 : FuncUnit; // Instruction buffer slot 5 -def IU4_5 : FuncUnit; // Instruction buffer slot 6 -def IU4_6 : FuncUnit; // Instruction buffer slot 7 -def IU4_7 : FuncUnit; // Instruction buffer slot 8 -def IU5 : FuncUnit; // Dependency resolution -def IU6 : FuncUnit; // Instruction issue -def RF0 : FuncUnit; -def XRF1 : FuncUnit; -def XEX1 : FuncUnit; // Execution stage 1 for the XU pipeline -def XEX2 : FuncUnit; // Execution stage 2 for the XU pipeline -def XEX3 : FuncUnit; // Execution stage 3 for the XU pipeline -def XEX4 : FuncUnit; // Execution stage 4 for the XU pipeline -def XEX5 : FuncUnit; // Execution stage 5 for the XU pipeline -def XEX6 : FuncUnit; // Execution stage 6 for the XU pipeline -def FRF1 : FuncUnit; -def FEX1 : FuncUnit; // Execution stage 1 for the FU pipeline -def FEX2 : FuncUnit; // Execution stage 2 for the FU pipeline -def FEX3 : FuncUnit; // Execution stage 3 for the FU pipeline -def FEX4 : FuncUnit; // Execution stage 4 for the FU pipeline -def FEX5 : FuncUnit; // Execution stage 5 for the FU pipeline -def FEX6 : FuncUnit; // Execution stage 6 for the FU pipeline - -def CR_Bypass : Bypass; // The bypass for condition regs. -//def GPR_Bypass : Bypass; // The bypass for general-purpose regs. -//def FPR_Bypass : Bypass; // The bypass for floating-point regs. +def XU : FuncUnit; // XU pipeline +def FU : FuncUnit; // FI pipeline // // This file defines the itinerary class data for the PPC A2 processor. @@ -55,699 +24,119 @@ def CR_Bypass : Bypass; // The bypass for condition regs. 
def PPCA2Itineraries : ProcessorItineraries< - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3, - IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7, - IU5, IU6, RF0, XRF1, XEX1, XEX2, XEX3, XEX4, XEX5, XEX6, - FRF1, FEX1, FEX2, FEX3, FEX4, FEX5, FEX6], - [CR_Bypass, GPR_Bypass, FPR_Bypass], [ - InstrItinData<IntSimple , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntGeneral , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntCompare , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [CR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntDivW , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<38, [XEX6]>], - [53, 7, 7], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntMFFS , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntMTFSB0 , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntMulHW , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntMulHWU , [InstrStage<4, - [IU0to3_0, 
IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntMulLI , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [15, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntRotate , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntRotateD , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntRotateDI , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntShift , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntTrapW , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntTrapD , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], 
- [GPR_Bypass, GPR_Bypass]>, - InstrItinData<BrB , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [15, 7, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData<BrCR , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [CR_Bypass, CR_Bypass, CR_Bypass]>, - InstrItinData<BrMCR , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [CR_Bypass, CR_Bypass, CR_Bypass]>, - InstrItinData<BrMCRX , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7, 7], - [CR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStDCBA , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [13, 11], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStDCBF , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [13, 11], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStDCBI , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [13, 11], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStLoad , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 
7], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLoadUpd , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLDU , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStStore , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [13, 7], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStStoreUpd, [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [13, 7], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStICBI , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStSTFD , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7, 7], - [NoBypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<LdStSTFDU , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7, 7], - [NoBypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<LdStLFD , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, 
[XEX6]>], - [14, 7, 7], - [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLFDU , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7, 7], - [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLHA , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStLHAU , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStLMW , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [14, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStLWARX , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [26, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStSTD , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [13, 7], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStSTDU , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [13, 7], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStSTDCX , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, 
InstrStage<1, [XEX6]>], - [26, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStSTWCX , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [26, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStSync , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<12, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>]>, - InstrItinData<SprISYNC , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>, - InstrItinData<SprMFSR , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [15, 7], - [GPR_Bypass, NoBypass]>, - InstrItinData<SprMTMSR , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [15, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData<SprMTSR , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [15, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData<SprTLBSYNC , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>, - InstrItinData<SprMFCR , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [10, 7], - [GPR_Bypass, CR_Bypass]>, - InstrItinData<SprMFMSR , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, 
IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [15, 7], - [GPR_Bypass, NoBypass]>, - InstrItinData<SprMFSPR , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [15, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData<SprMFTB , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], - [29, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData<SprMTSPR , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [15, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData<SprMTSRIN , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], - [29, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData<SprRFI , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], - [29, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData<SprSC , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], - [29, 7], - [NoBypass, GPR_Bypass]>, - InstrItinData<FPGeneral , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, - InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, - InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, - InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], - [15, 7, 7], - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPAddSub , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - 
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, - InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, - InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, - InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], - [15, 7, 7], - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPCompare , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, - InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, - InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, - InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], - [13, 7, 7], - [CR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPDivD , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<71, [FRF1], 0>, - InstrStage<71, [FEX1], 0>, - InstrStage<71, [FEX2], 0>, - InstrStage<71, [FEX3], 0>, - InstrStage<71, [FEX4], 0>, - InstrStage<71, [FEX5], 0>, - InstrStage<71, [FEX6]>], - [86, 7, 7], - [NoBypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPDivS , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<58, [FRF1], 0>, - InstrStage<58, [FEX1], 0>, - InstrStage<58, [FEX2], 0>, - InstrStage<58, [FEX3], 0>, - InstrStage<58, [FEX4], 0>, - InstrStage<58, [FEX5], 0>, - InstrStage<58, [FEX6]>], - [73, 7, 7], - [NoBypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPSqrt , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<68, [FRF1], 0>, - InstrStage<68, [FEX1], 0>, - InstrStage<68, [FEX2], 0>, - InstrStage<68, [FEX3], 0>, - InstrStage<68, [FEX4], 0>, - InstrStage<68, [FEX5], 0>, - InstrStage<68, [FEX6]>], - [86, 7], // FIXME: should be [86, 7] for double - // and [82, 7] for single. Likewise, - // the FEX? cycle count should be 68 - // for double and 64 for single. 
- [NoBypass, FPR_Bypass]>, - InstrItinData<FPFused , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, - InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, - InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, - InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], - [15, 7, 7, 7], - [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPRes , [InstrStage<4, - [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, - InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, - InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, - InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], - [15, 7], - [FPR_Bypass, FPR_Bypass]> + [XU, FU], [], [ + InstrItinData<IntSimple , [InstrStage<1, [XU]>], + [1, 1, 1]>, + InstrItinData<IntGeneral , [InstrStage<1, [XU]>], + [2, 1, 1]>, + InstrItinData<IntCompare , [InstrStage<1, [XU]>], + [2, 1, 1]>, + InstrItinData<IntDivW , [InstrStage<1, [XU]>], + [39, 1, 1]>, + InstrItinData<IntDivD , [InstrStage<1, [XU]>], + [71, 1, 1]>, + InstrItinData<IntMulHW , [InstrStage<1, [XU]>], + [5, 1, 1]>, + InstrItinData<IntMulHWU , [InstrStage<1, [XU]>], + [5, 1, 1]>, + InstrItinData<IntMulLI , [InstrStage<1, [XU]>], + [6, 1, 1]>, + InstrItinData<IntRotate , [InstrStage<1, [XU]>], + [2, 1, 1]>, + InstrItinData<IntRotateD , [InstrStage<1, [XU]>], + [2, 1, 1]>, + InstrItinData<IntRotateDI , [InstrStage<1, [XU]>], + [2, 1, 1]>, + InstrItinData<IntShift , [InstrStage<1, [XU]>], + [2, 1, 1]>, + InstrItinData<IntTrapW , [InstrStage<1, [XU]>], + [2, 1]>, + InstrItinData<IntTrapD , [InstrStage<1, [XU]>], + [2, 1]>, + InstrItinData<BrB , [InstrStage<1, [XU]>], + [6, 1, 1]>, + InstrItinData<BrCR , [InstrStage<1, [XU]>], + [1, 1, 1]>, + InstrItinData<BrMCR , [InstrStage<1, [XU]>], + [5, 1, 1]>, + InstrItinData<BrMCRX , [InstrStage<1, [XU]>], + [1, 1, 1]>, + InstrItinData<LdStDCBA , [InstrStage<1, [XU]>], + [1, 1, 1]>, + InstrItinData<LdStDCBF , [InstrStage<1, [XU]>], + [1, 1, 1]>, + InstrItinData<LdStDCBI , [InstrStage<1, [XU]>], + [1, 1, 1]>, + InstrItinData<LdStLoad , [InstrStage<1, [XU]>], + [6, 1, 1]>, + InstrItinData<LdStLoadUpd , [InstrStage<1, [XU]>], + [6, 8, 1, 1]>, + InstrItinData<LdStLDU , [InstrStage<1, [XU]>], + [6, 1, 1]>, + InstrItinData<LdStStore , [InstrStage<1, [XU]>], + [1, 1, 1]>, + InstrItinData<LdStStoreUpd, [InstrStage<1, [XU]>], + [2, 1, 1, 1]>, + InstrItinData<LdStICBI, [InstrStage<1, [XU]>], + [16, 1, 1]>, + InstrItinData<LdStSTFD , [InstrStage<1, [XU]>], + [1, 1, 1]>, + InstrItinData<LdStSTFDU , [InstrStage<1, [XU]>], + [2, 1, 1, 1]>, + InstrItinData<LdStLFD , [InstrStage<1, [XU]>], + [7, 1, 1]>, + InstrItinData<LdStLFDU , [InstrStage<1, [XU]>], + [7, 9, 1, 1]>, + InstrItinData<LdStLHA , [InstrStage<1, [XU]>], + [6, 1, 1]>, + InstrItinData<LdStLHAU , [InstrStage<1, [XU]>], + [6, 8, 1, 1]>, + InstrItinData<LdStLWARX , [InstrStage<1, [XU]>], + [82, 1, 1]>, // L2 latency + InstrItinData<LdStSTD , [InstrStage<1, [XU]>], + [1, 1, 1]>, + InstrItinData<LdStSTDU , [InstrStage<1, [XU]>], + [2, 1, 1, 1]>, + InstrItinData<LdStSTDCX , [InstrStage<1, [XU]>], + [82, 1, 1]>, // L2 latency + InstrItinData<LdStSTWCX , [InstrStage<1, [XU]>], + [82, 1, 1]>, // L2 latency + InstrItinData<LdStSync , [InstrStage<1, [XU]>], + [6]>, + InstrItinData<SprISYNC , [InstrStage<1, 
[XU]>], + [16]>, + InstrItinData<SprMTMSR , [InstrStage<1, [XU]>], + [16, 1]>, + InstrItinData<SprMFCR , [InstrStage<1, [XU]>], + [6, 1]>, + InstrItinData<SprMFMSR , [InstrStage<1, [XU]>], + [4, 1]>, + InstrItinData<SprMFSPR , [InstrStage<1, [XU]>], + [6, 1]>, + InstrItinData<SprMFTB , [InstrStage<1, [XU]>], + [4, 1]>, + InstrItinData<SprMTSPR , [InstrStage<1, [XU]>], + [6, 1]>, + InstrItinData<SprRFI , [InstrStage<1, [XU]>], + [16]>, + InstrItinData<SprSC , [InstrStage<1, [XU]>], + [16]>, + InstrItinData<FPGeneral , [InstrStage<1, [FU]>], + [6, 1, 1]>, + InstrItinData<FPAddSub , [InstrStage<1, [FU]>], + [6, 1, 1]>, + InstrItinData<FPCompare , [InstrStage<1, [FU]>], + [5, 1, 1]>, + InstrItinData<FPDivD , [InstrStage<1, [FU]>], + [72, 1, 1]>, + InstrItinData<FPDivS , [InstrStage<1, [FU]>], + [59, 1, 1]>, + InstrItinData<FPSqrt , [InstrStage<1, [FU]>], + [69, 1, 1]>, + InstrItinData<FPFused , [InstrStage<1, [FU]>], + [6, 1, 1, 1]>, + InstrItinData<FPRes , [InstrStage<1, [FU]>], + [6, 1]> ]>; // ===---------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCScheduleE500mc.td b/lib/Target/PowerPC/PPCScheduleE500mc.td index 9bb779a..c189b9e 100644 --- a/lib/Target/PowerPC/PPCScheduleE500mc.td +++ b/lib/Target/PowerPC/PPCScheduleE500mc.td @@ -36,6 +36,8 @@ def CFX_0 : FuncUnit; // CFX pipeline def LSU_0 : FuncUnit; // LSU pipeline def FPU_0 : FuncUnit; // FPU pipeline +def CR_Bypass : Bypass; + def PPCE500mcItineraries : ProcessorItineraries< [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0], [CR_Bypass, GPR_Bypass, FPR_Bypass], [ diff --git a/lib/Target/PowerPC/PPCScheduleE5500.td b/lib/Target/PowerPC/PPCScheduleE5500.td index d7e11ac..7a24d20 100644 --- a/lib/Target/PowerPC/PPCScheduleE5500.td +++ b/lib/Target/PowerPC/PPCScheduleE5500.td @@ -39,6 +39,7 @@ def CFX_1 : FuncUnit; // CFX pipeline stage 1 // def LSU_0 : FuncUnit; // LSU pipeline // def FPU_0 : FuncUnit; // FPU pipeline +// def CR_Bypass : Bypass; def PPCE5500Itineraries : ProcessorItineraries< [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1, diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 12d0326..7231ab1 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -15,6 +15,7 @@ #include "PPC.h" #include "PPCRegisterInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Function.h" @@ -74,6 +75,7 @@ void PPCSubtarget::initializeEnvironment() { Use64BitRegs = false; HasAltivec = false; HasQPX = false; + HasFCPSGN = false; HasFSQRT = false; HasFRE = false; HasFRES = false; @@ -88,6 +90,8 @@ void PPCSubtarget::initializeEnvironment() { HasPOPCNTD = false; HasLDBRX = false; IsBookE = false; + DeprecatedMFTB = false; + DeprecatedDST = false; HasLazyResolverStubs = false; IsJITCodeModel = false; } @@ -163,14 +167,7 @@ bool PPCSubtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const { - // FIXME: It would be best to use TargetSubtargetInfo::ANTIDEP_ALL here, - // but we can't because we can't reassign the cr registers. There is a - // dependence between the cr register and the RLWINM instruction used - // to extract its value which the anti-dependency breaker can't currently - // see. Maybe we should make a late-expanded pseudo to encode this dependency. 
- // (the relevant code is in PPCDAGToDAGISel::SelectSETCC) - - Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; + Mode = TargetSubtargetInfo::ANTIDEP_ALL; CriticalPathRCs.clear(); @@ -179,9 +176,44 @@ bool PPCSubtarget::enablePostRAScheduler( else CriticalPathRCs.push_back(&PPC::GPRCRegClass); - CriticalPathRCs.push_back(&PPC::F8RCRegClass); - CriticalPathRCs.push_back(&PPC::VRRCRegClass); - return OptLevel >= CodeGenOpt::Default; } +// Embedded cores need aggressive scheduling. +static bool needsAggressiveScheduling(unsigned Directive) { + switch (Directive) { + default: return false; + case PPC::DIR_440: + case PPC::DIR_A2: + case PPC::DIR_E500mc: + case PPC::DIR_E5500: + return true; + } +} + +bool PPCSubtarget::enableMachineScheduler() const { + // Enable MI scheduling for the embedded cores. + // FIXME: Enable this for all cores (some additional modeling + // may be necessary). + return needsAggressiveScheduling(DarwinDirective); +} + +void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, + MachineInstr *begin, + MachineInstr *end, + unsigned NumRegionInstrs) const { + if (needsAggressiveScheduling(DarwinDirective)) { + Policy.OnlyTopDown = false; + Policy.OnlyBottomUp = false; + } + + // Spilling is generally expensive on all PPC cores, so always enable + // register-pressure tracking. + Policy.ShouldTrackPressure = true; +} + +bool PPCSubtarget::useAA() const { + // Use AA during code generation for the embedded cores. + return needsAggressiveScheduling(DarwinDirective); +} + diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 3f3fc0e..c863a6e 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -76,6 +76,8 @@ protected: bool IsPPC64; bool HasAltivec; bool HasQPX; + bool HasVSX; + bool HasFCPSGN; bool HasFSQRT; bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES; bool HasRecipPrec; @@ -87,6 +89,8 @@ protected: bool HasPOPCNTD; bool HasLDBRX; bool IsBookE; + bool DeprecatedMFTB; + bool DeprecatedDST; bool HasLazyResolverStubs; bool IsJITCodeModel; bool IsLittleEndian; @@ -171,6 +175,7 @@ public: bool isLittleEndian() const { return IsLittleEndian; } // Specific obvious features. + bool hasFCPSGN() const { return HasFCPSGN; } bool hasFSQRT() const { return HasFSQRT; } bool hasFRE() const { return HasFRE; } bool hasFRES() const { return HasFRES; } @@ -188,6 +193,8 @@ public: bool hasPOPCNTD() const { return HasPOPCNTD; } bool hasLDBRX() const { return HasLDBRX; } bool isBookE() const { return IsBookE; } + bool isDeprecatedMFTB() const { return DeprecatedMFTB; } + bool isDeprecatedDST() const { return DeprecatedDST; } const Triple &getTargetTriple() const { return TargetTriple; } @@ -205,6 +212,14 @@ public: bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const; + + // Scheduling customization. 
+ bool enableMachineScheduler() const; + void overrideSchedPolicy(MachineSchedPolicy &Policy, + MachineInstr *begin, + MachineInstr *end, + unsigned NumRegionInstrs) const; + bool useAA() const; }; } // End llvm namespace diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h new file mode 100644 index 0000000..e876be1 --- /dev/null +++ b/lib/Target/PowerPC/PPCTargetStreamer.h @@ -0,0 +1,23 @@ +//===-- PPCTargetStreamer.h - PPC Target Streamer --s-----------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef PPCTARGETSTREAMER_H +#define PPCTARGETSTREAMER_H + +#include "llvm/MC/MCStreamer.h" + +namespace llvm { +class PPCTargetStreamer : public MCTargetStreamer { +public: + virtual ~PPCTargetStreamer(); + virtual void emitTCEntry(const MCSymbol &S) = 0; +}; +} + +#endif diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 2504ba7..8879630 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -77,6 +77,7 @@ public: /// \name Scalar TTI Implementations /// @{ virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; + virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; /// @} @@ -129,6 +130,14 @@ PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const { return PSK_Software; } +void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const { + if (ST->getDarwinDirective() == PPC::DIR_A2) { + // The A2 is in-order with a deep pipeline, and concatenation unrolling + // helps expose latency-hiding opportunities to the instruction scheduler. + UP.Partial = UP.Runtime = true; + } +} + unsigned PPCTTI::getNumberOfRegisters(bool Vector) const { if (Vector && !ST->hasAltivec()) return 0; |
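On the getUnrollingPreferences change at the end: setting UP.Partial and UP.Runtime asks the generic unroller to concatenate iterations and to emit a remainder loop when the trip count is not a compile-time multiple of the unroll factor. The hand-written sketch below (for exposition only; the pass performs this rewrite itself) shows why that helps an in-order, deeply pipelined core like the A2: several independent operations per unrolled iteration give the scheduler latency to hide.

#include <cstddef>

// Shape produced by partial unrolling (factor 4): four independent
// multiply-adds are in flight per iteration of the main body.
void saxpy_unrolled(float *y, const float *x, float a, std::size_t n) {
  std::size_t i = 0;
  for (; i + 4 <= n; i += 4) {
    y[i + 0] += a * x[i + 0];
    y[i + 1] += a * x[i + 1];
    y[i + 2] += a * x[i + 2];
    y[i + 3] += a * x[i + 3];
  }
  // Runtime remainder loop: what UP.Runtime enables when the trip count is
  // unknown until run time.
  for (; i < n; ++i)
    y[i] += a * x[i];
}

int main() {
  float y[7] = {0}, x[7] = {1, 1, 1, 1, 1, 1, 1};
  saxpy_unrolled(y, x, 2.0f, 7); // 4-wide body runs once, remainder covers 3
  return y[6] == 2.0f ? 0 : 1;
}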