diff options
Diffstat (limited to 'lib/Target/PowerPC')
35 files changed, 2848 insertions, 579 deletions
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 413142e..3d58306 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -136,21 +136,21 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - char Value = MI->getOperand(OpNo).getImm(); + int Value = MI->getOperand(OpNo).getImm(); Value = SignExtend32<5>(Value); O << (int)Value; } void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - unsigned char Value = MI->getOperand(OpNo).getImm(); + unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 31 && "Invalid u5imm argument!"); O << (unsigned int)Value; } void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - unsigned char Value = MI->getOperand(OpNo).getImm(); + unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 63 && "Invalid u6imm argument!"); O << (unsigned int)Value; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 48de583..f24edf6 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -13,8 +13,8 @@ #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachObjectWriter.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/ELF.h" @@ -29,9 +29,16 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case FK_Data_1: case FK_Data_2: case FK_Data_4: + case FK_Data_8: + case PPC::fixup_ppc_toc: + case PPC::fixup_ppc_tlsreg: + case PPC::fixup_ppc_nofixup: return Value; + case PPC::fixup_ppc_lo14: + case PPC::fixup_ppc_toc16_ds: + return (Value & 0xffff) << 2; case PPC::fixup_ppc_brcond14: - return Value & 0x3ffc; + return Value & 0xfffc; case PPC::fixup_ppc_br24: return Value & 0x3fffffc; #if 0 @@ -41,6 +48,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case PPC::fixup_ppc_ha16: return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff; case PPC::fixup_ppc_lo16: + case PPC::fixup_ppc_toc16: return Value & 0xffff; } } @@ -55,7 +63,9 @@ public: void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, uint64_t &FixedValue) {} + MCValue Target, uint64_t &FixedValue) { + llvm_unreachable("Relocation emission for MachO/PPC unimplemented!"); + } }; class PPCAsmBackend : public MCAsmBackend { @@ -72,7 +82,12 @@ public: { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_lo16", 16, 16, 0 }, { "fixup_ppc_ha16", 16, 16, 0 }, - { "fixup_ppc_lo14", 16, 14, 0 } + { "fixup_ppc_lo14", 16, 14, 0 }, + { "fixup_ppc_toc", 0, 64, 0 }, + { "fixup_ppc_toc16", 16, 16, 0 }, + { "fixup_ppc_toc16_ds", 16, 14, 0 }, + { "fixup_ppc_tlsreg", 0, 0, 0 }, + { "fixup_ppc_nofixup", 0, 0, 0 } }; if (Kind < FirstTargetFixupKind) @@ -83,6 +98,20 @@ public: return Infos[Kind - FirstTargetFixupKind]; } + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const { + Value = adjustFixupValue(Fixup.getKind(), Value); + if (!Value) return; // Doesn't change encoding. + + unsigned Offset = Fixup.getOffset(); + + // For each byte of the fragment that the fixup touches, mask in the bits + // from the fixup value. The Value has been "split up" into the appropriate + // bitfields above. + for (unsigned i = 0; i != 4; ++i) + Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff); + } + bool mayNeedRelaxation(const MCInst &Inst) const { // FIXME. return false; @@ -90,7 +119,7 @@ public: bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { // FIXME. llvm_unreachable("relaxInstruction() unimplemented"); @@ -126,11 +155,6 @@ namespace { public: DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { } - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value) const { - llvm_unreachable("UNIMP"); - } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { bool is64 = getPointerSize() == 8; return createMachObjectWriter(new PPCMachObjectWriter( @@ -152,19 +176,6 @@ namespace { ELFPPCAsmBackend(const Target &T, uint8_t OSABI) : PPCAsmBackend(T), OSABI(OSABI) { } - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value) const { - Value = adjustFixupValue(Fixup.getKind(), Value); - if (!Value) return; // Doesn't change encoding. - - unsigned Offset = Fixup.getOffset(); - - // For each byte of the fragment that the fixup touches, mask in the bits from - // the fixup value. The Value has been "split up" into the appropriate - // bitfields above. - for (unsigned i = 0; i != 4; ++i) - Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff); - } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { bool is64 = getPointerSize() == 8; @@ -181,7 +192,7 @@ namespace { -MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT) { +MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU) { if (Triple(TT).isOSDarwin()) return new DarwinPPCAsmBackend(T); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index a197981..d61e741 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -7,9 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCFixupKinds.h" #include "MCTargetDesc/PPCMCTargetDesc.h" +#include "MCTargetDesc/PPCFixupKinds.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -21,13 +24,35 @@ namespace { virtual ~PPCELFObjectWriter(); protected: + virtual unsigned getRelocTypeInner(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const; virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel, bool IsRelocWithSymbol, int64_t Addend) const; + virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const; virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset); + + virtual void sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs); + }; + + class PPCELFRelocationEntry : public ELFRelocationEntry { + public: + PPCELFRelocationEntry(const ELFRelocationEntry &RE); + bool operator<(const PPCELFRelocationEntry &RE) const { + return (RE.r_offset < r_offset || + (RE.r_offset == r_offset && RE.Type > Type)); + } }; } +PPCELFRelocationEntry::PPCELFRelocationEntry(const ELFRelocationEntry &RE) + : ELFRelocationEntry(RE.r_offset, RE.Index, RE.Type, RE.Symbol, + RE.r_addend, *RE.Fixup) {} + PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) : MCELFObjectTargetWriter(Is64Bit, OSABI, Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC, @@ -36,11 +61,13 @@ PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) PPCELFObjectWriter::~PPCELFObjectWriter() { } -unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel, - bool IsRelocWithSymbol, - int64_t Addend) const { +unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const +{ + MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? + MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + // determine the type of the relocation unsigned Type; if (IsPCRel) { @@ -50,9 +77,14 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, case PPC::fixup_ppc_br24: Type = ELF::R_PPC_REL24; break; + case FK_Data_4: case FK_PCRel_4: Type = ELF::R_PPC_REL32; break; + case FK_Data_8: + case FK_PCRel_8: + Type = ELF::R_PPC64_REL64; + break; } } else { switch ((unsigned)Fixup.getKind()) { @@ -61,17 +93,105 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, Type = ELF::R_PPC_ADDR24; break; case PPC::fixup_ppc_brcond14: - Type = ELF::R_PPC_ADDR14_BRTAKEN; // XXX: or BRNTAKEN?_ + Type = ELF::R_PPC_ADDR14; // XXX: or BRNTAKEN?_ break; case PPC::fixup_ppc_ha16: - Type = ELF::R_PPC_ADDR16_HA; + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_PPC_TPREL16_HA: + Type = ELF::R_PPC_TPREL16_HA; + break; + case MCSymbolRefExpr::VK_PPC_DTPREL16_HA: + Type = ELF::R_PPC64_DTPREL16_HA; + break; + case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC_ADDR16_HA; + break; + case MCSymbolRefExpr::VK_PPC_TOC16_HA: + Type = ELF::R_PPC64_TOC16_HA; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA: + Type = ELF::R_PPC64_GOT_TPREL16_HA; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA: + Type = ELF::R_PPC64_GOT_TLSGD16_HA; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA: + Type = ELF::R_PPC64_GOT_TLSLD16_HA; + break; + } break; case PPC::fixup_ppc_lo16: - Type = ELF::R_PPC_ADDR16_LO; + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_PPC_TPREL16_LO: + Type = ELF::R_PPC_TPREL16_LO; + break; + case MCSymbolRefExpr::VK_PPC_DTPREL16_LO: + Type = ELF::R_PPC64_DTPREL16_LO; + break; + case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC_ADDR16_LO; + break; + case MCSymbolRefExpr::VK_PPC_TOC16_LO: + Type = ELF::R_PPC64_TOC16_LO; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO: + Type = ELF::R_PPC64_GOT_TLSGD16_LO; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO: + Type = ELF::R_PPC64_GOT_TLSLD16_LO; + break; + } break; case PPC::fixup_ppc_lo14: Type = ELF::R_PPC_ADDR14; break; + case PPC::fixup_ppc_toc: + Type = ELF::R_PPC64_TOC; + break; + case PPC::fixup_ppc_toc16: + Type = ELF::R_PPC64_TOC16; + break; + case PPC::fixup_ppc_toc16_ds: + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_PPC_TOC_ENTRY: + Type = ELF::R_PPC64_TOC16_DS; + break; + case MCSymbolRefExpr::VK_PPC_TOC16_LO: + Type = ELF::R_PPC64_TOC16_LO_DS; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO: + Type = ELF::R_PPC64_GOT_TPREL16_LO_DS; + break; + } + break; + case PPC::fixup_ppc_tlsreg: + Type = ELF::R_PPC64_TLS; + break; + case PPC::fixup_ppc_nofixup: + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_PPC_TLSGD: + Type = ELF::R_PPC64_TLSGD; + break; + case MCSymbolRefExpr::VK_PPC_TLSLD: + Type = ELF::R_PPC64_TLSLD; + break; + } + break; + case FK_Data_8: + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_PPC_TOC: + Type = ELF::R_PPC64_TOC; + break; + case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC64_ADDR64; + break; + } + break; case FK_Data_4: Type = ELF::R_PPC_ADDR32; break; @@ -83,11 +203,41 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, return Type; } +unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) const { + return getRelocTypeInner(Target, Fixup, IsPCRel); +} + +const MCSymbol *PPCELFObjectWriter::undefinedExplicitRelSym(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + assert(Target.getSymA() && "SymA cannot be 0"); + const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol(); + + unsigned RelocType = getRelocTypeInner(Target, Fixup, IsPCRel); + + // The .odp creation emits a relocation against the symbol ".TOC." which + // create a R_PPC64_TOC relocation. However the relocation symbol name + // in final object creation should be NULL, since the symbol does not + // really exist, it is just the reference to TOC base for the current + // object file. + bool EmitThisSym = RelocType != ELF::R_PPC64_TOC; + + if (EmitThisSym && !Symbol.isTemporary()) + return &Symbol; + return NULL; +} + void PPCELFObjectWriter:: adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { switch ((unsigned)Fixup.getKind()) { case PPC::fixup_ppc_ha16: case PPC::fixup_ppc_lo16: + case PPC::fixup_ppc_toc16: + case PPC::fixup_ppc_toc16_ds: RelocOffset += 2; break; default: @@ -95,6 +245,34 @@ adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { } } +// The standard sorter only sorts on the r_offset field, but PowerPC can +// have multiple relocations at the same offset. Sort secondarily on the +// relocation type to avoid nondeterminism. +void PPCELFObjectWriter::sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs) { + + // Copy to a temporary vector of relocation entries having a different + // sort function. + std::vector<PPCELFRelocationEntry> TmpRelocs; + + for (std::vector<ELFRelocationEntry>::iterator R = Relocs.begin(); + R != Relocs.end(); ++R) { + TmpRelocs.push_back(PPCELFRelocationEntry(*R)); + } + + // Sort in place by ascending r_offset and descending r_type. + array_pod_sort(TmpRelocs.begin(), TmpRelocs.end()); + + // Copy back to the original vector. + unsigned I = 0; + for (std::vector<PPCELFRelocationEntry>::iterator R = TmpRelocs.begin(); + R != TmpRelocs.end(); ++R, ++I) { + Relocs[I] = ELFRelocationEntry(R->r_offset, R->Index, R->Type, + R->Symbol, R->r_addend, *R->Fixup); + } +} + + MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS, bool Is64Bit, uint8_t OSABI) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index b3c889e..7917f77 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -34,6 +34,23 @@ enum Fixups { /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs /// like 'std'. fixup_ppc_lo14, + + /// fixup_ppc_toc - Insert value of TOC base (.TOC.). + fixup_ppc_toc, + + /// fixup_ppc_toc16 - A 16-bit signed fixup relative to the TOC base. + fixup_ppc_toc16, + + /// fixup_ppc_toc16_ds - A 14-bit signed fixup relative to the TOC base with + /// implied 2 zero bits + fixup_ppc_toc16_ds, + + /// fixup_ppc_tlsreg - Insert thread-pointer register number. + fixup_ppc_tlsreg, + + /// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call + /// to __tls_get_addr for the TLS general and local dynamic models. + fixup_ppc_nofixup, // Marker LastTargetFixupKind, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index b9ea8b5..215aa40 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -59,8 +59,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { HasLEB128 = true; // Target asm supports leb128 directives (little-endian) // Exceptions handling - if (!is64Bit) - ExceptionsType = ExceptionHandling::DwarfCFI; + ExceptionsType = ExceptionHandling::DwarfCFI; ZeroDirective = "\t.space\t"; Data64bitsDirective = is64Bit ? "\t.quad\t" : 0; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index f652422..d048426 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -12,29 +12,45 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mccodeemitter" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCBaseInfo.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); namespace { class PPCMCCodeEmitter : public MCCodeEmitter { - PPCMCCodeEmitter(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT - + PPCMCCodeEmitter(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION; + void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION; + + const MCSubtargetInfo &STI; + Triple TT; + public: PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, - MCContext &ctx) { + MCContext &ctx) + : STI(sti), TT(STI.getTargetTriple()) { } ~PPCMCCodeEmitter() {} + bool is64BitMode() const { + return (STI.getFeatureBits() & PPC::Feature64Bit) != 0; + } + + bool isSVR4ABI() const { + return TT.isMacOSX() == 0; + } + unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const; unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo, @@ -47,6 +63,8 @@ public: SmallVectorImpl<MCFixup> &Fixups) const; unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const; @@ -61,11 +79,20 @@ public: SmallVectorImpl<MCFixup> &Fixups) const; void EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { - unsigned Bits = getBinaryCodeForInstr(MI, Fixups); + uint64_t Bits = getBinaryCodeForInstr(MI, Fixups); + + // BL8_NOP_ELF, BLA8_NOP_ELF, etc., all have a size of 8 because of the + // following 'nop'. + unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value! + unsigned Opcode = MI.getOpcode(); + if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF || + Opcode == PPC::BL8_NOP_ELF_TLSGD || Opcode == PPC::BL8_NOP_ELF_TLSLD) + Size = 8; // Output the constant in big endian byte order. - for (unsigned i = 0; i != 4; ++i) { - OS << (char)(Bits >> 24); + int ShiftValue = (Size * 8) - 8; + for (unsigned i = 0; i != Size; ++i) { + OS << (char)(Bits >> ShiftValue); Bits <<= 8; } @@ -92,6 +119,17 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo, // Add a fixup for the branch target. Fixups.push_back(MCFixup::Create(0, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_br24)); + + // For special TLS calls, add another fixup for the symbol. Apparently + // BL8_NOP_ELF, BL8_NOP_ELF_TLSGD, and BL8_NOP_ELF_TLSLD are sufficiently + // similar that TblGen will not generate a separate case for the latter + // two, so this is the only way to get the extra fixup generated. + unsigned Opcode = MI.getOpcode(); + if (Opcode == PPC::BL8_NOP_ELF_TLSGD || Opcode == PPC::BL8_NOP_ELF_TLSLD) { + const MCOperand &MO2 = MI.getOperand(OpNo+1); + Fixups.push_back(MCFixup::Create(0, MO2.getExpr(), + (MCFixupKind)PPC::fixup_ppc_nofixup)); + } return 0; } @@ -140,8 +178,12 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo, return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits; // Add a fixup for the displacement field. - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_lo16)); + if (isSVR4ABI() && is64BitMode()) + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_toc16)); + else + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_lo16)); return RegBits; } @@ -158,17 +200,36 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits; // Add a fixup for the branch target. - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_lo14)); + if (isSVR4ABI() && is64BitMode()) + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_toc16_ds)); + else + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_lo14)); return RegBits; } +unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups); + + // Add a fixup for the TLS register, which simply provides a relocation + // hint to the linker that this statement is part of a relocation sequence. + // Return the thread-pointer register's encoding. + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_tlsreg)); + return getPPCRegisterNumbering(PPC::X13); +} + unsigned PPCMCCodeEmitter:: get_crbitm_encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(OpNo); - assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) && + assert((MI.getOpcode() == PPC::MTCRF || + MI.getOpcode() == PPC::MFOCRF || + MI.getOpcode() == PPC::MTCRF8) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); return 0x80 >> getPPCRegisterNumbering(MO.getReg()); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 6568e82..2209f93 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -12,14 +12,14 @@ //===----------------------------------------------------------------------===// #include "PPCMCTargetDesc.h" -#include "PPCMCAsmInfo.h" #include "InstPrinter/PPCInstPrinter.h" -#include "llvm/MC/MachineLocation.h" +#include "PPCMCAsmInfo.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MachineLocation.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" @@ -70,7 +70,7 @@ static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) { // Initial state of the frame pointer is R1. MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(PPC::R1, 0); + MachineLocation Src(isPPC64? PPC::X1 : PPC::R1, 0); MAI->addInitialFrameState(0, Dst, Src); return MAI; @@ -88,6 +88,11 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM, else RM = Reloc::Static; } + if (CM == CodeModel::Default) { + Triple T(TT); + if (!T.isOSDarwin() && T.getArch() == Triple::ppc64) + CM = CodeModel::Medium; + } X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 7162e15..4a42092 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -14,6 +14,9 @@ #ifndef PPCMCTARGETDESC_H #define PPCMCTARGETDESC_H +// GCC #defines PPC on Linux but we use it as our namespace name +#undef PPC + #include "llvm/Support/DataTypes.h" namespace llvm { @@ -36,7 +39,7 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx); -MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT); +MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU); /// createPPCELFObjectWriter - Construct an PPC ELF object writer. MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index f872e86..972e138 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -14,6 +14,9 @@ #ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H #define LLVM_TARGET_POWERPC_PPCPREDICATES_H +// GCC #defines PPC on Linux but we use it as our namespace name +#undef PPC + namespace llvm { namespace PPC { /// Predicate - These are "(BI << 5) | BO" for various predicates. diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 9103e12..e6d38eb 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -65,13 +65,14 @@ namespace llvm { MO_NLP_HIDDEN_FLAG = 16, /// The next are not flags but distinct values. - MO_ACCESS_MASK = 224, + MO_ACCESS_MASK = 0xe0, /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol) - MO_LO16 = 32, MO_HA16 = 64, + MO_LO16 = 1 << 5, + MO_HA16 = 2 << 5, - MO_TPREL16_HA = 96, - MO_TPREL16_LO = 128 + MO_TPREL16_HA = 3 << 5, + MO_TPREL16_LO = 4 << 5 }; } // end namespace PPCII diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 6e0e8bb..adb673b 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -18,14 +18,13 @@ #define DEBUG_TYPE "asmprinter" #include "PPC.h" -#include "PPCTargetMachine.h" -#include "PPCSubtarget.h" #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCPredicates.h" -#include "llvm/Constants.h" -#include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" +#include "PPCSubtarget.h" +#include "PPCTargetMachine.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -33,33 +32,36 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/ELF.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/SmallString.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; namespace { class PPCAsmPrinter : public AsmPrinter { protected: - DenseMap<MCSymbol*, MCSymbol*> TOC; + MapVector<MCSymbol*, MCSymbol*> TOC; const PPCSubtarget &Subtarget; uint64_t TOCLabelID; public: @@ -71,6 +73,7 @@ namespace { return "PowerPC Assembly Printer"; } + MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym); virtual void EmitInstruction(const MachineInstr *MI); @@ -284,8 +287,22 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier. + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'y': // A memory reference for an X-form instruction + { + const char *RegName = "r0"; + if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName); + O << RegName << ", "; + printOperand(MI, OpNo, O); + return false; + } + } + } + assert(MI->getOperand(OpNo).isReg()); O << "0("; printOperand(MI, OpNo, O); @@ -294,6 +311,25 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, } +/// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry +/// exists for it. If not, create one. Then return a symbol that references +/// the TOC entry. +MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { + + MCSymbol *&TOCEntry = TOC[Sym]; + + // To avoid name clash check if the name already exists. + while (TOCEntry == 0) { + if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) + + "C" + Twine(TOCLabelID++)) == 0) { + TOCEntry = GetTempSymbol("C", TOCLabelID); + } + } + + return TOCEntry; +} + + /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to /// the current output stream. /// @@ -334,14 +370,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *PICBase = MF->getPICBaseSymbol(); // Emit the 'bl'. - TmpInst.setOpcode(PPC::BL_Darwin); // Darwin vs SVR4 doesn't matter here. - - - // FIXME: We would like an efficient form for this, so we don't have to do - // a lot of extra uniquing. - TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr:: - Create(PICBase, OutContext))); - OutStreamer.EmitInstruction(TmpInst); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL_Darwin) // Darwin vs SVR4 doesn't matter here. + // FIXME: We would like an efficient form for this, so we don't have to do + // a lot of extra uniquing. + .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); // Emit the label. OutStreamer.EmitLabel(PICBase); @@ -367,10 +399,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MOSymbol = GetCPISymbol(MO.getIndex()); else if (MO.isJTI()) MOSymbol = GetJTISymbol(MO.getIndex()); - MCSymbol *&TOCEntry = TOC[MOSymbol]; - if (TOCEntry == 0) - TOCEntry = GetTempSymbol("C", TOCLabelID++); - + + MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); + const MCExpr *Exp = MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC_ENTRY, OutContext); @@ -379,15 +410,298 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } + case PPC::ADDIStocHA: { + // Transform %Xd = ADDIStocHA %X2, <ga:@sym> + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + + // Change the opcode to ADDIS8. If the global address is external, + // has common linkage, is a function address, or is a jump table + // address, then generate a TOC entry and reference that. Otherwise + // reference the symbol directly. + TmpInst.setOpcode(PPC::ADDIS8); + const MachineOperand &MO = MI->getOperand(2); + assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) && + "Invalid operand for ADDIStocHA!"); + MCSymbol *MOSymbol = 0; + bool IsExternal = false; + bool IsFunction = false; + bool IsCommon = false; + bool IsAvailExt = false; + + if (MO.isGlobal()) { + const GlobalValue *GValue = MO.getGlobal(); + const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); + const GlobalValue *RealGValue = GAlias ? + GAlias->resolveAliasedGlobal(false) : GValue; + MOSymbol = Mang->getSymbol(RealGValue); + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); + IsExternal = GVar && !GVar->hasInitializer(); + IsCommon = GVar && RealGValue->hasCommonLinkage(); + IsFunction = !GVar; + IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage(); + } else if (MO.isCPI()) + MOSymbol = GetCPISymbol(MO.getIndex()); + else if (MO.isJTI()) + MOSymbol = GetJTISymbol(MO.getIndex()); + + if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI()) + MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); + + const MCExpr *Exp = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_HA, + OutContext); + TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case PPC::LDtocL: { + // Transform %Xd = LDtocL <ga:@sym>, %Xs + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + + // Change the opcode to LDrs, which is a form of LD with the offset + // specified by a SymbolLo. If the global address is external, has + // common linkage, or is a jump table address, then reference the + // associated TOC entry. Otherwise reference the symbol directly. + TmpInst.setOpcode(PPC::LDrs); + const MachineOperand &MO = MI->getOperand(1); + assert((MO.isGlobal() || MO.isJTI()) && "Invalid operand for LDtocL!"); + MCSymbol *MOSymbol = 0; + + if (MO.isJTI()) + MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex())); + else { + const GlobalValue *GValue = MO.getGlobal(); + const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); + const GlobalValue *RealGValue = GAlias ? + GAlias->resolveAliasedGlobal(false) : GValue; + MOSymbol = Mang->getSymbol(RealGValue); + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); + + if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || + RealGValue->hasAvailableExternallyLinkage()) + MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); + } + + const MCExpr *Exp = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO, + OutContext); + TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case PPC::ADDItocL: { + // Transform %Xd = ADDItocL %Xs, <ga:@sym> + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + + // Change the opcode to ADDI8L. If the global address is external, then + // generate a TOC entry and reference that. Otherwise reference the + // symbol directly. + TmpInst.setOpcode(PPC::ADDI8L); + const MachineOperand &MO = MI->getOperand(2); + assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL"); + MCSymbol *MOSymbol = 0; + bool IsExternal = false; + bool IsFunction = false; + + if (MO.isGlobal()) { + const GlobalValue *GValue = MO.getGlobal(); + const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); + const GlobalValue *RealGValue = GAlias ? + GAlias->resolveAliasedGlobal(false) : GValue; + MOSymbol = Mang->getSymbol(RealGValue); + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); + IsExternal = GVar && !GVar->hasInitializer(); + IsFunction = !GVar; + } else if (MO.isCPI()) + MOSymbol = GetCPISymbol(MO.getIndex()); + + if (IsFunction || IsExternal) + MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); + + const MCExpr *Exp = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO, + OutContext); + TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case PPC::ADDISgotTprelHA: { + // Transform: %Xd = ADDISgotTprelHA %X2, <ga:@sym> + // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTprel = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + .addReg(MI->getOperand(0).getReg()) + .addReg(PPC::X2) + .addExpr(SymGotTprel)); + return; + } + case PPC::LDgotTprelL: { + // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + + // Change the opcode to LDrs, which is a form of LD with the offset + // specified by a SymbolLo. + TmpInst.setOpcode(PPC::LDrs); + const MachineOperand &MO = MI->getOperand(1); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *Exp = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO, + OutContext); + TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case PPC::ADDIStlsgdHA: { + // Transform: %Xd = ADDIStlsgdHA %X2, <ga:@sym> + // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTlsGD = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + .addReg(MI->getOperand(0).getReg()) + .addReg(PPC::X2) + .addExpr(SymGotTlsGD)); + return; + } + case PPC::ADDItlsgdL: { + // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym> + // Into: %Xd = ADDI8L %Xs, sym@got@tlsgd@l + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTlsGD = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymGotTlsGD)); + return; + } + case PPC::GETtlsADDR: { + // Transform: %X3 = GETtlsADDR %X3, <ga:@sym> + // Into: BL8_NOP_ELF_TLSGD __tls_get_addr(sym@tlsgd) + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + + StringRef Name = "__tls_get_addr"; + MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); + const MCSymbolRefExpr *TlsRef = + MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymVar = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSGD) + .addExpr(TlsRef) + .addExpr(SymVar)); + return; + } + case PPC::ADDIStlsldHA: { + // Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym> + // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTlsLD = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + .addReg(MI->getOperand(0).getReg()) + .addReg(PPC::X2) + .addExpr(SymGotTlsLD)); + return; + } + case PPC::ADDItlsldL: { + // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym> + // Into: %Xd = ADDI8L %Xs, sym@got@tlsld@l + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTlsLD = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymGotTlsLD)); + return; + } + case PPC::GETtlsldADDR: { + // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym> + // Into: BL8_NOP_ELF_TLSLD __tls_get_addr(sym@tlsld) + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + + StringRef Name = "__tls_get_addr"; + MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); + const MCSymbolRefExpr *TlsRef = + MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymVar = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSLD) + .addExpr(TlsRef) + .addExpr(SymVar)); + return; + } + case PPC::ADDISdtprelHA: { + // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym> + // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymDtprel = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_HA, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + .addReg(MI->getOperand(0).getReg()) + .addReg(PPC::X3) + .addExpr(SymDtprel)); + return; + } + case PPC::ADDIdtprelL: { + // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym> + // Into: %Xd = ADDI8L %Xs, sym@dtprel@l + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymDtprel = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_LO, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymDtprel)); + return; + } case PPC::MFCRpseud: case PPC::MFCR8pseud: // Transform: %R3 = MFCRpseud %CR7 // Into: %R3 = MFCR ;; cr7 OutStreamer.AddComment(PPCInstPrinter:: getRegisterName(MI->getOperand(1).getReg())); - TmpInst.setOpcode(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - OutStreamer.EmitInstruction(TmpInst); + OutStreamer.EmitInstruction(MCInstBuilder(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR) + .addReg(MI->getOperand(0).getReg())); return; case PPC::SYNC: // In Book E sync is called msync, handle this special case here... @@ -415,11 +729,17 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { OutStreamer.EmitValueToAlignment(8); MCSymbol *Symbol1 = OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName())); - MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.@tocbase")); + // Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function + // entry point. OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext), 8/*size*/, 0/*addrspace*/); - OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, OutContext), + MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.")); + // Generates a R_PPC64_TOC relocation for TOC base insertion. + OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, + MCSymbolRefExpr::VK_PPC_TOC, OutContext), 8/*size*/, 0/*addrspace*/); + // Emit a null environment pointer. + OutStreamer.EmitIntValue(0, 8 /* size */, 0 /* addrspace */); OutStreamer.SwitchSection(Current); MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol( @@ -430,7 +750,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { bool PPCLinuxAsmPrinter::doFinalization(Module &M) { - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); bool isPPC64 = TD->getPointerSizeInBits() == 64; @@ -440,12 +760,11 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { SectionKind::getReadOnly()); OutStreamer.SwitchSection(Section); - // FIXME: This is nondeterminstic! - for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(), + for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(), E = TOC.end(); I != E; ++I) { OutStreamer.EmitLabel(I->second); - OutStreamer.EmitRawText("\t.tc " + Twine(I->first->getName()) + - "[TC]," + I->first->getName()); + MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName()); + OutStreamer.EmitTCEntry(*S); } } @@ -524,21 +843,18 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) { // Remove $stub suffix, add $lazy_ptr. - SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()-5); - TmpStr += "$lazy_ptr"; - return Ctx.GetOrCreateSymbol(TmpStr.str()); + StringRef NoStub = Sym->getName().substr(0, Sym->getName().size()-5); + return Ctx.GetOrCreateSymbol(NoStub + "$lazy_ptr"); } static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) { // Add $tmp suffix to $stub, yielding $stub$tmp. - SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()); - TmpStr += "$tmp"; - return Ctx.GetOrCreateSymbol(TmpStr.str()); + return Ctx.GetOrCreateSymbol(Sym->getName() + "$tmp"); } void PPCDarwinAsmPrinter:: EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { - bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64; + bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64; const TargetLoweringObjectFileMachO &TLOFMacho = static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); @@ -564,32 +880,50 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { OutStreamer.EmitLabel(Stub); OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); + + // mflr r0 + OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0)); // FIXME: MCize this. - OutStreamer.EmitRawText(StringRef("\tmflr r0")); - OutStreamer.EmitRawText("\tbcl 20,31," + Twine(AnonSymbol->getName())); + OutStreamer.EmitRawText("\tbcl 20, 31, " + Twine(AnonSymbol->getName())); OutStreamer.EmitLabel(AnonSymbol); - OutStreamer.EmitRawText(StringRef("\tmflr r11")); - OutStreamer.EmitRawText("\taddis r11,r11,ha16("+Twine(LazyPtr->getName())+ - "-" + AnonSymbol->getName() + ")"); - OutStreamer.EmitRawText(StringRef("\tmtlr r0")); - - if (isPPC64) - OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) + - "-" + AnonSymbol->getName() + ")(r11)"); - else - OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) + - "-" + AnonSymbol->getName() + ")(r11)"); - OutStreamer.EmitRawText(StringRef("\tmtctr r12")); - OutStreamer.EmitRawText(StringRef("\tbctr")); - + // mflr r11 + OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11)); + // addis r11, r11, ha16(LazyPtr - AnonSymbol) + const MCExpr *Sub = + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(LazyPtr, OutContext), + MCSymbolRefExpr::Create(AnonSymbol, OutContext), + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS) + .addReg(PPC::R11) + .addReg(PPC::R11) + .addExpr(Sub)); + // mtlr r0 + OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTLR).addReg(PPC::R0)); + + // ldu r12, lo16(LazyPtr - AnonSymbol)(r11) + // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11) + OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) + .addReg(PPC::R12) + .addExpr(Sub).addExpr(Sub) + .addReg(PPC::R11)); + // mtctr r12 + OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12)); + // bctr + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR)); + OutStreamer.SwitchSection(LSPSection); OutStreamer.EmitLabel(LazyPtr); OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); - - if (isPPC64) - OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper")); - else - OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper")); + + MCSymbol *DyldStubBindingHelper = + OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper")); + if (isPPC64) { + // .quad dyld_stub_binding_helper + OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8); + } else { + // .long dyld_stub_binding_helper + OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4); + } } OutStreamer.AddBlankLine(); return; @@ -609,23 +943,42 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { EmitAlignment(4); OutStreamer.EmitLabel(Stub); OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); - OutStreamer.EmitRawText("\tlis r11,ha16(" + Twine(LazyPtr->getName()) +")"); - if (isPPC64) - OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) + - ")(r11)"); - else - OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) + - ")(r11)"); - OutStreamer.EmitRawText(StringRef("\tmtctr r12")); - OutStreamer.EmitRawText(StringRef("\tbctr")); + // lis r11, ha16(LazyPtr) + const MCExpr *LazyPtrHa16 = + MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_HA16, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS) + .addReg(PPC::R11) + .addExpr(LazyPtrHa16)); + + const MCExpr *LazyPtrLo16 = + MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_LO16, + OutContext); + // ldu r12, lo16(LazyPtr)(r11) + // lwzu r12, lo16(LazyPtr)(r11) + OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) + .addReg(PPC::R12) + .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16) + .addReg(PPC::R11)); + + // mtctr r12 + OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12)); + // bctr + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR)); + OutStreamer.SwitchSection(LSPSection); OutStreamer.EmitLabel(LazyPtr); OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); - - if (isPPC64) - OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper")); - else - OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper")); + + MCSymbol *DyldStubBindingHelper = + OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper")); + if (isPPC64) { + // .quad dyld_stub_binding_helper + OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8); + } else { + // .long dyld_stub_binding_helper + OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4); + } } OutStreamer.AddBlankLine(); @@ -633,7 +986,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { bool PPCDarwinAsmPrinter::doFinalization(Module &M) { - bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64; + bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64; // Darwin/PPC always uses mach-o. const TargetLoweringObjectFileMachO &TLOFMacho = diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index 21a0fb2..9911575 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -17,13 +17,13 @@ #define DEBUG_TYPE "ppc-branch-select" #include "PPC.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" -#include "MCTargetDesc/PPCPredicates.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; STATISTIC(NumExpanded, "Number of branches expanded to long format"); diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 2a2abb1..a74932c 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -31,20 +31,20 @@ #define DEBUG_TYPE "ctrloops" #include "PPC.h" -#include "PPCTargetMachine.h" #include "MCTargetDesc/PPCPredicates.h" -#include "llvm/Constants.h" -#include "llvm/PassSupport.h" +#include "PPCTargetMachine.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Constants.h" +#include "llvm/PassSupport.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index b2b5364..3f87e88 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -12,12 +12,19 @@ // //===----------------------------------------------------------------------===// +/// CCIfSubtarget - Match if the current subtarget has a feature F. +class CCIfSubtarget<string F, CCAction A> + : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>; + //===----------------------------------------------------------------------===// // Return Value Calling Convention //===----------------------------------------------------------------------===// // Return-value convention for PowerPC def RetCC_PPC : CallingConv<[ + // On PPC64, integer return values are always promoted to i64 + CCIfType<[i32], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>, + CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index 252a2d1..d68bfd1 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -12,15 +12,15 @@ // //===----------------------------------------------------------------------===// -#include "PPCTargetMachine.h" -#include "PPCRelocations.h" #include "PPC.h" -#include "llvm/Module.h" -#include "llvm/PassManager.h" +#include "PPCRelocations.h" +#include "PPCTargetMachine.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Module.h" +#include "llvm/PassManager.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" @@ -68,6 +68,7 @@ namespace { unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const; const char *getPassName() const { return "PowerPC Machine Code Emitter"; } @@ -243,6 +244,13 @@ unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI, } +unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("TLS not supported on the old JIT."); + return 0; +} + + unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, const MachineOperand &MO) const { diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index c24afa9..5901f36 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -12,15 +12,16 @@ //===----------------------------------------------------------------------===// #include "PPCFrameLowering.h" +#include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" -#include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Function.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -49,6 +50,11 @@ static const uint16_t VRRegNo[] = { /// to manipulate the VRSAVE register, even though it uses vector registers. /// This can happen when the only registers used are known to be live in or out /// of the function. Remove all of the VRSAVE related code from the function. +/// FIXME: The removal of the code results in a compile failure at -O0 when the +/// function contains a function call, as the GPR containing original VRSAVE +/// contents is spilled and reloaded around the call. Without the prolog code, +/// the spill instruction refers to an undefined register. This code needs +/// to account for all uses of that GPR. static void RemoveVRSaveCode(MachineInstr *MI) { MachineBasicBlock *Entry = MI->getParent(); MachineFunction *MF = Entry->getParent(); @@ -168,6 +174,11 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { MI->eraseFromParent(); } +static bool spillsCR(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->isCRSpilled(); +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { @@ -184,13 +195,22 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { // If we are a leaf function, and use up to 224 bytes of stack space, // don't have a frame pointer, calls, or dynamic alloca then we do not need - // to adjust the stack pointer (we fit in the Red Zone). - bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone); - // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. + // to adjust the stack pointer (we fit in the Red Zone). For 64-bit + // SVR4, we also require a stack frame if we need to spill the CR, + // since this spill area is addressed relative to the stack pointer. + bool DisableRedZone = MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone); + // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can + // still generate stackless code if all local vars are reg-allocated. + // Try: (FrameSize <= 224 + // || (FrameSize == 0 && Subtarget.isPPC32 && Subtarget.isSVR4ABI())) if (!DisableRedZone && FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. + !(Subtarget.isPPC64() && // No 64-bit SVR4 CRsave. + Subtarget.isSVR4ABI() + && spillsCR(MF)) && (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. // No need for frame MFI->setStackSize(0); @@ -241,7 +261,8 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { // Naked functions have no stack frame pushed, so we don't have a frame // pointer. - if (MF.getFunction()->hasFnAttr(Attribute::Naked)) + if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::Naked)) return false; return MF.getTarget().Options.DisableFramePointerElim(MF) || @@ -268,12 +289,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, // process it. - for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { - if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { - HandleVRSaveUpdate(MBBI, TII); - break; + if (!Subtarget.isSVR4ABI()) + for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { + if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { + HandleVRSaveUpdate(MBBI, TII); + break; + } } - } // Move MBBI back to the beginning of the function. MBBI = MBB.begin(); @@ -488,7 +510,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // Add callee saved registers to move list. const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); unsigned Reg = CSI[I].getReg(); if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; @@ -497,6 +518,25 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { if (PPC::CRBITRCRegClass.contains(Reg)) continue; + // For SVR4, don't emit a move for the CR spill slot if we haven't + // spilled CRs. + if (Subtarget.isSVR4ABI() + && (PPC::CR2 <= Reg && Reg <= PPC::CR4) + && !spillsCR(MF)) + continue; + + // For 64-bit SVR4 when we have spilled CRs, the spill location + // is SP+8, not a frame-relative slot. + if (Subtarget.isSVR4ABI() + && Subtarget.isPPC64() + && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { + MachineLocation CSDst(PPC::X1, 8); + MachineLocation CSSrc(PPC::CR2); + Moves.push_back(MachineMove(Label, CSDst, CSSrc)); + continue; + } + + int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); MachineLocation CSDst(MachineLocation::VirtualFP, Offset); MachineLocation CSSrc(Reg); Moves.push_back(MachineMove(Label, CSDst, CSSrc)); @@ -714,11 +754,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } } -static bool spillsCR(const MachineFunction &MF) { - const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); - return FuncInfo->isCRSpilled(); -} - /// MustSaveLR - Return true if this function requires that we save the LR /// register onto the stack in the prolog and restore it in the epilog of the /// function. @@ -808,7 +843,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) bool HasGPSaveArea = false; bool HasG8SaveArea = false; bool HasFPSaveArea = false; - bool HasCRSaveArea = false; bool HasVRSAVESaveArea = false; bool HasVRSaveArea = false; @@ -843,10 +877,9 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) if (Reg < MinFPR) { MinFPR = Reg; } -// FIXME SVR4: Disable CR save area for now. } else if (PPC::CRBITRCRegClass.contains(Reg) || PPC::CRRCRegClass.contains(Reg)) { -// HasCRSaveArea = true; + ; // do nothing, as we already know whether CRs are spilled } else if (PPC::VRSAVERCRegClass.contains(Reg)) { HasVRSAVESaveArea = true; } else if (PPC::VRRCRegClass.contains(Reg)) { @@ -926,16 +959,21 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) } } - // The CR save area is below the general register save area. - if (HasCRSaveArea) { - // FIXME SVR4: Is it actually possible to have multiple elements in CSI - // which have the CR/CRBIT register class? + // For 32-bit only, the CR save area is below the general register + // save area. For 64-bit SVR4, the CR save area is addressed relative + // to the stack pointer and hence does not need an adjustment here. + // Only CR2 (the first nonvolatile spilled) has an associated frame + // index so that we have a single uniform save area. + if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) { // Adjust the frame index of the CR spill slot. for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - if (PPC::CRBITRCRegClass.contains(Reg) || - PPC::CRRCRegClass.contains(Reg)) { + if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2) + // Leave Darwin logic as-is. + || (!Subtarget.isSVR4ABI() && + (PPC::CRBITRCRegClass.contains(Reg) || + PPC::CRRCRegClass.contains(Reg)))) { int FI = CSI[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); @@ -973,3 +1011,184 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) } } } + +bool +PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + + // Currently, this function only handles SVR4 32- and 64-bit ABIs. + // Return false otherwise to maintain pre-existing behavior. + if (!Subtarget.isSVR4ABI()) + return false; + + MachineFunction *MF = MBB.getParent(); + const PPCInstrInfo &TII = + *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo()); + DebugLoc DL; + bool CRSpilled = false; + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + // CR2 through CR4 are the nonvolatile CR fields. + bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; + + if (CRSpilled && IsCRField) + continue; + + // Add the callee-saved register as live-in; it's killed at the spill. + MBB.addLiveIn(Reg); + + // Insert the spill to the stack frame. + if (IsCRField) { + CRSpilled = true; + // The first time we see a CR field, store the whole CR into the + // save slot via GPR12 (available in the prolog for 32- and 64-bit). + if (Subtarget.isPPC64()) { + // 64-bit: SP+8 + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::X12)); + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW)) + .addReg(PPC::X12, + getKillRegState(true)) + .addImm(8) + .addReg(PPC::X1)); + } else { + // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have + // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)); + MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) + .addReg(PPC::R12, + getKillRegState(true)), + CSI[i].getFrameIdx())); + } + + // Record that we spill the CR in this function. + PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); + FuncInfo->setSpillsCR(); + } else { + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, true, + CSI[i].getFrameIdx(), RC, TRI); + } + } + return true; +} + +static void +restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) { + + MachineFunction *MF = MBB.getParent(); + const PPCInstrInfo &TII = + *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo()); + DebugLoc DL; + unsigned RestoreOp, MoveReg; + + if (isPPC64) { + // 64-bit: SP+8 + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::X12) + .addImm(8) + .addReg(PPC::X1)); + RestoreOp = PPC::MTCRF8; + MoveReg = PPC::X12; + } else { + // 32-bit: FP-relative + MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), + PPC::R12), + CSI[CSIIndex].getFrameIdx())); + RestoreOp = PPC::MTCRF; + MoveReg = PPC::R12; + } + + if (CR2Spilled) + MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) + .addReg(MoveReg)); + + if (CR3Spilled) + MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) + .addReg(MoveReg)); + + if (CR4Spilled) + MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) + .addReg(MoveReg)); +} + +bool +PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + + // Currently, this function only handles SVR4 32- and 64-bit ABIs. + // Return false otherwise to maintain pre-existing behavior. + if (!Subtarget.isSVR4ABI()) + return false; + + MachineFunction *MF = MBB.getParent(); + const PPCInstrInfo &TII = + *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo()); + bool CR2Spilled = false; + bool CR3Spilled = false; + bool CR4Spilled = false; + unsigned CSIIndex = 0; + + // Initialize insertion-point logic; we will be restoring in reverse + // order of spill. + MachineBasicBlock::iterator I = MI, BeforeI = I; + bool AtStart = I == MBB.begin(); + + if (!AtStart) + --BeforeI; + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + + if (Reg == PPC::CR2) { + CR2Spilled = true; + // The spill slot is associated only with CR2, which is the + // first nonvolatile spilled. Save it here. + CSIIndex = i; + continue; + } else if (Reg == PPC::CR3) { + CR3Spilled = true; + continue; + } else if (Reg == PPC::CR4) { + CR4Spilled = true; + continue; + } else { + // When we first encounter a non-CR register after seeing at + // least one CR register, restore all spilled CRs together. + if ((CR2Spilled || CR3Spilled || CR4Spilled) + && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) { + restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled, + MBB, I, CSI, CSIIndex); + CR2Spilled = CR3Spilled = CR4Spilled = false; + } + + // Default behavior for non-CR saves. + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), + RC, TRI); + assert(I != MBB.begin() && + "loadRegFromStackSlot didn't insert any code!"); + } + + // Insert in reverse order. + if (AtStart) + I = MBB.begin(); + else { + I = BeforeI; + ++I; + } + } + + // If we haven't yet spilled the CRs, do so now. + if (CR2Spilled || CR3Spilled || CR4Spilled) + restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled, + MBB, I, CSI, CSIIndex); + + return true; +} + diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index d708541..3517d8c 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -15,9 +15,9 @@ #include "PPC.h" #include "PPCSubtarget.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/STLExtras.h" namespace llvm { class PPCSubtarget; @@ -45,6 +45,16 @@ public: RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + /// targetHandlesStackFrameRounding - Returns true if the target is /// responsible for rounding up the stack frame (probably at emitPrologue /// time). @@ -170,23 +180,11 @@ public: {PPC::R15, -68}, {PPC::R14, -72}, - // CR save area offset. - // FIXME SVR4: Disable CR save area for now. -// {PPC::CR2, -4}, -// {PPC::CR3, -4}, -// {PPC::CR4, -4}, -// {PPC::CR2LT, -4}, -// {PPC::CR2GT, -4}, -// {PPC::CR2EQ, -4}, -// {PPC::CR2UN, -4}, -// {PPC::CR3LT, -4}, -// {PPC::CR3GT, -4}, -// {PPC::CR3EQ, -4}, -// {PPC::CR3UN, -4}, -// {PPC::CR4LT, -4}, -// {PPC::CR4GT, -4}, -// {PPC::CR4EQ, -4}, -// {PPC::CR4UN, -4}, + // CR save area offset. We map each of the nonvolatile CR fields + // to the slot for CR2, which is the first of the nonvolatile CR + // fields to be assigned, so that we only allocate one save slot. + // See PPCRegisterInfo::hasReservedSpillSlot() for more information. + {PPC::CR2, -4}, // VRSAVE save area offset. {PPC::VRSAVE, -4}, @@ -228,27 +226,6 @@ public: {PPC::F14, -144}, // General register save area offsets. - // FIXME 64-bit SVR4: Are 32-bit registers actually allocated in 64-bit - // mode? - {PPC::R31, -4}, - {PPC::R30, -12}, - {PPC::R29, -20}, - {PPC::R28, -28}, - {PPC::R27, -36}, - {PPC::R26, -44}, - {PPC::R25, -52}, - {PPC::R24, -60}, - {PPC::R23, -68}, - {PPC::R22, -76}, - {PPC::R21, -84}, - {PPC::R20, -92}, - {PPC::R19, -100}, - {PPC::R18, -108}, - {PPC::R17, -116}, - {PPC::R16, -124}, - {PPC::R15, -132}, - {PPC::R14, -140}, - {PPC::X31, -8}, {PPC::X30, -16}, {PPC::X29, -24}, @@ -268,24 +245,6 @@ public: {PPC::X15, -136}, {PPC::X14, -144}, - // CR save area offset. - // FIXME SVR4: Disable CR save area for now. -// {PPC::CR2, -4}, -// {PPC::CR3, -4}, -// {PPC::CR4, -4}, -// {PPC::CR2LT, -4}, -// {PPC::CR2GT, -4}, -// {PPC::CR2EQ, -4}, -// {PPC::CR2UN, -4}, -// {PPC::CR3LT, -4}, -// {PPC::CR3GT, -4}, -// {PPC::CR3EQ, -4}, -// {PPC::CR3UN, -4}, -// {PPC::CR4LT, -4}, -// {PPC::CR4GT, -4}, -// {PPC::CR4EQ, -4}, -// {PPC::CR4UN, -4}, - // VRSAVE save area offset. {PPC::VRSAVE, -4}, diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index e8f4d16..762b346 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -14,22 +14,24 @@ #define DEBUG_TYPE "ppc-codegen" #include "PPC.h" -#include "PPCTargetMachine.h" #include "MCTargetDesc/PPCPredicates.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "PPCTargetMachine.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/GlobalValue.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; namespace { @@ -53,7 +55,9 @@ namespace { GlobalBaseReg = 0; SelectionDAGISel::runOnMachineFunction(MF); - InsertVRSaveCode(MF); + if (!PPCSubTarget.isSVR4ABI()) + InsertVRSaveCode(MF); + return true; } @@ -621,6 +625,88 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) { } } +// getVCmpInst: return the vector compare instruction for the specified +// vector type and condition code. Since this is for altivec specific code, +// only support the altivec types (v16i8, v8i16, v4i32, and v4f32). +static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) { + switch (CC) { + case ISD::SETEQ: + case ISD::SETUEQ: + case ISD::SETNE: + case ISD::SETUNE: + if (VecVT == MVT::v16i8) + return PPC::VCMPEQUB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPEQUH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPEQUW; + // v4f32 != v4f32 could be translate to unordered not equal + else if (VecVT == MVT::v4f32) + return PPC::VCMPEQFP; + break; + case ISD::SETLT: + case ISD::SETGT: + case ISD::SETLE: + case ISD::SETGE: + if (VecVT == MVT::v16i8) + return PPC::VCMPGTSB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPGTSH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPGTSW; + else if (VecVT == MVT::v4f32) + return PPC::VCMPGTFP; + break; + case ISD::SETULT: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULE: + if (VecVT == MVT::v16i8) + return PPC::VCMPGTUB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPGTUH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPGTUW; + break; + case ISD::SETOEQ: + if (VecVT == MVT::v4f32) + return PPC::VCMPEQFP; + break; + case ISD::SETOLT: + case ISD::SETOGT: + case ISD::SETOLE: + if (VecVT == MVT::v4f32) + return PPC::VCMPGTFP; + break; + case ISD::SETOGE: + if (VecVT == MVT::v4f32) + return PPC::VCMPGEFP; + break; + default: + break; + } + llvm_unreachable("Invalid integer vector compare condition"); +} + +// getVCmpEQInst: return the equal compare instruction for the specified vector +// type. Since this is for altivec specific code, only support the altivec +// types (v16i8, v8i16, v4i32, and v4f32). +static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) { + switch (VecVT) { + case MVT::v16i8: + return PPC::VCMPEQUB; + case MVT::v8i16: + return PPC::VCMPEQUH; + case MVT::v4i32: + return PPC::VCMPEQUW; + case MVT::v4f32: + return PPC::VCMPEQFP; + default: + llvm_unreachable("Invalid integer vector compare condition"); + } +} + + SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { DebugLoc dl = N->getDebugLoc(); unsigned Imm; @@ -701,10 +787,67 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { } } + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + // Altivec Vector compare instructions do not set any CR register by default and + // vector compare operations return the same type as the operands. + if (LHS.getValueType().isVector()) { + EVT VecVT = LHS.getValueType(); + MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy; + unsigned int VCmpInst = getVCmpInst(VT, CC); + + switch (CC) { + case ISD::SETEQ: + case ISD::SETOEQ: + case ISD::SETUEQ: + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); + case ISD::SETNE: + case ISD::SETONE: + case ISD::SETUNE: { + SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); + return CurDAG->SelectNodeTo(N, PPC::VNOR, VecVT, VCmp, VCmp); + } + case ISD::SETLT: + case ISD::SETOLT: + case ISD::SETULT: + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, RHS, LHS); + case ISD::SETGT: + case ISD::SETOGT: + case ISD::SETUGT: + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); + case ISD::SETGE: + case ISD::SETOGE: + case ISD::SETUGE: { + // Small optimization: Altivec provides a 'Vector Compare Greater Than + // or Equal To' instruction (vcmpgefp), so in this case there is no + // need for extra logic for the equal compare. + if (VecVT.getSimpleVT().isFloatingPoint()) { + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); + } else { + SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); + unsigned int VCmpEQInst = getVCmpEQInst(VT); + SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); + return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpGT, VCmpEQ); + } + } + case ISD::SETLE: + case ISD::SETOLE: + case ISD::SETULE: { + SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0); + unsigned int VCmpEQInst = getVCmpEQInst(VT); + SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); + return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpLE, VCmpEQ); + } + default: + llvm_unreachable("Invalid vector compare type: should be expanded by legalize"); + } + } + bool Inv; int OtherCondIdx; unsigned Idx = getCRIdxForSetCC(CC, Inv, OtherCondIdx); - SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl); + SDValue CCReg = SelectCC(LHS, RHS, CC, dl); SDValue IntCR; // Force the ccreg into CR7. @@ -717,7 +860,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { if (PPCSubTarget.hasMFOCRF() && OtherCondIdx == -1) IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, CCReg), 0); - else + else IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32, CR7Reg, CCReg), 0); @@ -1127,6 +1270,52 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { Chain), 0); return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); } + case PPCISD::TOC_ENTRY: { + assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI"); + + // For medium code model, we generate two instructions as described + // below. Otherwise we allow SelectCodeCommon to handle this, selecting + // one of LDtoc, LDtocJTI, and LDtocCPT. + if (TM.getCodeModel() != CodeModel::Medium) + break; + + // The first source operand is a TargetGlobalAddress or a + // TargetJumpTable. If it is an externally defined symbol, a symbol + // with common linkage, a function address, or a jump table address, + // we generate: + // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>)) + // Otherwise we generate: + // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>) + SDValue GA = N->getOperand(0); + SDValue TOCbase = N->getOperand(1); + SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, + TOCbase, GA); + + if (isa<JumpTableSDNode>(GA)) + return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, + SDValue(Tmp, 0)); + + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { + const GlobalValue *GValue = G->getGlobal(); + const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); + const GlobalValue *RealGValue = GAlias ? + GAlias->resolveAliasedGlobal(false) : GValue; + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); + assert((GVar || isa<Function>(RealGValue)) && + "Unexpected global value subclass!"); + + // An external variable is one without an initializer. For these, + // for variables with common linkage, and for Functions, generate + // the LDtocL form. + if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || + RealGValue->hasAvailableExternallyLinkage()) + return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, + SDValue(Tmp, 0)); + } + + return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, + SDValue(Tmp, 0), GA); + } } return SelectCode(N); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index dbb3b14..9966b2c 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -12,15 +12,10 @@ //===----------------------------------------------------------------------===// #include "PPCISelLowering.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCTargetMachine.h" -#include "MCTargetDesc/PPCPredicates.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -29,6 +24,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -347,6 +347,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FNEG, VT, Expand); + setOperationAction(ISD::FSQRT, VT, Expand); + setOperationAction(ISD::FLOG, VT, Expand); + setOperationAction(ISD::FLOG10, VT, Expand); + setOperationAction(ISD::FLOG2, VT, Expand); + setOperationAction(ISD::FEXP, VT, Expand); + setOperationAction(ISD::FEXP2, VT, Expand); + setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FABS, VT, Expand); + setOperationAction(ISD::FPOWI, VT, Expand); + setOperationAction(ISD::FFLOOR, VT, Expand); + setOperationAction(ISD::FCEIL, VT, Expand); + setOperationAction(ISD::FTRUNC, VT, Expand); + setOperationAction(ISD::FRINT, VT, Expand); + setOperationAction(ISD::FNEARBYINT, VT, Expand); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); setOperationAction(ISD::BUILD_VECTOR, VT, Expand); @@ -361,6 +376,17 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); + setOperationAction(ISD::VSELECT, VT, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); + + for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) { + MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j; + setTruncStoreAction(VT, InnerVT, Expand); + } + setLoadExtAction(ISD::SEXTLOAD, VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, Expand); } // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle @@ -373,6 +399,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::LOAD , MVT::v4i32, Legal); setOperationAction(ISD::SELECT, MVT::v4i32, Expand); setOperationAction(ISD::STORE , MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); @@ -392,6 +426,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); + + // Altivec does not contain unordered floating-point compare instructions + setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand); } if (Subtarget->has64BitSupport()) { @@ -401,6 +443,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? @@ -534,11 +578,27 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; case PPCISD::CR6SET: return "PPCISD::CR6SET"; case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; + case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA"; + case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L"; + case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L"; + case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; + case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; + case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; + case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; + case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; + case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; + case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; + case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; + case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; + case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; + case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; } } EVT PPCTargetLowering::getSetCCResultType(EVT VT) const { - return MVT::i32; + if (!VT.isVector()) + return MVT::i32; + return VT.changeVectorElementTypeToInteger(); } //===----------------------------------------------------------------------===// @@ -1264,8 +1324,8 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); - SDValue TgtBAHi = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOHiFlag); - SDValue TgtBALo = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOLoFlag); + SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); + SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); } @@ -1278,19 +1338,81 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, EVT PtrVT = getPointerTy(); bool is64bit = PPCSubTarget.isPPC64(); - TLSModel::Model model = getTargetMachine().getTLSModel(GV); + TLSModel::Model Model = getTargetMachine().getTLSModel(GV); + + if (Model == TLSModel::LocalExec) { + SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + PPCII::MO_TPREL16_HA); + SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + PPCII::MO_TPREL16_LO); + SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, + is64bit ? MVT::i64 : MVT::i32); + SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); + return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); + } - SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_TPREL16_HA); - SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_TPREL16_LO); + if (!is64bit) + llvm_unreachable("only local-exec is currently supported for ppc32"); + + if (Model == TLSModel::InitialExec) { + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, + PtrVT, GOTReg, TGA); + SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, + PtrVT, TGA, TPOffsetHi); + return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA); + } + + if (Model == TLSModel::GeneralDynamic) { + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, + GOTReg, TGA); + SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT, + GOTEntryHi, TGA); + + // We need a chain node, and don't have one handy. The underlying + // call has no side effects, so using the function entry node + // suffices. + SDValue Chain = DAG.getEntryNode(); + Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); + SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); + SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl, + PtrVT, ParmReg, TGA); + // The return value from GET_TLS_ADDR really is in X3 already, but + // some hacks are needed here to tie everything together. The extra + // copies dissolve during subsequent transforms. + Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); + return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT); + } - if (model != TLSModel::LocalExec) - llvm_unreachable("only local-exec TLS mode supported"); - SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, - is64bit ? MVT::i64 : MVT::i32); - SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); - return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); + if (Model == TLSModel::LocalDynamic) { + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, + GOTReg, TGA); + SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT, + GOTEntryHi, TGA); + + // We need a chain node, and don't have one handy. The underlying + // call has no side effects, so using the function entry node + // suffices. + SDValue Chain = DAG.getEntryNode(); + Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); + SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); + SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl, + PtrVT, ParmReg, TGA); + // The return value from GET_TLSLD_ADDR really is in X3 already, but + // some hacks are needed here to tie everything together. The extra + // copies dissolve during subsequent transforms. + Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); + SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, + Chain, ParmReg, TGA); + return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); + } + + llvm_unreachable("Unknown TLS model!"); } SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, @@ -1493,7 +1615,7 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = (PtrVT == MVT::i64); Type *IntPtrTy = - DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType( + DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType( *DAG.getContext()); TargetLowering::ArgListTy Args; @@ -1716,9 +1838,13 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) { - return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins, - dl, DAG, InVals); + if (PPCSubTarget.isSVR4ABI()) { + if (PPCSubTarget.isPPC64()) + return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, + dl, DAG, InVals); + else + return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, + dl, DAG, InVals); } else { return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); @@ -1726,7 +1852,7 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain, } SDValue -PPCTargetLowering::LowerFormalArguments_SVR4( +PPCTargetLowering::LowerFormalArguments_32SVR4( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> @@ -1943,6 +2069,334 @@ PPCTargetLowering::LowerFormalArguments_SVR4( return Chain; } +// PPC64 passes i8, i16, and i32 values in i64 registers. Promote +// value to MVT::i64 and then truncate to the correct register size. +SDValue +PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, + SelectionDAG &DAG, SDValue ArgVal, + DebugLoc dl) const { + if (Flags.isSExt()) + ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, + DAG.getValueType(ObjectVT)); + else if (Flags.isZExt()) + ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, + DAG.getValueType(ObjectVT)); + + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); +} + +// Set the size that is at least reserved in caller of this function. Tail +// call optimized functions' reserved stack space needs to be aligned so that +// taking the difference between two stack areas will result in an aligned +// stack. +void +PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG, + unsigned nAltivecParamsAtEnd, + unsigned MinReservedArea, + bool isPPC64) const { + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + // Add the Altivec parameters at the end, if needed. + if (nAltivecParamsAtEnd) { + MinReservedArea = ((MinReservedArea+15)/16)*16; + MinReservedArea += 16*nAltivecParamsAtEnd; + } + MinReservedArea = + std::max(MinReservedArea, + PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); + unsigned TargetAlign + = DAG.getMachineFunction().getTarget().getFrameLowering()-> + getStackAlignment(); + unsigned AlignMask = TargetAlign-1; + MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; + FI->setMinReservedArea(MinReservedArea); +} + +SDValue +PPCTargetLowering::LowerFormalArguments_64SVR4( + SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + // TODO: add description of PPC stack frame format, or at least some docs. + // + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + // Potential tail calls could cause overwriting of argument stack slots. + bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && + (CallConv == CallingConv::Fast)); + unsigned PtrByteSize = 8; + + unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true); + // Area that is at least reserved in caller of this function. + unsigned MinReservedArea = ArgOffset; + + static const uint16_t GPR[] = { + PPC::X3, PPC::X4, PPC::X5, PPC::X6, + PPC::X7, PPC::X8, PPC::X9, PPC::X10, + }; + + static const uint16_t *FPR = GetFPR(); + + static const uint16_t VR[] = { + PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, + PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 + }; + + const unsigned Num_GPR_Regs = array_lengthof(GPR); + const unsigned Num_FPR_Regs = 13; + const unsigned Num_VR_Regs = array_lengthof(VR); + + unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + + // Add DAG nodes to load the arguments or copy them out of registers. On + // entry to a function on PPC, the arguments start after the linkage area, + // although the first ones are often in registers. + + SmallVector<SDValue, 8> MemOps; + unsigned nAltivecParamsAtEnd = 0; + Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) { + SDValue ArgVal; + bool needsLoad = false; + EVT ObjectVT = Ins[ArgNo].VT; + unsigned ObjSize = ObjectVT.getSizeInBits()/8; + unsigned ArgSize = ObjSize; + ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; + + unsigned CurArgOffset = ArgOffset; + + // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. + if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || + ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { + if (isVarArg) { + MinReservedArea = ((MinReservedArea+15)/16)*16; + MinReservedArea += CalculateStackSlotSize(ObjectVT, + Flags, + PtrByteSize); + } else + nAltivecParamsAtEnd++; + } else + // Calculate min reserved area. + MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, + Flags, + PtrByteSize); + + // FIXME the codegen can be much improved in some cases. + // We do not have to keep everything in memory. + if (Flags.isByVal()) { + // ObjSize is the true size, ArgSize rounded up to multiple of registers. + ObjSize = Flags.getByValSize(); + ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; + // Empty aggregate parameters do not take up registers. Examples: + // struct { } a; + // union { } b; + // int c[0]; + // etc. However, we have to provide a place-holder in InVals, so + // pretend we have an 8-byte item at the current address for that + // purpose. + if (!ObjSize) { + int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + InVals.push_back(FIN); + continue; + } + // All aggregates smaller than 8 bytes must be passed right-justified. + if (ObjSize < PtrByteSize) + CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize); + // The value of the object is its address. + int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + InVals.push_back(FIN); + + if (ObjSize < 8) { + if (GPR_idx != Num_GPR_Regs) { + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); + SDValue Store; + + if (ObjSize==1 || ObjSize==2 || ObjSize==4) { + EVT ObjType = (ObjSize == 1 ? MVT::i8 : + (ObjSize == 2 ? MVT::i16 : MVT::i32)); + Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(FuncArg, CurArgOffset), + ObjType, false, false, 0); + } else { + // For sizes that don't fit a truncating store (3, 5, 6, 7), + // store the whole register as-is to the parameter save area + // slot. The address of the parameter was already calculated + // above (InVals.push_back(FIN)) to be the right-justified + // offset within the slot. For this store, we need a new + // frame index that points at the beginning of the slot. + int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(FuncArg, ArgOffset), + false, false, 0); + } + + MemOps.push_back(Store); + ++GPR_idx; + } + // Whether we copied from a register or not, advance the offset + // into the parameter save area by a full doubleword. + ArgOffset += PtrByteSize; + continue; + } + + for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { + // Store whatever pieces of the object are in registers + // to memory. ArgOffset will be the address of the beginning + // of the object. + if (GPR_idx != Num_GPR_Regs) { + unsigned VReg; + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(FuncArg, ArgOffset), + false, false, 0); + MemOps.push_back(Store); + ++GPR_idx; + ArgOffset += PtrByteSize; + } else { + ArgOffset += ArgSize - j; + break; + } + } + continue; + } + + switch (ObjectVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unhandled argument type!"); + case MVT::i32: + case MVT::i64: + if (GPR_idx != Num_GPR_Regs) { + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); + + if (ObjectVT == MVT::i32) + // PPC64 passes i8, i16, and i32 values in i64 registers. Promote + // value to MVT::i64 and then truncate to the correct register size. + ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); + + ++GPR_idx; + } else { + needsLoad = true; + ArgSize = PtrByteSize; + } + ArgOffset += 8; + break; + + case MVT::f32: + case MVT::f64: + // Every 8 bytes of argument space consumes one of the GPRs available for + // argument passing. + if (GPR_idx != Num_GPR_Regs) { + ++GPR_idx; + } + if (FPR_idx != Num_FPR_Regs) { + unsigned VReg; + + if (ObjectVT == MVT::f32) + VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); + else + VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); + + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); + ++FPR_idx; + } else { + needsLoad = true; + ArgSize = PtrByteSize; + } + + ArgOffset += 8; + break; + case MVT::v4f32: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + // Note that vector arguments in registers don't reserve stack space, + // except in varargs functions. + if (VR_idx != Num_VR_Regs) { + unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); + if (isVarArg) { + while ((ArgOffset % 16) != 0) { + ArgOffset += PtrByteSize; + if (GPR_idx != Num_GPR_Regs) + GPR_idx++; + } + ArgOffset += 16; + GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? + } + ++VR_idx; + } else { + // Vectors are aligned. + ArgOffset = ((ArgOffset+15)/16)*16; + CurArgOffset = ArgOffset; + ArgOffset += 16; + needsLoad = true; + } + break; + } + + // We need to load the argument to a virtual register if we determined + // above that we ran out of physical registers of the appropriate type. + if (needsLoad) { + int FI = MFI->CreateFixedObject(ObjSize, + CurArgOffset + (ArgSize - ObjSize), + isImmutable); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), + false, false, false, 0); + } + + InVals.push_back(ArgVal); + } + + // Set the size that is at least reserved in caller of this function. Tail + // call optimized functions' reserved stack space needs to be aligned so that + // taking the difference between two stack areas will result in an aligned + // stack. + setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true); + + // If the function takes variable number of arguments, make a frame index for + // the start of the first vararg value... for expansion of llvm.va_start. + if (isVarArg) { + int Depth = ArgOffset; + + FuncInfo->setVarArgsFrameIndex( + MFI->CreateFixedObject(PtrByteSize, Depth, true)); + SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); + + // If this function is vararg, store any remaining integer argument regs + // to their spots on the stack so that they may be loaded by deferencing the + // result of va_next. + for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(), false, false, 0); + MemOps.push_back(Store); + // Increment the address by four for the next argument to store + SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); + } + } + + if (!MemOps.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, + MVT::Other, &MemOps[0], MemOps.size()); + + return Chain; +} + SDValue PPCTargetLowering::LowerFormalArguments_Darwin( SDValue Chain, @@ -2019,10 +2473,12 @@ PPCTargetLowering::LowerFormalArguments_Darwin( default: llvm_unreachable("Unhandled argument type!"); case MVT::i32: case MVT::f32: - VecArgOffset += isPPC64 ? 8 : 4; + VecArgOffset += 4; break; case MVT::i64: // PPC64 case MVT::f64: + // FIXME: We are guaranteed to be !isPPC64 at this point. + // Does MVT::i64 apply? VecArgOffset += 8; break; case MVT::v4f32: @@ -2045,7 +2501,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( SmallVector<SDValue, 8> MemOps; unsigned nAltivecParamsAtEnd = 0; - for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { + Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; @@ -2093,10 +2550,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin( else VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); + EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(), - ObjSize==1 ? MVT::i8 : MVT::i16, - false, false, 0); + MachinePointerInfo(FuncArg, + CurArgOffset), + ObjType, false, false, 0); MemOps.push_back(Store); ++GPR_idx; } @@ -2107,8 +2565,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( } for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { // Store whatever pieces of the object are in registers - // to memory. ArgVal will be address of the beginning of - // the object. + // to memory. ArgOffset will be the address of the beginning + // of the object. if (GPR_idx != Num_GPR_Regs) { unsigned VReg; if (isPPC64) @@ -2119,7 +2577,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(), + MachinePointerInfo(FuncArg, ArgOffset), false, false, 0); MemOps.push_back(Store); ++GPR_idx; @@ -2154,18 +2612,10 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); - if (ObjectVT == MVT::i32) { + if (ObjectVT == MVT::i32) // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. - if (Flags.isSExt()) - ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, - DAG.getValueType(ObjectVT)); - else if (Flags.isZExt()) - ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, - DAG.getValueType(ObjectVT)); - - ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); - } + ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); ++GPR_idx; } else { @@ -2252,23 +2702,10 @@ PPCTargetLowering::LowerFormalArguments_Darwin( } // Set the size that is at least reserved in caller of this function. Tail - // call optimized function's reserved stack space needs to be aligned so that + // call optimized functions' reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. - PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); - // Add the Altivec parameters at the end, if needed. - if (nAltivecParamsAtEnd) { - MinReservedArea = ((MinReservedArea+15)/16)*16; - MinReservedArea += 16*nAltivecParamsAtEnd; - } - MinReservedArea = - std::max(MinReservedArea, - PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); - unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()-> - getStackAlignment(); - unsigned AlignMask = TargetAlign-1; - MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; - FI->setMinReservedArea(MinReservedArea); + setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64); // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. @@ -2308,8 +2745,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( return Chain; } -/// CalculateParameterAndLinkageAreaSize - Get the size of the paramter plus -/// linkage area for the Darwin ABI. +/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus +/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI. static unsigned CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, bool isPPC64, @@ -2718,7 +3155,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // Thus for a call through a function pointer, the following actions need // to be performed: // 1. Save the TOC of the caller in the TOC save area of its stack - // frame (this is done in LowerCall_Darwin()). + // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). // 2. Load the address of the function entry point from the function // descriptor. // 3. Load the TOC of the callee from the function descriptor into r2. @@ -2808,6 +3245,15 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, return CallOpc; } +static +bool isLocalCall(const SDValue &Callee) +{ + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) + return !G->getGlobal()->isDeclaration() && + !G->getGlobal()->isWeakForLinker(); + return false; +} + SDValue PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, @@ -2823,12 +3269,32 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Copy all of the result registers out of their specified physreg. for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; - EVT VT = VA.getValVT(); assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyFromReg(Chain, dl, - VA.getLocReg(), VT, InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - InFlag = Chain.getValue(2); + + SDValue Val = DAG.getCopyFromReg(Chain, dl, + VA.getLocReg(), VA.getLocVT(), InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::AExt: + Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); + break; + case CCValAssign::ZExt: + Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, + DAG.getValueType(VA.getValVT())); + Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); + break; + case CCValAssign::SExt: + Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, + DAG.getValueType(VA.getValVT())); + Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); + break; + } + + InVals.push_back(Val); } return Chain; @@ -2916,8 +3382,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // from allocating it), resulting in an additional register being // allocated and an unnecessary move instruction being generated. needsTOCRestore = true; - } else if (CallOpc == PPCISD::CALL_SVR4) { - // Otherwise insert NOP. + } else if ((CallOpc == PPCISD::CALL_SVR4) && !isLocalCall(Callee)) { + // Otherwise insert NOP for non-local calls. CallOpc = PPCISD::CALL_NOP_SVR4; } } @@ -2959,10 +3425,16 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG); - if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) - return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg, - isTailCall, Outs, OutVals, Ins, - dl, DAG, InVals); + if (PPCSubTarget.isSVR4ABI()) { + if (PPCSubTarget.isPPC64()) + return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, + isTailCall, Outs, OutVals, Ins, + dl, DAG, InVals); + else + return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, + isTailCall, Outs, OutVals, Ins, + dl, DAG, InVals); + } return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, isTailCall, Outs, OutVals, Ins, @@ -2970,15 +3442,15 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } SDValue -PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { - // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description +PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description // of the 32-bit SVR4 ABI stack frame layout. assert((CallConv == CallingConv::C || @@ -3182,6 +3654,396 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, Ins, InVals); } +// Copy an argument into memory, being careful to do this outside the +// call sequence for the call to which the argument belongs. +SDValue +PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, + SDValue CallSeqStart, + ISD::ArgFlagsTy Flags, + SelectionDAG &DAG, + DebugLoc dl) const { + SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, + CallSeqStart.getNode()->getOperand(0), + Flags, DAG, dl); + // The MEMCPY must go outside the CALLSEQ_START..END. + SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, + CallSeqStart.getNode()->getOperand(1)); + DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), + NewCallSeqStart.getNode()); + return NewCallSeqStart; +} + +SDValue +PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + + unsigned NumOps = Outs.size(); + + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + unsigned PtrByteSize = 8; + + MachineFunction &MF = DAG.getMachineFunction(); + + // Mark this function as potentially containing a function that contains a + // tail call. As a consequence the frame pointer will be used for dynamicalloc + // and restoring the callers stack pointer in this functions epilog. This is + // done because by tail calling the called function might overwrite the value + // in this function's (MF) stack pointer stack slot 0(SP). + if (getTargetMachine().Options.GuaranteedTailCallOpt && + CallConv == CallingConv::Fast) + MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); + + unsigned nAltivecParamsAtEnd = 0; + + // Count how many bytes are to be pushed on the stack, including the linkage + // area, and parameter passing area. We start with at least 48 bytes, which + // is reserved space for [SP][CR][LR][3 x unused]. + // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result + // of this call. + unsigned NumBytes = + CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv, + Outs, OutVals, nAltivecParamsAtEnd); + + // Calculate by how many bytes the stack has to be adjusted in case of tail + // call optimization. + int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); + + // To protect arguments on the stack from being clobbered in a tail call, + // force all the loads to happen before doing any other lowering. + if (isTailCall) + Chain = DAG.getStackArgumentTokenFactor(Chain); + + // Adjust the stack pointer for the new arguments... + // These operations are automatically eliminated by the prolog/epilog pass + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + SDValue CallSeqStart = Chain; + + // Load the return address and frame pointer so it can be move somewhere else + // later. + SDValue LROp, FPOp; + Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, + dl); + + // Set up a copy of the stack pointer for use loading and storing any + // arguments that may not fit in the registers available for argument + // passing. + SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); + + // Figure out which arguments are going to go in registers, and which in + // memory. Also, if this is a vararg function, floating point operations + // must be stored to our stack, and loaded into integer regs as well, if + // any integer regs are available for argument passing. + unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true); + unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + + static const uint16_t GPR[] = { + PPC::X3, PPC::X4, PPC::X5, PPC::X6, + PPC::X7, PPC::X8, PPC::X9, PPC::X10, + }; + static const uint16_t *FPR = GetFPR(); + + static const uint16_t VR[] = { + PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, + PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 + }; + const unsigned NumGPRs = array_lengthof(GPR); + const unsigned NumFPRs = 13; + const unsigned NumVRs = array_lengthof(VR); + + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + SmallVector<TailCallArgumentInfo, 8> TailCallArguments; + + SmallVector<SDValue, 8> MemOpChains; + for (unsigned i = 0; i != NumOps; ++i) { + SDValue Arg = OutVals[i]; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + + // PtrOff will be used to store the current argument to the stack if a + // register cannot be found for it. + SDValue PtrOff; + + PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); + + PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); + + // Promote integers to 64-bit values. + if (Arg.getValueType() == MVT::i32) { + // FIXME: Should this use ANY_EXTEND if neither sext nor zext? + unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); + } + + // FIXME memcpy is used way more than necessary. Correctness first. + // Note: "by value" is code for passing a structure by value, not + // basic types. + if (Flags.isByVal()) { + // Note: Size includes alignment padding, so + // struct x { short a; char b; } + // will have Size = 4. With #pragma pack(1), it will have Size = 3. + // These are the proper values we need for right-justifying the + // aggregate in a parameter register. + unsigned Size = Flags.getByValSize(); + + // An empty aggregate parameter takes up no storage and no + // registers. + if (Size == 0) + continue; + + // All aggregates smaller than 8 bytes must be passed right-justified. + if (Size==1 || Size==2 || Size==4) { + EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); + if (GPR_idx != NumGPRs) { + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, + MachinePointerInfo(), VT, + false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + + ArgOffset += PtrByteSize; + continue; + } + } + + if (GPR_idx == NumGPRs && Size < 8) { + SDValue Const = DAG.getConstant(PtrByteSize - Size, + PtrOff.getValueType()); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); + Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, + CallSeqStart, + Flags, DAG, dl); + ArgOffset += PtrByteSize; + continue; + } + // Copy entire object into memory. There are cases where gcc-generated + // code assumes it is there, even if it could be put entirely into + // registers. (This is not what the doc says.) + + // FIXME: The above statement is likely due to a misunderstanding of the + // documents. All arguments must be copied into the parameter area BY + // THE CALLEE in the event that the callee takes the address of any + // formal argument. That has not yet been implemented. However, it is + // reasonable to use the stack area as a staging area for the register + // load. + + // Skip this for small aggregates, as we will use the same slot for a + // right-justified copy, below. + if (Size >= 8) + Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, + CallSeqStart, + Flags, DAG, dl); + + // When a register is available, pass a small aggregate right-justified. + if (Size < 8 && GPR_idx != NumGPRs) { + // The easiest way to get this right-justified in a register + // is to copy the structure into the rightmost portion of a + // local variable slot, then load the whole slot into the + // register. + // FIXME: The memcpy seems to produce pretty awful code for + // small aggregates, particularly for packed ones. + // FIXME: It would be preferable to use the slot in the + // parameter save area instead of a new local variable. + SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); + Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, + CallSeqStart, + Flags, DAG, dl); + + // Load the slot into the register. + SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff, + MachinePointerInfo(), + false, false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + + // Done with this argument. + ArgOffset += PtrByteSize; + continue; + } + + // For aggregates larger than PtrByteSize, copy the pieces of the + // object that fit into registers from the parameter save area. + for (unsigned j=0; j<Size; j+=PtrByteSize) { + SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); + SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); + if (GPR_idx != NumGPRs) { + SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, + MachinePointerInfo(), + false, false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + ArgOffset += PtrByteSize; + } else { + ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; + break; + } + } + continue; + } + + switch (Arg.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected ValueType for argument!"); + case MVT::i32: + case MVT::i64: + if (GPR_idx != NumGPRs) { + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); + } else { + LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, + true, isTailCall, false, MemOpChains, + TailCallArguments, dl); + } + ArgOffset += PtrByteSize; + break; + case MVT::f32: + case MVT::f64: + if (FPR_idx != NumFPRs) { + RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); + + if (isVarArg) { + // A single float or an aggregate containing only a single float + // must be passed right-justified in the stack doubleword, and + // in the GPR, if one is available. + SDValue StoreOff; + if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) { + SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); + StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); + } else + StoreOff = PtrOff; + + SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff, + MachinePointerInfo(), false, false, 0); + MemOpChains.push_back(Store); + + // Float varargs are always shadowed in available integer registers + if (GPR_idx != NumGPRs) { + SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, + MachinePointerInfo(), false, false, + false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + } + } else if (GPR_idx != NumGPRs) + // If we have any FPRs remaining, we may also have GPRs remaining. + ++GPR_idx; + } else { + // Single-precision floating-point values are mapped to the + // second (rightmost) word of the stack doubleword. + if (Arg.getValueType() == MVT::f32) { + SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); + PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); + } + + LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, + true, isTailCall, false, MemOpChains, + TailCallArguments, dl); + } + ArgOffset += 8; + break; + case MVT::v4f32: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + if (isVarArg) { + // These go aligned on the stack, or in the corresponding R registers + // when within range. The Darwin PPC ABI doc claims they also go in + // V registers; in fact gcc does this only for arguments that are + // prototyped, not for those that match the ... We do it for all + // arguments, seems to work. + while (ArgOffset % 16 !=0) { + ArgOffset += PtrByteSize; + if (GPR_idx != NumGPRs) + GPR_idx++; + } + // We could elide this store in the case where the object fits + // entirely in R registers. Maybe later. + PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, + DAG.getConstant(ArgOffset, PtrVT)); + SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo(), false, false, 0); + MemOpChains.push_back(Store); + if (VR_idx != NumVRs) { + SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, + MachinePointerInfo(), + false, false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); + } + ArgOffset += 16; + for (unsigned i=0; i<16; i+=PtrByteSize) { + if (GPR_idx == NumGPRs) + break; + SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, + DAG.getConstant(i, PtrVT)); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), + false, false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + } + break; + } + + // Non-varargs Altivec params generally go in registers, but have + // stack space allocated at the end. + if (VR_idx != NumVRs) { + // Doesn't have GPR space allocated. + RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); + } else { + LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, + true, isTailCall, true, MemOpChains, + TailCallArguments, dl); + ArgOffset += 16; + } + break; + } + } + + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Check if this is an indirect call (MTCTR/BCTRL). + // See PrepareCall() for more information about calls through function + // pointers in the 64-bit SVR4 ABI. + if (!isTailCall && + !dyn_cast<GlobalAddressSDNode>(Callee) && + !dyn_cast<ExternalSymbolSDNode>(Callee) && + !isBLACompatibleAddress(Callee, DAG)) { + // Load r2 into a virtual register and store it to the TOC save area. + SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); + // TOC save area offset. + SDValue PtrOff = DAG.getIntPtrConstant(40); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); + Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), + false, false, 0); + // R12 must contain the address of an indirect callee. This does not + // mean the MTCTR instruction must use R12; it's easier to model this + // as an extra parameter, so do that. + RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); + } + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into the appropriate regs. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + if (isTailCall) + PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, + FPOp, true, TailCallArguments); + + return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, + RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, + Ins, InVals); +} + SDValue PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, @@ -3192,7 +4054,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - unsigned NumOps = Outs.size(); + unsigned NumOps = Outs.size(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; @@ -3299,11 +4161,13 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, } // FIXME memcpy is used way more than necessary. Correctness first. + // Note: "by value" is code for passing a structure by value, not + // basic types. if (Flags.isByVal()) { unsigned Size = Flags.getByValSize(); + // Very small objects are passed right-justified. Everything else is + // passed left-justified. if (Size==1 || Size==2) { - // Very small objects are passed right-justified. - // Everything else is passed left-justified. EVT VT = (Size==1) ? MVT::i8 : MVT::i16; if (GPR_idx != NumGPRs) { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, @@ -3314,17 +4178,12 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, ArgOffset += PtrByteSize; } else { - SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType()); + SDValue Const = DAG.getConstant(PtrByteSize - Size, + PtrOff.getValueType()); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); - SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr, - CallSeqStart.getNode()->getOperand(0), - Flags, DAG, dl); - // This must go outside the CALLSEQ_START..END. - SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, - CallSeqStart.getNode()->getOperand(1)); - DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), - NewCallSeqStart.getNode()); - Chain = CallSeqStart = NewCallSeqStart; + Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, + CallSeqStart, + Flags, DAG, dl); ArgOffset += PtrByteSize; } continue; @@ -3332,15 +4191,13 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // Copy entire object into memory. There are cases where gcc-generated // code assumes it is there, even if it could be put entirely into // registers. (This is not what the doc says.) - SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, - CallSeqStart.getNode()->getOperand(0), - Flags, DAG, dl); - // This must go outside the CALLSEQ_START..END. - SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, - CallSeqStart.getNode()->getOperand(1)); - DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode()); - Chain = CallSeqStart = NewCallSeqStart; - // And copy the pieces of it that fit into registers. + Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, + CallSeqStart, + Flags, DAG, dl); + + // For small aggregates (Darwin only) and aggregates >= PtrByteSize, + // copy the pieces of the object that fit into registers from the + // parameter save area. for (unsigned j=0; j<Size; j+=PtrByteSize) { SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); @@ -3409,11 +4266,10 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, !isPPC64) // PPC64 has 64-bit GPR's obviously :) ++GPR_idx; } - } else { + } else LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, isPPC64, isTailCall, false, MemOpChains, TailCallArguments, dl); - } if (isPPC64) ArgOffset += 8; else @@ -3508,22 +4364,6 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Check if this is an indirect call (MTCTR/BCTRL). - // See PrepareCall() for more information about calls through function - // pointers in the 64-bit SVR4 ABI. - if (!isTailCall && isPPC64 && PPCSubTarget.isSVR4ABI() && - !dyn_cast<GlobalAddressSDNode>(Callee) && - !dyn_cast<ExternalSymbolSDNode>(Callee) && - !isBLACompatibleAddress(Callee, DAG)) { - // Load r2 into a virtual register and store it to the TOC save area. - SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); - // TOC save area offset. - SDValue PtrOff = DAG.getIntPtrConstant(40); - SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); - Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), - false, false, 0); - } - // On Darwin, R12 must contain the address of an indirect callee. This does // not mean the MTCTR instruction must use R12; it's easier to model this as // an extra parameter, so do that. @@ -3588,8 +4428,24 @@ PPCTargetLowering::LowerReturn(SDValue Chain, for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - OutVals[i], Flag); + + SDValue Arg = OutVals[i]; + + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); + break; + } + + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); Flag = Chain.getValue(1); } @@ -3821,7 +4677,52 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, return SDValue(); if (Op.getOperand(0).getValueType() == MVT::i64) { - SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0)); + SDValue SINT = Op.getOperand(0); + // When converting to single-precision, we actually need to convert + // to double-precision first and then round to single-precision. + // To avoid double-rounding effects during that operation, we have + // to prepare the input operand. Bits that might be truncated when + // converting to double-precision are replaced by a bit that won't + // be lost at this stage, but is below the single-precision rounding + // position. + // + // However, if -enable-unsafe-fp-math is in effect, accept double + // rounding to avoid the extra overhead. + if (Op.getValueType() == MVT::f32 && + !DAG.getTarget().Options.UnsafeFPMath) { + + // Twiddle input to make sure the low 11 bits are zero. (If this + // is the case, we are guaranteed the value will fit into the 53 bit + // mantissa of an IEEE double-precision value without rounding.) + // If any of those low 11 bits were not zero originally, make sure + // bit 12 (value 2048) is set instead, so that the final rounding + // to single-precision gets the correct result. + SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, + SINT, DAG.getConstant(2047, MVT::i64)); + Round = DAG.getNode(ISD::ADD, dl, MVT::i64, + Round, DAG.getConstant(2047, MVT::i64)); + Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); + Round = DAG.getNode(ISD::AND, dl, MVT::i64, + Round, DAG.getConstant(-2048, MVT::i64)); + + // However, we cannot use that value unconditionally: if the magnitude + // of the input value is small, the bit-twiddling we did above might + // end up visibly changing the output. Fortunately, in that case, we + // don't need to twiddle bits since the original input will convert + // exactly to double-precision floating-point already. Therefore, + // construct a conditional to use the original value if the top 11 + // bits are all sign-bit copies, and use the rounded value computed + // above otherwise. + SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, + SINT, DAG.getConstant(53, MVT::i32)); + Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, + Cond, DAG.getConstant(1, MVT::i64)); + Cond = DAG.getSetCC(dl, MVT::i32, + Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT); + + SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); + } + SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits); if (Op.getValueType() == MVT::f32) FP = DAG.getNode(ISD::FP_ROUND, dl, @@ -5670,6 +6571,14 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const { case 'v': case 'y': return C_RegisterClass; + case 'Z': + // FIXME: While Z does indicate a memory constraint, it specifically + // indicates an r+r address (used in conjunction with the 'y' modifier + // in the replacement string). Currently, we're forcing the base + // register to be r0 in the asm printer (which is interpreted as zero) + // and forming the complete address in the second register. This is + // suboptimal. + return C_Memory; } } return TargetLowering::getConstraintType(Constraint); @@ -5712,6 +6621,9 @@ PPCTargetLowering::getSingleConstraintMatchWeight( case 'y': weight = CW_Register; break; + case 'Z': + weight = CW_Memory; + break; } return weight; } @@ -5728,9 +6640,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return std::make_pair(0U, &PPC::G8RCRegClass); return std::make_pair(0U, &PPC::GPRCRegClass); case 'f': - if (VT == MVT::f32) + if (VT == MVT::f32 || VT == MVT::i32) return std::make_pair(0U, &PPC::F4RCRegClass); - if (VT == MVT::f64) + if (VT == MVT::f64 || VT == MVT::i64) return std::make_pair(0U, &PPC::F8RCRegClass); break; case 'v': @@ -5910,7 +6822,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) && MFI->getStackSize() && - !MF.getFunction()->hasFnAttr(Attribute::Naked); + !MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked); unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) : (is31 ? PPC::R31 : PPC::R1); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, @@ -5933,16 +6846,15 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, -/// probably because the source does not need to be loaded. If -/// 'IsZeroVal' is true, that means it's safe to return a -/// non-scalar-integer type, e.g. empty string source, constant, or loaded -/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is -/// constant so it does not need to be loaded. +/// probably because the source does not need to be loaded. If 'IsMemset' is +/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that +/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy +/// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { if (this->PPCSubTarget.isPPC64()) { diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 902b188..12b3df7 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -17,8 +17,8 @@ #include "PPC.h" #include "PPCSubtarget.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" namespace llvm { namespace PPCISD { @@ -178,6 +178,65 @@ namespace llvm { CR6SET, CR6UNSET, + /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec + /// TLS model, produces an ADDIS8 instruction that adds the GOT + /// base to sym@got@tprel@ha. + ADDIS_GOT_TPREL_HA, + + /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec + /// TLS model, produces a LD instruction with base register G8RReg + /// and offset sym@got@tprel@l. This completes the addition that + /// finds the offset of "sym" relative to the thread pointer. + LD_GOT_TPREL_L, + + /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS + /// model, produces an ADD instruction that adds the contents of + /// G8RReg to the thread pointer. Symbol contains a relocation + /// sym@tls which is to be replaced by the thread pointer and + /// identifies to the linker that the instruction is part of a + /// TLS sequence. + ADD_TLS, + + /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS + /// model, produces an ADDIS8 instruction that adds the GOT base + /// register to sym@got@tlsgd@ha. + ADDIS_TLSGD_HA, + + /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS + /// model, produces an ADDI8 instruction that adds G8RReg to + /// sym@got@tlsgd@l. + ADDI_TLSGD_L, + + /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS + /// model, produces a call to __tls_get_addr(sym@tlsgd). + GET_TLS_ADDR, + + /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS + /// model, produces an ADDIS8 instruction that adds the GOT base + /// register to sym@got@tlsld@ha. + ADDIS_TLSLD_HA, + + /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS + /// model, produces an ADDI8 instruction that adds G8RReg to + /// sym@got@tlsld@l. + ADDI_TLSLD_L, + + /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS + /// model, produces a call to __tls_get_addr(sym@tlsld). + GET_TLSLD_ADDR, + + /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the + /// local-dynamic TLS model, produces an ADDIS8 instruction + /// that adds X3 to sym@dtprel@ha. The Chain operand is needed + /// to tie this in place following a copy to %X3 from the result + /// of a GET_TLSLD_ADDR. + ADDIS_DTPREL_HA, + + /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS + /// model, produces an ADDI8 instruction that adds G8RReg to + /// sym@got@dtprel@l. + ADDI_DTPREL_L, + /// STD_32 - This is the STD instruction for use with "32-bit" registers. STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, @@ -191,7 +250,21 @@ namespace llvm { /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, /// then puts it in the bottom bits of the GPRC. TYPE can be either i16 /// or i32. - LBRX + LBRX, + + /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium code model, produces + /// an ADDIS8 instruction that adds the TOC base register to sym@toc@ha. + ADDIS_TOC_HA, + + /// G8RC = LD_TOC_L Symbol, G8RReg - For medium code model, produces a + /// LD instruction with base register G8RReg and offset sym@toc@l. + /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. + LD_TOC_L, + + /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces + /// an ADDI8 instruction that adds G8RReg to sym@toc@l. + /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. + ADDI_TOC_L }; } @@ -358,16 +431,15 @@ namespace llvm { /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, - /// probably because the source does not need to be loaded. If - /// 'IsZeroVal' is true, that means it's safe to return a - /// non-scalar-integer type, e.g. empty string source, constant, or loaded - /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is - /// constant so it does not need to be loaded. + /// probably because the source does not need to be loaded. If 'IsMemset' is + /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that + /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy + /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. virtual EVT - getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, bool MemcpyStrSrc, + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than @@ -467,20 +539,41 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG) const; SDValue + extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG, + SDValue ArgVal, DebugLoc dl) const; + + void + setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG, + unsigned nAltivecParamsAtEnd, + unsigned MinReservedArea, bool isPPC64) const; + + SDValue LowerFormalArguments_Darwin(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue - LowerFormalArguments_SVR4(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; + LowerFormalArguments_64SVR4(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + SDValue + LowerFormalArguments_32SVR4(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; SDValue - LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, + createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, + SDValue CallSeqStart, ISD::ArgFlagsTy Flags, + SelectionDAG &DAG, DebugLoc dl) const; + + SDValue + LowerCall_Darwin(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, @@ -488,13 +581,22 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue - LowerCall_SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, - bool isVarArg, bool isTailCall, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; + LowerCall_64SVR4(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, + bool isVarArg, bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + SDValue + LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, + bool isVarArg, bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; }; } diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index cfe71d17..1dd5415 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -32,6 +32,15 @@ def symbolLo64 : Operand<i64> { def tocentry : Operand<iPTR> { let MIOperandInfo = (ops i32imm:$imm); } +def memrs : Operand<iPTR> { // memri where the immediate is a symbolLo64 + let PrintMethod = "printMemRegImm"; + let EncoderMethod = "getMemRIXEncoding"; + let MIOperandInfo = (ops symbolLo64:$off, ptr_rc:$reg); +} +def tlsreg : Operand<i64> { + let EncoderMethod = "getTLSRegEncoding"; +} +def tlsgd : Operand<i64> {} //===----------------------------------------------------------------------===// // 64-bit transformation functions. @@ -63,7 +72,7 @@ def HI48_64 : SDNodeXForm<imm, [{ // let Defs = [LR8] in - def MovePCtoLR8 : Pseudo<(outs), (ins), "", []>, + def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>, PPC970_Unit_BRU; // Darwin ABI Calls. @@ -99,6 +108,16 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { (outs), (ins calltarget:$func), "bl $func\n\tnop", BrB, []>; + let isCodeGenOnly = 1 in + def BL8_NOP_ELF_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24, + (outs), (ins calltarget:$func, tlsgd:$sym), + "bl $func($sym)\n\tnop", BrB, []>; + + let isCodeGenOnly = 1 in + def BL8_NOP_ELF_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24, + (outs), (ins calltarget:$func, tlsgd:$sym), + "bl $func($sym)\n\tnop", BrB, []>; + def BLA8_ELF : IForm<18, 1, 1, (outs), (ins aaddr:$func), "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>; @@ -141,31 +160,31 @@ def : Pat<(PPCnop), let usesCustomInserter = 1 in { let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_ADD_I64", [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>; def ATOMIC_LOAD_SUB_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_SUB_I64", [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>; def ATOMIC_LOAD_OR_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_OR_I64", [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>; def ATOMIC_LOAD_XOR_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_XOR_I64", [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>; def ATOMIC_LOAD_AND_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_AND_i64", [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>; def ATOMIC_LOAD_NAND_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_NAND_I64", [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>; def ATOMIC_CMP_SWAP_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "", + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "#ATOMIC_CMP_SWAP_I64", [(set G8RC:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>; def ATOMIC_SWAP_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "", + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "#ATOMIC_SWAP_I64", [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>; } } @@ -234,10 +253,10 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { let Defs = [CTR8], Uses = [CTR8] in { - def BDZ8 : IForm_ext<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), - "bdz $dst", BrB, []>; - def BDNZ8 : IForm_ext<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), - "bdnz $dst", BrB, []>; + def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), + "bdz $dst">; + def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), + "bdnz $dst">; } } @@ -247,7 +266,7 @@ def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS), PPC970_MicroCode, PPC970_Unit_CRU; def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM), - "", SprMFCR>, + "#MFCR8pseud", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins), @@ -278,7 +297,7 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins), // the POWER3. let Defs = [X1], Uses = [X1] in -def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"", +def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"#DYNALLOC8", [(set G8RC:$result, (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>; @@ -359,6 +378,11 @@ def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "add $rT, $rA, $rB", IntSimple, [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>; +// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the +// initial-exec thread-local storage model. +def ADD8TLS : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB), + "add $rT, $rA, $rB@tls", IntSimple, + [(set G8RC:$rT, (add G8RC:$rA, tglobaltlsaddr:$rB))]>; let Defs = [CARRY] in { def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), @@ -494,16 +518,16 @@ def RLDIMI : MDForm_1<30, 3, // Rotate instructions. def RLDCL : MDForm_1<30, 0, - (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MB), - "rldcl $rA, $rS, $rB, $MB", IntRotateD, + (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MBE), + "rldcl $rA, $rS, $rB, $MBE", IntRotateD, []>, isPPC64; def RLDICL : MDForm_1<30, 0, - (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MB), - "rldicl $rA, $rS, $SH, $MB", IntRotateDI, + (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE), + "rldicl $rA, $rS, $SH, $MBE", IntRotateDI, []>, isPPC64; def RLDICR : MDForm_1<30, 1, - (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME), - "rldicr $rA, $rS, $SH, $ME", IntRotateDI, + (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE), + "rldicr $rA, $rS, $SH, $MBE", IntRotateDI, []>, isPPC64; def RLWINM8 : MForm_2<21, @@ -511,7 +535,7 @@ def RLWINM8 : MForm_2<21, "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral, []>; -def ISEL8 : AForm_1<31, 15, +def ISEL8 : AForm_4<31, 15, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; @@ -556,7 +580,7 @@ def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LWAUX : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), +def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), "lwaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, @@ -606,7 +630,7 @@ def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result), "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX8 : XForm_1<31, 331, (outs G8RC:$rD, ptr_rc:$ea_result), +def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, @@ -625,27 +649,33 @@ let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src), "ld $rD, $src", LdStLD, [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64; +def LDrs : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrs:$src), + "ld $rD, $src", LdStLD, + []>, isPPC64; +// The following three definitions are selected for small code model only. +// Otherwise, we need to create two instructions to form a 32-bit offset, +// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select(). def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), - "", + "#LDtoc", [(set G8RC:$rD, (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64; def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), - "", + "#LDtocJTI", [(set G8RC:$rD, (PPCtoc_entry tjumptable:$disp, G8RC:$reg))]>, isPPC64; def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), - "", + "#LDtocCPT", [(set G8RC:$rD, (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64; let hasSideEffects = 1 in { -let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo. -def LDinto_toc: DSForm_1<58, 0, (outs), (ins G8RC:$reg), +let RST = 2, DS = 2 in +def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg), "ld 2, 8($reg)", LdStLD, [(PPCload_toc G8RC:$reg)]>, isPPC64; -let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo. -def LDtoc_restore : DSForm_1<58, 0, (outs), (ins), +let RST = 2, DS = 10, RA = 1 in +def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), "ld 2, 40(1)", LdStLD, [(PPCtoc_restore)]>, isPPC64; } @@ -671,6 +701,77 @@ def : Pat<(PPCload ixaddr:$src), def : Pat<(PPCload xaddr:$src), (LDX xaddr:$src)>; +// Support for medium code model. +def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp), + "#ADDIStocHA", + [(set G8RC:$rD, + (PPCaddisTocHA G8RC:$reg, tglobaladdr:$disp))]>, + isPPC64; +def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), + "#LDtocL", + [(set G8RC:$rD, + (PPCldTocL tglobaladdr:$disp, G8RC:$reg))]>, isPPC64; +def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp), + "#ADDItocL", + [(set G8RC:$rD, + (PPCaddiTocL G8RC:$reg, tglobaladdr:$disp))]>, isPPC64; + +// Support for thread-local storage. +def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), + "#ADDISgotTprelHA", + [(set G8RC:$rD, + (PPCaddisGotTprelHA G8RC:$reg, + tglobaltlsaddr:$disp))]>, + isPPC64; +def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC:$reg), + "#LDgotTprelL", + [(set G8RC:$rD, + (PPCldGotTprelL tglobaltlsaddr:$disp, G8RC:$reg))]>, + isPPC64; +def : Pat<(PPCaddTls G8RC:$in, tglobaltlsaddr:$g), + (ADD8TLS G8RC:$in, tglobaltlsaddr:$g)>; +def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), + "#ADDIStlsgdHA", + [(set G8RC:$rD, + (PPCaddisTlsgdHA G8RC:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; +def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp), + "#ADDItlsgdL", + [(set G8RC:$rD, + (PPCaddiTlsgdL G8RC:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; +def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym), + "#GETtlsADDR", + [(set G8RC:$rD, + (PPCgetTlsAddr G8RC:$reg, tglobaltlsaddr:$sym))]>, + isPPC64; +def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), + "#ADDIStlsldHA", + [(set G8RC:$rD, + (PPCaddisTlsldHA G8RC:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; +def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp), + "#ADDItlsldL", + [(set G8RC:$rD, + (PPCaddiTlsldL G8RC:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; +def GETtlsldADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym), + "#GETtlsldADDR", + [(set G8RC:$rD, + (PPCgetTlsldAddr G8RC:$reg, tglobaltlsaddr:$sym))]>, + isPPC64; +def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), + "#ADDISdtprelHA", + [(set G8RC:$rD, + (PPCaddisDtprelHA G8RC:$reg, + tglobaltlsaddr:$disp))]>, + isPPC64; +def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp), + "#ADDIdtprelL", + [(set G8RC:$rD, + (PPCaddiDtprelL G8RC:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; + let PPC970_Unit = 2 in { // Truncating stores. def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src), @@ -706,7 +807,7 @@ def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst), let PPC970_Unit = 2 in { -def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS, +def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index b0b8423..0cf28ae 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -158,10 +158,6 @@ def vecspltisw : PatLeaf<(build_vector), [{ return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != 0; }], VSPLTISW_get_imm>; -def V_immneg0 : PatLeaf<(build_vector), [{ - return PPC::isAllNegativeZeroVector(N); -}]>; - //===----------------------------------------------------------------------===// // Helpers for defining instructions that directly correspond to intrinsics. @@ -340,6 +336,28 @@ def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vctuxs $vD, $vB, $UIMM", VecFP, [(set VRRC:$vD, (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>; + +// Defines with the UIM field set to 0 for floating-point +// to integer (fp_to_sint/fp_to_uint) conversions and integer +// to floating-point (sint_to_fp/uint_to_fp) conversions. +let VA = 0 in { +def VCFSX_0 : VXForm_1<842, (outs VRRC:$vD), (ins VRRC:$vB), + "vcfsx $vD, $vB, 0", VecFP, + [(set VRRC:$vD, + (int_ppc_altivec_vcfsx VRRC:$vB, 0))]>; +def VCTUXS_0 : VXForm_1<906, (outs VRRC:$vD), (ins VRRC:$vB), + "vctuxs $vD, $vB, 0", VecFP, + [(set VRRC:$vD, + (int_ppc_altivec_vctuxs VRRC:$vB, 0))]>; +def VCFUX_0 : VXForm_1<778, (outs VRRC:$vD), (ins VRRC:$vB), + "vcfux $vD, $vB, 0", VecFP, + [(set VRRC:$vD, + (int_ppc_altivec_vcfux VRRC:$vB, 0))]>; +def VCTSXS_0 : VXForm_1<970, (outs VRRC:$vD), (ins VRRC:$vB), + "vctsxs $vD, $vB, 0", VecFP, + [(set VRRC:$vD, + (int_ppc_altivec_vctsxs VRRC:$vB, 0))]>; +} def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>; def VLOGEFP : VX2_Int<458, "vlogefp", int_ppc_altivec_vlogefp>; @@ -563,7 +581,12 @@ def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>; def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins), "vxor $vD, $vD, $vD", VecFP, [(set VRRC:$vD, (v4i32 immAllZerosV))]>; +let IMM=-1 in { +def V_SETALLONES : VXForm_3<908, (outs VRRC:$vD), (ins), + "vspltisw $vD, -1", VecFP, + [(set VRRC:$vD, (v4i32 immAllOnesV))]>; } +} // VALU Operations. //===----------------------------------------------------------------------===// // Additional Altivec Patterns @@ -650,7 +673,8 @@ def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))), (VANDC VRRC:$A, VRRC:$B)>; def : Pat<(fmul VRRC:$vA, VRRC:$vB), - (VMADDFP VRRC:$vA, VRRC:$vB, (v4i32 (V_SET0)))>; + (VMADDFP VRRC:$vA, VRRC:$vB, + (v4i32 (VSLW (V_SETALLONES), (V_SETALLONES))))>; // Fused multiply add and multiply sub for packed float. These are represented // separately from the real instructions above, for operations that must have @@ -689,3 +713,23 @@ def : Pat<(v8i16 (sra (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))), (v8i16 (VSRAH VRRC:$vA, VRRC:$vB))>; def : Pat<(v4i32 (sra (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))), (v4i32 (VSRAW VRRC:$vA, VRRC:$vB))>; + +// Float to integer and integer to float conversions +def : Pat<(v4i32 (fp_to_sint (v4f32 VRRC:$vA))), + (VCTSXS_0 VRRC:$vA)>; +def : Pat<(v4i32 (fp_to_uint (v4f32 VRRC:$vA))), + (VCTUXS_0 VRRC:$vA)>; +def : Pat<(v4f32 (sint_to_fp (v4i32 VRRC:$vA))), + (VCFSX_0 VRRC:$vA)>; +def : Pat<(v4f32 (uint_to_fp (v4i32 VRRC:$vA))), + (VCFUX_0 VRRC:$vA)>; + +// Floating-point rounding +def : Pat<(v4f32 (ffloor (v4f32 VRRC:$vA))), + (VRFIM VRRC:$vA)>; +def : Pat<(v4f32 (fceil (v4f32 VRRC:$vA))), + (VRFIP VRRC:$vA)>; +def : Pat<(v4f32 (ftrunc (v4f32 VRRC:$vA))), + (VRFIZ VRRC:$vA)>; +def : Pat<(v4f32 (fnearbyint (v4f32 VRRC:$vA))), + (VRFIN VRRC:$vA)>; diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index a41a027..c3c171c 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -94,12 +94,6 @@ class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr, let Inst{31} = lk; } -class IForm_ext<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL, - string asmstr, InstrItinClass itin, list<dag> pattern> - : IForm<opcode, aa, lk, OOL, IOL, asmstr, itin, pattern> { - let LI{0-4} = bo; -} - // 1.7.2 B-Form class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr> : I<opcode, OOL, IOL, asmstr, BrB> { @@ -118,6 +112,13 @@ class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr> let Inst{31} = lk; } +class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL, + string asmstr> + : BForm<opcode, aa, lk, OOL, IOL, asmstr> { + let BIBO{4-0} = bo; + let BIBO{6-5} = 0; + let CR = 0; +} // 1.7.4 D-Form class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr, @@ -625,9 +626,9 @@ class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : I<opcode, OOL, IOL, asmstr, itin> { bits<8> FXM; - bits<5> ST; + bits<5> rS; - let Inst{6-10} = ST; + let Inst{6-10} = rS; let Inst{11} = 0; let Inst{12-19} = FXM; let Inst{20} = 0; @@ -666,7 +667,7 @@ class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, string cstr, InstrItinClass itin, list<dag>pattern> : I<opcode, OOL, IOL, asmstr, itin> { bits<8> FM; - bits<5> RT; + bits<5> rT; bit RC = 0; // set by isDOT let Pattern = pattern; @@ -675,7 +676,7 @@ class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{6} = 0; let Inst{7-14} = FM; let Inst{15} = 0; - let Inst{16-20} = RT; + let Inst{16-20} = rT; let Inst{21-30} = xo; let Inst{31} = RC; } @@ -758,6 +759,26 @@ class AForm_3<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, let FRB = 0; } +class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<5> RA; + bits<5> RB; + bits<7> BIBO; // 2 bits of BI and 5 bits of BO (must be 12). + bits<3> CR; + + let Pattern = pattern; + + let Inst{6-10} = RT; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-23} = CR; + let Inst{24-25} = BIBO{6-5}; + let Inst{26-30} = xo; + let Inst{31} = 0; +} + // 1.7.13 M-Form class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index d2df664..a0517a8 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -12,12 +12,13 @@ //===----------------------------------------------------------------------===// #include "PPCInstrInfo.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" +#include "PPCHazardRecognizers.h" #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" -#include "PPCHazardRecognizers.h" -#include "MCTargetDesc/PPCPredicates.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -28,7 +29,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/STLExtras.h" #define GET_INSTRINFO_CTOR #include "PPCGenInstrInfo.inc" @@ -60,7 +60,7 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( return new PPCScoreboardHazardRecognizer(II, DAG); } - return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG); + return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG); } /// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer @@ -141,7 +141,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // Normal instructions can be commuted the obvious way. if (MI->getOpcode() != PPC::RLWIMI) - return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); + return TargetInstrInfo::commuteInstruction(MI, NewMI); // Cannot commute if it has a non-zero rotate count. if (MI->getOperand(3).getImm() != 0) @@ -570,12 +570,15 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, // STVX VAL, 0, R0 // // FIXME: We use R0 here, because it isn't available for RA. - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0), + bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); + unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; + unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), FrameIdx, 0, 0)); NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX)) .addReg(SrcReg, getKillRegState(isKill)) - .addReg(PPC::R0) - .addReg(PPC::R0)); + .addReg(GPR0) + .addReg(GPR0)); } else { llvm_unreachable("Unknown regclass!"); } @@ -707,10 +710,13 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, // Dest = LVX 0, R0 // // FIXME: We use R0 here, because it isn't available for RA. - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0), + bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); + unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; + unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), FrameIdx, 0, 0)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(PPC::R0) - .addReg(PPC::R0)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0) + .addReg(GPR0)); } else { llvm_unreachable("Unknown regclass!"); } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index a503908..8c077b7 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -91,6 +91,20 @@ def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>; def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>; def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>; +def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>; +def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp, + [SDNPMayLoad]>; +def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>; +def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>; +def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>; +def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>; +def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>; +def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>; +def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>; +def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp, + [SDNPHasChain]>; +def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; + def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; // These nodes represent the 32-bit PPC shifts that operate on 6-bit shift @@ -167,6 +181,12 @@ def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx, def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx, [SDNPHasChain, SDNPMayStore]>; +// Instructions to support medium code model +def PPCaddisTocHA : SDNode<"PPCISD::ADDIS_TOC_HA", SDTIntBinOp, []>; +def PPCldTocL : SDNode<"PPCISD::LD_TOC_L", SDTIntBinOp, [SDNPMayLoad]>; +def PPCaddiTocL : SDNode<"PPCISD::ADDI_TOC_L", SDTIntBinOp, []>; + + // Instructions to support dynamic alloca. def SDTDynOp : SDTypeProfile<1, 2, []>; def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>; @@ -369,9 +389,9 @@ def IsBookE : Predicate<"PPCSubTarget.isBookE()">; let hasCtrlDep = 1 in { let Defs = [R1], Uses = [R1] in { -def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "", +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "#ADJCALLSTACKDOWN $amt", [(callseq_start timm:$amt)]>; -def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "", +def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCALLSTACKUP $amt1 $amt2", [(callseq_end timm:$amt1, timm:$amt2)]>; } @@ -380,7 +400,7 @@ def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS), } let Defs = [R1], Uses = [R1] in -def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "", +def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "#DYNALLOC", [(set GPRC:$result, (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>; @@ -389,19 +409,19 @@ def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "", let usesCustomInserter = 1, // Expanded after instruction selection. PPC970_Single = 1 in { def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F, - i32imm:$BROPC), "", + i32imm:$BROPC), "#SELECT_CC_I4", []>; def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F, - i32imm:$BROPC), "", + i32imm:$BROPC), "#SELECT_CC_I8", []>; def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F, - i32imm:$BROPC), "", + i32imm:$BROPC), "#SELECT_CC_F4", []>; def SELECT_CC_F8 : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F, - i32imm:$BROPC), "", + i32imm:$BROPC), "#SELECT_CC_F8", []>; def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F, - i32imm:$BROPC), "", + i32imm:$BROPC), "#SELECT_CC_VRRC", []>; } @@ -409,16 +429,16 @@ let usesCustomInserter = 1, // Expanded after instruction selection. // scavenge a register for it. let mayStore = 1 in def SPILL_CR : Pseudo<(outs), (ins CRRC:$cond, memri:$F), - "", []>; + "#SPILL_CR", []>; // RESTORE_CR - Indicate that we're restoring the CR register (previously // spilled), so we'll need to scavenge a register for it. let mayLoad = 1 in def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F), - "", []>; + "#RESTORE_CR", []>; let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { - let isReturn = 1, Uses = [LR, RM] in + let isCodeGenOnly = 1, isReturn = 1, Uses = [LR, RM] in def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p), "b${p:cc}lr ${p:reg}", BrB, [(retflag)]>; @@ -427,7 +447,7 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { } let Defs = [LR] in - def MovePCtoLR : Pseudo<(outs), (ins), "", []>, + def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>, PPC970_Unit_BRU; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { @@ -439,16 +459,17 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { // BCC represents an arbitrary conditional branch on a predicate. // FIXME: should be able to write a pattern for PPCcondbranch, but can't use - // a two-value operand where a dag node expects two operands. :( - def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst), - "b${cond:cc} ${cond:reg}, $dst" - /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>; + // a two-value operand where a dag node expects two operands. :( + let isCodeGenOnly = 1 in + def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst), + "b${cond:cc} ${cond:reg}, $dst" + /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>; let Defs = [CTR], Uses = [CTR] in { - def BDZ : IForm_ext<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), - "bdz $dst", BrB, []>; - def BDNZ : IForm_ext<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), - "bdnz $dst", BrB, []>; + def BDZ : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), + "bdz $dst">; + def BDNZ : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), + "bdnz $dst">; } } @@ -564,81 +585,81 @@ def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)), let usesCustomInserter = 1 in { let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I8", [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_SUB_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I8", [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_AND_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I8", [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_OR_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I8", [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_XOR_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "ATOMIC_LOAD_XOR_I8", [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_NAND_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I8", [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_ADD_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I16", [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_SUB_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I16", [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_AND_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I16", [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_OR_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I16", [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_XOR_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I16", [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_NAND_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I16", [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_ADD_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I32", [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_SUB_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I32", [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_AND_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I32", [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_OR_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I32", [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_XOR_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I32", [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_NAND_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I32", [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_CMP_SWAP_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I8", [(set GPRC:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; def ATOMIC_CMP_SWAP_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new", [(set GPRC:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; def ATOMIC_CMP_SWAP_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new", [(set GPRC:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; def ATOMIC_SWAP_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_i8", [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>; def ATOMIC_SWAP_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I16", [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>; def ATOMIC_SWAP_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I32", [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>; } } @@ -731,7 +752,7 @@ def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result), []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX : XForm_1<31, 331, (outs GPRC:$rD, ptr_rc:$ea_result), +def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, @@ -1207,7 +1228,7 @@ def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS), // // FIXME: Make this a real Pseudo instruction when the JIT switches to MC. def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), - "", SprMFCR>, + "#MFCRpseud", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins), @@ -1394,13 +1415,13 @@ let Uses = [RM] in { "fdivs $FRT, $FRA, $FRB", FPDivS, [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>; def FMUL : AForm_3<63, 25, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fmul $FRT, $FRA, $FRB", FPFused, - [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRB))]>; + (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC), + "fmul $FRT, $FRA, $FRC", FPFused, + [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRC))]>; def FMULS : AForm_3<59, 25, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), - "fmuls $FRT, $FRA, $FRB", FPGeneral, - [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>; + (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC), + "fmuls $FRT, $FRA, $FRC", FPGeneral, + [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRC))]>; def FSUB : AForm_2<63, 20, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "fsub $FRT, $FRA, $FRB", FPAddSub, @@ -1413,7 +1434,7 @@ let Uses = [RM] in { } let PPC970_Unit = 1 in { // FXU Operations. - def ISEL : AForm_1<31, 15, + def ISEL : AForm_4<31, 15, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index aba2739..851de17 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -15,10 +15,10 @@ #include "PPCJITInfo.h" #include "PPCRelocations.h" #include "PPCTargetMachine.h" -#include "llvm/Function.h" -#include "llvm/Support/Memory.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Memory.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h index 2f8243a..46d4a08 100644 --- a/lib/Target/PowerPC/PPCJITInfo.h +++ b/lib/Target/PowerPC/PPCJITInfo.h @@ -14,8 +14,8 @@ #ifndef POWERPC_JITINFO_H #define POWERPC_JITINFO_H -#include "llvm/Target/TargetJITInfo.h" #include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/Target/TargetJITInfo.h" namespace llvm { class PPCTargetMachine; diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 19ec993..73f7a2c 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "PPC.h" +#include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" @@ -20,7 +21,6 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Target/Mangler.h" -#include "llvm/ADT/SmallString.h" using namespace llvm; static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) { diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index ab8bf1f..378c147 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -15,32 +15,32 @@ #define DEBUG_TYPE "reginfo" #include "PPCRegisterInfo.h" #include "PPC.h" +#include "PPCFrameLowering.h" #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" -#include "PPCFrameLowering.h" #include "PPCSubtarget.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Type.h" -#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include <cstdlib> #define GET_REGINFO_TARGET_DESC @@ -71,7 +71,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, ST.isPPC64() ? 0 : 1, ST.isPPC64() ? 0 : 1), - Subtarget(ST), TII(tii) { + Subtarget(ST), TII(tii), CRSpillFrameIdx(0) { ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX; ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX; ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX; @@ -111,10 +111,15 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return Subtarget.isPPC64() ? CSR_Darwin64_SaveList : CSR_Darwin32_SaveList; + // For 32-bit SVR4, also initialize the frame index associated with + // the CR spill slot. + if (!Subtarget.isPPC64()) + CRSpillFrameIdx = 0; + return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList; } -const unsigned* +const uint32_t* PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { if (Subtarget.isDarwinABI()) return Subtarget.isPPC64() ? CSR_Darwin64_RegMask : @@ -477,6 +482,32 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, MBB.erase(II); } +bool +PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, + unsigned Reg, int &FrameIdx) const { + + // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4 + // ABI, return true to prevent allocating an additional frame slot. + // For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0 + // is arbitrary and will be subsequently ignored. For 32-bit, we must + // create exactly one stack slot and return its FrameIdx for all + // nonvolatiles. + if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) { + if (Subtarget.isPPC64()) { + FrameIdx = 0; + } else if (CRSpillFrameIdx) { + FrameIdx = CRSpillFrameIdx; + } else { + MachineFrameInfo *MFI = + (const_cast<MachineFunction &>(MF)).getFrameInfo(); + FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); + CRSpillFrameIdx = FrameIdx; + } + return true; + } + return false; +} + void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const { @@ -566,7 +597,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // to Offset to get the correct offset. // Naked functions have stack size 0, although getStackSize may not reflect that // because we didn't call all the pieces that compute it for naked functions. - if (!MF.getFunction()->hasFnAttr(Attribute::Naked)) + if (!MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked)) Offset += MFI->getStackSize(); // If we can, encode the offset directly into the instruction. If this is a diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 152c36d..a8fd796 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -30,6 +30,7 @@ class PPCRegisterInfo : public PPCGenRegisterInfo { std::map<unsigned, unsigned> ImmToIdxMap; const PPCSubtarget &Subtarget; const TargetInstrInfo &TII; + mutable int CRSpillFrameIdx; public: PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii); @@ -43,7 +44,7 @@ public: /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; - const unsigned *getCallPreservedMask(CallingConv::ID CC) const; + const uint32_t *getCallPreservedMask(CallingConv::ID CC) const; BitVector getReservedRegs(const MachineFunction &MF) const; @@ -65,6 +66,8 @@ public: int SPAdj, RegScavenger *RS) const; void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex, int SPAdj, RegScavenger *RS) const; + bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, + int &FrameIdx) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index bb193ac..d9b4e30 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #include "PPCSubtarget.h" -#include "PPCRegisterInfo.h" #include "PPC.h" -#include "llvm/GlobalValue.h" -#include "llvm/Target/TargetMachine.h" +#include "PPCRegisterInfo.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetMachine.h" #include <cstdlib> #define GET_SUBTARGETINFO_TARGET_DESC @@ -54,19 +54,26 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, CPUName = sys::getHostCPUName(); #endif - // Parse features string. - ParseSubtargetFeatures(CPUName, FS); - // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUName); + // Make sure 64-bit features are available when CPUname is generic + std::string FullFS = FS; + // If we are generating code for ppc64, verify that options make sense. if (is64Bit) { Has64BitSupport = true; // Silently force 64-bit register use on ppc64. Use64BitRegs = true; + if (!FullFS.empty()) + FullFS = "+64bit," + FullFS; + else + FullFS = "+64bit"; } - + + // Parse features string. + ParseSubtargetFeatures(CPUName, FullFS); + // If the user requested use of 64-bit regs, but the cpu selected doesn't // support it, ignore. if (use64BitRegs() && !has64BitSupport()) diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index b8b1614..416c0f3 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -14,9 +14,9 @@ #ifndef POWERPCSUBTARGET_H #define POWERPCSUBTARGET_H -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/MC/MCInstrItineraries.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <string> #define GET_SUBTARGETINFO_HEADER @@ -33,34 +33,34 @@ namespace PPC { enum { DIR_NONE, DIR_32, - DIR_440, - DIR_601, - DIR_602, - DIR_603, + DIR_440, + DIR_601, + DIR_602, + DIR_603, DIR_7400, - DIR_750, - DIR_970, + DIR_750, + DIR_970, DIR_A2, DIR_E500mc, DIR_E5500, DIR_PWR6, DIR_PWR7, - DIR_64 + DIR_64 }; } class GlobalValue; class TargetMachine; - + class PPCSubtarget : public PPCGenSubtargetInfo { protected: /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned StackAlignment; - + /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; - + /// Which cpu directive was used. unsigned DarwinDirective; @@ -76,7 +76,7 @@ protected: bool IsBookE; bool HasLazyResolverStubs; bool IsJITCodeModel; - + /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; @@ -86,11 +86,11 @@ public: /// PPCSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit); - - /// ParseSubtargetFeatures - Parses features string setting specified + + /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - + /// SetJITMode - This is called to inform the subtarget info that we are /// producing code for the JIT. void SetJITMode(); @@ -99,20 +99,27 @@ public: /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. unsigned getStackAlignment() const { return StackAlignment; } - + /// getDarwinDirective - Returns the -m directive specified for the cpu. /// unsigned getDarwinDirective() const { return DarwinDirective; } - - /// getInstrItins - Return the instruction itineraies based on subtarget + + /// getInstrItins - Return the instruction itineraies based on subtarget /// selection. const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } - /// getTargetDataString - Return the pointer size and type alignment + /// getDataLayoutString - Return the pointer size and type alignment /// properties of this subtarget. - const char *getTargetDataString() const { + const char *getDataLayoutString() const { // Note, the alignment values for f64 and i64 on ppc64 in Darwin // documentation are wrong; these are correct (i.e. "what gcc does"). + if (isPPC64() && isSVR4ABI()) { + if (TargetTriple.getOS() == llvm::Triple::FreeBSD) + return "E-p:64:64-f64:64:64-i64:64:64-f128:64:64-v128:128:128-n32:64"; + else + return "E-p:64:64-f64:64:64-i64:64:64-f128:128:128-v128:128:128-n32:64"; + } + return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64" : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32"; } @@ -120,22 +127,22 @@ public: /// isPPC64 - Return true if we are generating code for 64-bit pointer mode. /// bool isPPC64() const { return IsPPC64; } - + /// has64BitSupport - Return true if the selected CPU supports 64-bit /// instructions, regardless of whether we are in 32-bit or 64-bit mode. bool has64BitSupport() const { return Has64BitSupport; } - + /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit /// registers in 32-bit mode when possible. This can only true if /// has64BitSupport() returns true. bool use64BitRegs() const { return Use64BitRegs; } - + /// hasLazyResolverStub - Return true if accesses to the specified global have /// to go through a dyld lazy resolution stub. This means that an extra load /// is required to get the address of the global. - bool hasLazyResolverStub(const GlobalValue *GV, + bool hasLazyResolverStub(const GlobalValue *GV, const TargetMachine &TM) const; - + // isJITCodeModel - True if we're generating code for the JIT bool isJITCodeModel() const { return IsJITCodeModel; } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 9805112..b8b7882 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -13,13 +13,13 @@ #include "PPCTargetMachine.h" #include "PPC.h" -#include "llvm/PassManager.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; static cl:: @@ -40,7 +40,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, bool is64Bit) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, is64Bit), - DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), + DL(Subtarget.getDataLayoutString()), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 7da2b0c..d917d99 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -15,13 +15,13 @@ #define PPC_TARGETMACHINE_H #include "PPCFrameLowering.h" -#include "PPCSubtarget.h" -#include "PPCJITInfo.h" -#include "PPCInstrInfo.h" #include "PPCISelLowering.h" +#include "PPCInstrInfo.h" +#include "PPCJITInfo.h" #include "PPCSelectionDAGInfo.h" +#include "PPCSubtarget.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" namespace llvm { @@ -29,7 +29,7 @@ namespace llvm { /// class PPCTargetMachine : public LLVMTargetMachine { PPCSubtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment + const DataLayout DL; // Calculates type size & alignment PPCInstrInfo InstrInfo; PPCFrameLowering FrameLowering; PPCJITInfo JITInfo; @@ -58,7 +58,7 @@ public: return &InstrInfo.getRegisterInfo(); } - virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const DataLayout *getDataLayout() const { return &DL; } virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; } virtual const InstrItineraryData *getInstrItineraryData() const { return &InstrItins; diff --git a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp index 5dc8568..fa44331 100644 --- a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp +++ b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "PPC.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; |
