diff options
Diffstat (limited to 'lib/Target/PowerPC')
42 files changed, 2607 insertions, 1360 deletions
diff --git a/lib/Target/PowerPC/AsmParser/CMakeLists.txt b/lib/Target/PowerPC/AsmParser/CMakeLists.txt new file mode 100644 index 0000000..3aa59c0 --- /dev/null +++ b/lib/Target/PowerPC/AsmParser/CMakeLists.txt @@ -0,0 +1,8 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMPowerPCAsmParser + PPCAsmParser.cpp + ) + +add_dependencies(LLVMPowerPCAsmParser PowerPCCommonTableGen) diff --git a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt new file mode 100644 index 0000000..bd08c13 --- /dev/null +++ b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/PowerPC/AsmParser/LLVMBuild.txt --------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = PowerPCAsmParser +parent = PowerPC +required_libraries = PowerPCInfo MC MCParser Support +add_to_library_groups = PowerPC diff --git a/lib/Target/PowerPC/AsmParser/Makefile b/lib/Target/PowerPC/AsmParser/Makefile new file mode 100644 index 0000000..c8a8915 --- /dev/null +++ b/lib/Target/PowerPC/AsmParser/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/PowerPC/AsmParser/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. 
+LIBRARYNAME = LLVMPowerPCAsmParser + +# Hack: we need to include 'main' PowerPC target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp new file mode 100644 index 0000000..9cf16f0 --- /dev/null +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -0,0 +1,723 @@ +//===-- PPCAsmParser.cpp - Parse PowerPC asm to MCInst instructions ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/PPCMCTargetDesc.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +static unsigned RRegs[32] = { + PPC::R0, PPC::R1, PPC::R2, PPC::R3, + PPC::R4, PPC::R5, PPC::R6, PPC::R7, + PPC::R8, PPC::R9, PPC::R10, PPC::R11, + PPC::R12, PPC::R13, PPC::R14, PPC::R15, + PPC::R16, PPC::R17, PPC::R18, PPC::R19, + PPC::R20, PPC::R21, PPC::R22, PPC::R23, + PPC::R24, PPC::R25, PPC::R26, PPC::R27, + PPC::R28, PPC::R29, PPC::R30, PPC::R31 +}; +static unsigned RRegsNoR0[32] = { + PPC::ZERO, + PPC::R1, PPC::R2, PPC::R3, + PPC::R4, PPC::R5, PPC::R6, PPC::R7, + PPC::R8, PPC::R9, PPC::R10, PPC::R11, + PPC::R12, PPC::R13, 
PPC::R14, PPC::R15, + PPC::R16, PPC::R17, PPC::R18, PPC::R19, + PPC::R20, PPC::R21, PPC::R22, PPC::R23, + PPC::R24, PPC::R25, PPC::R26, PPC::R27, + PPC::R28, PPC::R29, PPC::R30, PPC::R31 +}; +static unsigned XRegs[32] = { + PPC::X0, PPC::X1, PPC::X2, PPC::X3, + PPC::X4, PPC::X5, PPC::X6, PPC::X7, + PPC::X8, PPC::X9, PPC::X10, PPC::X11, + PPC::X12, PPC::X13, PPC::X14, PPC::X15, + PPC::X16, PPC::X17, PPC::X18, PPC::X19, + PPC::X20, PPC::X21, PPC::X22, PPC::X23, + PPC::X24, PPC::X25, PPC::X26, PPC::X27, + PPC::X28, PPC::X29, PPC::X30, PPC::X31 +}; +static unsigned XRegsNoX0[32] = { + PPC::ZERO8, + PPC::X1, PPC::X2, PPC::X3, + PPC::X4, PPC::X5, PPC::X6, PPC::X7, + PPC::X8, PPC::X9, PPC::X10, PPC::X11, + PPC::X12, PPC::X13, PPC::X14, PPC::X15, + PPC::X16, PPC::X17, PPC::X18, PPC::X19, + PPC::X20, PPC::X21, PPC::X22, PPC::X23, + PPC::X24, PPC::X25, PPC::X26, PPC::X27, + PPC::X28, PPC::X29, PPC::X30, PPC::X31 +}; +static unsigned FRegs[32] = { + PPC::F0, PPC::F1, PPC::F2, PPC::F3, + PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8, PPC::F9, PPC::F10, PPC::F11, + PPC::F12, PPC::F13, PPC::F14, PPC::F15, + PPC::F16, PPC::F17, PPC::F18, PPC::F19, + PPC::F20, PPC::F21, PPC::F22, PPC::F23, + PPC::F24, PPC::F25, PPC::F26, PPC::F27, + PPC::F28, PPC::F29, PPC::F30, PPC::F31 +}; +static unsigned VRegs[32] = { + PPC::V0, PPC::V1, PPC::V2, PPC::V3, + PPC::V4, PPC::V5, PPC::V6, PPC::V7, + PPC::V8, PPC::V9, PPC::V10, PPC::V11, + PPC::V12, PPC::V13, PPC::V14, PPC::V15, + PPC::V16, PPC::V17, PPC::V18, PPC::V19, + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31 +}; +static unsigned CRBITRegs[32] = { + PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, + PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, + PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, + PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, + PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, + PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, + PPC::CR6LT, 
PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, + PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN +}; +static unsigned CRRegs[8] = { + PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, + PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7 +}; + +struct PPCOperand; + +class PPCAsmParser : public MCTargetAsmParser { + MCSubtargetInfo &STI; + MCAsmParser &Parser; + bool IsPPC64; + + MCAsmParser &getParser() const { return Parser; } + MCAsmLexer &getLexer() const { return Parser.getLexer(); } + + void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } + bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } + + bool isPPC64() const { return IsPPC64; } + + bool MatchRegisterName(const AsmToken &Tok, + unsigned &RegNo, int64_t &IntVal); + + virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); + + bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + bool ParseDirectiveWord(unsigned Size, SMLoc L); + bool ParseDirectiveTC(unsigned Size, SMLoc L); + + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm); + + void ProcessInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Ops); + + /// @name Auto-generated Match Functions + /// { + +#define GET_ASSEMBLER_HEADER +#include "PPCGenAsmMatcher.inc" + + /// } + + +public: + PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) + : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { + // Check for 64-bit vs. 32-bit pointer mode. + Triple TheTriple(STI.getTargetTriple()); + IsPPC64 = TheTriple.getArch() == Triple::ppc64; + // Initialize the set of available features. 
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + } + + virtual bool ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + virtual bool ParseDirective(AsmToken DirectiveID); +}; + +/// PPCOperand - Instances of this class represent a parsed PowerPC machine +/// instruction. +struct PPCOperand : public MCParsedAsmOperand { + enum KindTy { + Token, + Immediate, + Expression + } Kind; + + SMLoc StartLoc, EndLoc; + bool IsPPC64; + + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct ImmOp { + int64_t Val; + }; + + struct ExprOp { + const MCExpr *Val; + }; + + union { + struct TokOp Tok; + struct ImmOp Imm; + struct ExprOp Expr; + }; + + PPCOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} +public: + PPCOperand(const PPCOperand &o) : MCParsedAsmOperand() { + Kind = o.Kind; + StartLoc = o.StartLoc; + EndLoc = o.EndLoc; + IsPPC64 = o.IsPPC64; + switch (Kind) { + case Token: + Tok = o.Tok; + break; + case Immediate: + Imm = o.Imm; + break; + case Expression: + Expr = o.Expr; + break; + } + } + + /// getStartLoc - Get the location of the first token of this operand. + SMLoc getStartLoc() const { return StartLoc; } + + /// getEndLoc - Get the location of the last token of this operand. + SMLoc getEndLoc() const { return EndLoc; } + + /// isPPC64 - True if this operand is for an instruction in 64-bit mode. 
+ bool isPPC64() const { return IsPPC64; } + + int64_t getImm() const { + assert(Kind == Immediate && "Invalid access!"); + return Imm.Val; + } + + const MCExpr *getExpr() const { + assert(Kind == Expression && "Invalid access!"); + return Expr.Val; + } + + unsigned getReg() const { + assert(isRegNumber() && "Invalid access!"); + return (unsigned) Imm.Val; + } + + unsigned getCCReg() const { + assert(isCCRegNumber() && "Invalid access!"); + return (unsigned) Imm.Val; + } + + unsigned getCRBitMask() const { + assert(isCRBitMask() && "Invalid access!"); + return 7 - countTrailingZeros<uint64_t>(Imm.Val); + } + + bool isToken() const { return Kind == Token; } + bool isImm() const { return Kind == Immediate || Kind == Expression; } + bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); } + bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); } + bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); } + bool isU16Imm() const { return Kind == Expression || + (Kind == Immediate && isUInt<16>(getImm())); } + bool isS16Imm() const { return Kind == Expression || + (Kind == Immediate && isInt<16>(getImm())); } + bool isS16ImmX4() const { return Kind == Expression || + (Kind == Immediate && isInt<16>(getImm()) && + (getImm() & 3) == 0); } + bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); } + bool isCCRegNumber() const { return Kind == Immediate && + isUInt<3>(getImm()); } + bool isCRBitMask() const { return Kind == Immediate && isUInt<8>(getImm()) && + isPowerOf2_32(getImm()); } + bool isMem() const { return false; } + bool isReg() const { return false; } + + void addRegOperands(MCInst &Inst, unsigned N) const { + llvm_unreachable("addRegOperands"); + } + + void addRegGPRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(RRegs[getReg()])); + } + + void addRegGPRCNoR0Operands(MCInst &Inst, unsigned N) const { + 
assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(RRegsNoR0[getReg()])); + } + + void addRegG8RCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(XRegs[getReg()])); + } + + void addRegG8RCNoX0Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(XRegsNoX0[getReg()])); + } + + void addRegGxRCOperands(MCInst &Inst, unsigned N) const { + if (isPPC64()) + addRegG8RCOperands(Inst, N); + else + addRegGPRCOperands(Inst, N); + } + + void addRegGxRCNoR0Operands(MCInst &Inst, unsigned N) const { + if (isPPC64()) + addRegG8RCNoX0Operands(Inst, N); + else + addRegGPRCNoR0Operands(Inst, N); + } + + void addRegF4RCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(FRegs[getReg()])); + } + + void addRegF8RCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(FRegs[getReg()])); + } + + void addRegVRRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(VRegs[getReg()])); + } + + void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getReg()])); + } + + void addRegCRRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(CRRegs[getCCReg()])); + } + + void addCRBitMaskOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(CRRegs[getCRBitMask()])); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + if (Kind == Immediate) + 
Inst.addOperand(MCOperand::CreateImm(getImm())); + else + Inst.addOperand(MCOperand::CreateExpr(getExpr())); + } + + StringRef getToken() const { + assert(Kind == Token && "Invalid access!"); + return StringRef(Tok.Data, Tok.Length); + } + + virtual void print(raw_ostream &OS) const; + + static PPCOperand *CreateToken(StringRef Str, SMLoc S, bool IsPPC64) { + PPCOperand *Op = new PPCOperand(Token); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + Op->IsPPC64 = IsPPC64; + return Op; + } + + static PPCOperand *CreateImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) { + PPCOperand *Op = new PPCOperand(Immediate); + Op->Imm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + Op->IsPPC64 = IsPPC64; + return Op; + } + + static PPCOperand *CreateExpr(const MCExpr *Val, + SMLoc S, SMLoc E, bool IsPPC64) { + PPCOperand *Op = new PPCOperand(Expression); + Op->Expr.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + Op->IsPPC64 = IsPPC64; + return Op; + } +}; + +} // end anonymous namespace. 
+ +void PPCOperand::print(raw_ostream &OS) const { + switch (Kind) { + case Token: + OS << "'" << getToken() << "'"; + break; + case Immediate: + OS << getImm(); + break; + case Expression: + getExpr()->print(OS); + break; + } +} + + +void PPCAsmParser:: +ProcessInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + switch (Inst.getOpcode()) { + case PPC::SLWI: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(PPC::RLWINM); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(N)); + TmpInst.addOperand(MCOperand::CreateImm(0)); + TmpInst.addOperand(MCOperand::CreateImm(31 - N)); + Inst = TmpInst; + break; + } + case PPC::SRWI: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(PPC::RLWINM); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(32 - N)); + TmpInst.addOperand(MCOperand::CreateImm(N)); + TmpInst.addOperand(MCOperand::CreateImm(31)); + Inst = TmpInst; + break; + } + case PPC::SLDI: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(PPC::RLDICR); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(N)); + TmpInst.addOperand(MCOperand::CreateImm(63 - N)); + Inst = TmpInst; + break; + } + case PPC::SRDI: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(PPC::RLDICL); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(64 - N)); + TmpInst.addOperand(MCOperand::CreateImm(N)); + Inst = TmpInst; + break; + } + } +} + +bool PPCAsmParser:: +MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { + 
MCInst Inst; + + switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) { + default: break; + case Match_Success: + // Post-process instructions (typically extended mnemonics) + ProcessInstruction(Inst, Operands); + Inst.setLoc(IDLoc); + Out.EmitInstruction(Inst); + return false; + case Match_MissingFeature: + return Error(IDLoc, "instruction use requires an option to be enabled"); + case Match_MnemonicFail: + return Error(IDLoc, "unrecognized instruction mnemonic"); + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0U) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((PPCOperand*)Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + } + + llvm_unreachable("Implement any new match types added!"); +} + +bool PPCAsmParser:: +MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) { + if (Tok.is(AsmToken::Identifier)) { + StringRef Name = Tok.getString(); + + if (Name.equals_lower("lr")) { + RegNo = isPPC64()? PPC::LR8 : PPC::LR; + IntVal = 8; + return false; + } else if (Name.equals_lower("ctr")) { + RegNo = isPPC64()? PPC::CTR8 : PPC::CTR; + IntVal = 9; + return false; + } else if (Name.substr(0, 1).equals_lower("r") && + !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { + RegNo = isPPC64()? 
XRegs[IntVal] : RRegs[IntVal]; + return false; + } else if (Name.substr(0, 1).equals_lower("f") && + !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { + RegNo = FRegs[IntVal]; + return false; + } else if (Name.substr(0, 1).equals_lower("v") && + !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { + RegNo = VRegs[IntVal]; + return false; + } else if (Name.substr(0, 2).equals_lower("cr") && + !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) { + RegNo = CRRegs[IntVal]; + return false; + } + } + + return true; +} + +bool PPCAsmParser:: +ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { + const AsmToken &Tok = Parser.getTok(); + StartLoc = Tok.getLoc(); + EndLoc = Tok.getEndLoc(); + RegNo = 0; + int64_t IntVal; + + if (!MatchRegisterName(Tok, RegNo, IntVal)) { + Parser.Lex(); // Eat identifier token. + return false; + } + + return Error(StartLoc, "invalid register name"); +} + +bool PPCAsmParser:: +ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + const MCExpr *EVal; + PPCOperand *Op; + + // Attempt to parse the next token as an immediate + switch (getLexer().getKind()) { + // Special handling for register names. These are interpreted + // as immediates corresponding to the register number. + case AsmToken::Percent: + Parser.Lex(); // Eat the '%'. + unsigned RegNo; + int64_t IntVal; + if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) { + Parser.Lex(); // Eat the identifier token. 
+ Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64()); + Operands.push_back(Op); + return false; + } + return Error(S, "invalid register name"); + + // All other expressions + case AsmToken::LParen: + case AsmToken::Plus: + case AsmToken::Minus: + case AsmToken::Integer: + case AsmToken::Identifier: + case AsmToken::Dot: + case AsmToken::Dollar: + if (!getParser().parseExpression(EVal)) + break; + /* fall through */ + default: + return Error(S, "unknown operand"); + } + + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(EVal)) + Op = PPCOperand::CreateImm(CE->getValue(), S, E, isPPC64()); + else + Op = PPCOperand::CreateExpr(EVal, S, E, isPPC64()); + + // Push the parsed operand into the list of operands + Operands.push_back(Op); + + // Check for D-form memory operands + if (getLexer().is(AsmToken::LParen)) { + Parser.Lex(); // Eat the '('. + S = Parser.getTok().getLoc(); + + int64_t IntVal; + switch (getLexer().getKind()) { + case AsmToken::Percent: + Parser.Lex(); // Eat the '%'. + unsigned RegNo; + if (MatchRegisterName(Parser.getTok(), RegNo, IntVal)) + return Error(S, "invalid register name"); + Parser.Lex(); // Eat the identifier token. + break; + + case AsmToken::Integer: + if (getParser().parseAbsoluteExpression(IntVal) || + IntVal < 0 || IntVal > 31) + return Error(S, "invalid register number"); + break; + + default: + return Error(S, "invalid memory operand"); + } + + if (getLexer().isNot(AsmToken::RParen)) + return Error(Parser.getTok().getLoc(), "missing ')'"); + E = Parser.getTok().getLoc(); + Parser.Lex(); // Eat the ')'. + + Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64()); + Operands.push_back(Op); + } + + return false; +} + +/// Parse an instruction mnemonic followed by its operands. +bool PPCAsmParser:: +ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // The first operand is the token for the instruction name. 
+ // If the instruction ends in a '.', we need to create a separate + // token for it, to match what TableGen is doing. + size_t Dot = Name.find('.'); + StringRef Mnemonic = Name.slice(0, Dot); + Operands.push_back(PPCOperand::CreateToken(Mnemonic, NameLoc, isPPC64())); + if (Dot != StringRef::npos) { + SMLoc DotLoc = SMLoc::getFromPointer(NameLoc.getPointer() + Dot); + StringRef DotStr = Name.slice(Dot, StringRef::npos); + Operands.push_back(PPCOperand::CreateToken(DotStr, DotLoc, isPPC64())); + } + + // If there are no more operands then finish + if (getLexer().is(AsmToken::EndOfStatement)) + return false; + + // Parse the first operand + if (ParseOperand(Operands)) + return true; + + while (getLexer().isNot(AsmToken::EndOfStatement) && + getLexer().is(AsmToken::Comma)) { + // Consume the comma token + getLexer().Lex(); + + // Parse the next operand + if (ParseOperand(Operands)) + return true; + } + + return false; +} + +/// ParseDirective parses the PPC specific directives +bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getIdentifier(); + if (IDVal == ".word") + return ParseDirectiveWord(4, DirectiveID.getLoc()); + if (IDVal == ".tc") + return ParseDirectiveTC(isPPC64()? 
8 : 4, DirectiveID.getLoc()); + return true; +} + +/// ParseDirectiveWord +/// ::= .word [ expression (, expression)* ] +bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + const MCExpr *Value; + if (getParser().parseExpression(Value)) + return true; + + getParser().getStreamer().EmitValue(Value, Size); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + if (getLexer().isNot(AsmToken::Comma)) + return Error(L, "unexpected token in directive"); + Parser.Lex(); + } + } + + Parser.Lex(); + return false; +} + +/// ParseDirectiveTC +/// ::= .tc [ symbol (, expression)* ] +bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) { + // Skip TC symbol, which is only used with XCOFF. + while (getLexer().isNot(AsmToken::EndOfStatement) + && getLexer().isNot(AsmToken::Comma)) + Parser.Lex(); + if (getLexer().isNot(AsmToken::Comma)) + return Error(L, "unexpected token in directive"); + Parser.Lex(); + + // Align to word size. + getParser().getStreamer().EmitValueToAlignment(Size); + + // Emit expressions. + return ParseDirectiveWord(Size, L); +} + +/// Force static initialization. 
+extern "C" void LLVMInitializePowerPCAsmParser() { + RegisterMCAsmParser<PPCAsmParser> A(ThePPC32Target); + RegisterMCAsmParser<PPCAsmParser> B(ThePPC64Target); +} + +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION +#include "PPCGenAsmMatcher.inc" diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 6036428..e5c5204 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -1,6 +1,7 @@ set(LLVM_TARGET_DEFINITIONS PPC.td) tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM PPCGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM PPCGenCodeEmitter.inc -gen-emitter) tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info) @@ -26,12 +27,14 @@ add_llvm_target(PowerPCCodeGen PPCRegisterInfo.cpp PPCSubtarget.cpp PPCTargetMachine.cpp + PPCTargetObjectFile.cpp PPCTargetTransformInfo.cpp PPCSelectionDAGInfo.cpp ) add_dependencies(LLVMPowerPCCodeGen intrinsics_gen) +add_subdirectory(AsmParser) add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index bacc108..432167e 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -129,7 +129,10 @@ void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo, void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << (short)MI->getOperand(OpNo).getImm(); + if (MI->getOperand(OpNo).isImm()) + O << (short)MI->getOperand(OpNo).getImm(); + else + printOperand(MI, OpNo, O); } void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo, @@ -137,22 +140,14 @@ void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo, O << (unsigned short)MI->getOperand(OpNo).getImm(); } -void 
PPCInstPrinter::printS16X4ImmOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - if (MI->getOperand(OpNo).isImm()) - O << (short)(MI->getOperand(OpNo).getImm()*4); - else - printOperand(MI, OpNo, O); -} - void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { if (!MI->getOperand(OpNo).isImm()) return printOperand(MI, OpNo, O); // Branches can take an immediate operand. This is used by the branch - // selection pass to print $+8, an eight byte displacement from the PC. - O << "$+"; + // selection pass to print .+8, an eight byte displacement from the PC. + O << ".+"; printAbsAddrOperand(MI, OpNo, O); } @@ -182,7 +177,7 @@ void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo, void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printSymbolLo(MI, OpNo, O); + printS16ImmOperand(MI, OpNo, O); O << '('; if (MI->getOperand(OpNo+1).getReg() == PPC::R0) O << "0"; @@ -191,22 +186,6 @@ void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo, O << ')'; } -void PPCInstPrinter::printMemRegImmShifted(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - if (MI->getOperand(OpNo).isImm()) - printS16X4ImmOperand(MI, OpNo, O); - else - printSymbolLo(MI, OpNo, O); - O << '('; - - if (MI->getOperand(OpNo+1).getReg() == PPC::R0) - O << "0"; - else - printOperand(MI, OpNo+1, O); - O << ')'; -} - - void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O) { // When used as the base register, r0 reads constant zero rather than @@ -256,39 +235,4 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, assert(Op.isExpr() && "unknown operand kind in printOperand"); O << *Op.getExpr(); } - -void PPCInstPrinter::printSymbolLo(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - if (MI->getOperand(OpNo).isImm()) - return printS16ImmOperand(MI, OpNo, O); - - // FIXME: This is a terrible hack because we can't encode lo16() as an operand - // flag of 
a subtraction. See the FIXME in GetSymbolRef in PPCMCInstLower. - if (MI->getOperand(OpNo).isExpr() && - isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) { - O << "lo16("; - printOperand(MI, OpNo, O); - O << ')'; - } else { - printOperand(MI, OpNo, O); - } -} - -void PPCInstPrinter::printSymbolHi(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - if (MI->getOperand(OpNo).isImm()) - return printS16ImmOperand(MI, OpNo, O); - - // FIXME: This is a terrible hack because we can't encode lo16() as an operand - // flag of a subtraction. See the FIXME in GetSymbolRef in PPCMCInstLower. - if (MI->getOperand(OpNo).isExpr() && - isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) { - O << "ha16("; - printOperand(MI, OpNo, O); - O << ')'; - } else { - printOperand(MI, OpNo, O); - } -} - diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 8f1e211..f64a329 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -50,19 +50,13 @@ public: void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printS16X4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printMemRegImmShifted(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O); - - // FIXME: Remove - void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O); }; } // end namespace llvm diff 
--git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt index 95fac54..7b3e843 100644 --- a/lib/Target/PowerPC/LLVMBuild.txt +++ b/lib/Target/PowerPC/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = InstPrinter MCTargetDesc TargetInfo +subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo [component_0] type = TargetGroup diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt index b674883..45be471 100644 --- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMPowerPCDesc PPCMCTargetDesc.cpp PPCMCAsmInfo.cpp PPCMCCodeEmitter.cpp + PPCMCExpr.cpp PPCPredicates.cpp PPCELFObjectWriter.cpp ) diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index ec26574..3fa2e09 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -22,7 +22,7 @@ #include "llvm/Support/TargetRegistry.h" using namespace llvm; -static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { +static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { switch (Kind) { default: llvm_unreachable("Unknown fixup kind!"); @@ -37,19 +37,35 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { return Value & 0xfffc; case PPC::fixup_ppc_br24: return Value & 0x3fffffc; -#if 0 - case PPC::fixup_ppc_hi16: - return (Value >> 16) & 0xffff; -#endif - case PPC::fixup_ppc_ha16: - return ((Value >> 16) + ((Value & 0x8000) ? 
1 : 0)) & 0xffff; - case PPC::fixup_ppc_lo16: + case PPC::fixup_ppc_half16: return Value & 0xffff; - case PPC::fixup_ppc_lo16_ds: + case PPC::fixup_ppc_half16ds: return Value & 0xfffc; } } +static unsigned getFixupKindNumBytes(unsigned Kind) { + switch (Kind) { + default: + llvm_unreachable("Unknown fixup kind!"); + case FK_Data_1: + return 1; + case FK_Data_2: + case PPC::fixup_ppc_half16: + case PPC::fixup_ppc_half16ds: + return 2; + case FK_Data_4: + case PPC::fixup_ppc_brcond14: + case PPC::fixup_ppc_br24: + return 4; + case FK_Data_8: + return 8; + case PPC::fixup_ppc_tlsreg: + case PPC::fixup_ppc_nofixup: + return 0; + } +} + namespace { class PPCMachObjectWriter : public MCMachObjectTargetWriter { public: @@ -77,9 +93,8 @@ public: // name offset bits flags { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_ppc_lo16", 16, 16, 0 }, - { "fixup_ppc_ha16", 16, 16, 0 }, - { "fixup_ppc_lo16_ds", 16, 14, 0 }, + { "fixup_ppc_half16", 0, 16, 0 }, + { "fixup_ppc_half16ds", 0, 14, 0 }, { "fixup_ppc_tlsreg", 0, 0, 0 }, { "fixup_ppc_nofixup", 0, 0, 0 } }; @@ -98,12 +113,13 @@ public: if (!Value) return; // Doesn't change encoding. unsigned Offset = Fixup.getOffset(); + unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); // For each byte of the fragment that the fixup touches, mask in the bits // from the fixup value. The Value has been "split up" into the appropriate // bitfields above. 
- for (unsigned i = 0; i != 4; ++i) - Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff); + for (unsigned i = 0; i != NumBytes; ++i) + Data[Offset + i] |= uint8_t((Value >> ((NumBytes - i - 1)*8)) & 0xff); } bool mayNeedRelaxation(const MCInst &Inst) const { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 81a86dc..7188f93 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -33,26 +33,9 @@ namespace { virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const; - virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset); - - virtual void sortRelocs(const MCAssembler &Asm, - std::vector<ELFRelocationEntry> &Relocs); - }; - - class PPCELFRelocationEntry : public ELFRelocationEntry { - public: - PPCELFRelocationEntry(const ELFRelocationEntry &RE); - bool operator<(const PPCELFRelocationEntry &RE) const { - return (RE.r_offset < r_offset || - (RE.r_offset == r_offset && RE.Type > Type)); - } }; } -PPCELFRelocationEntry::PPCELFRelocationEntry(const ELFRelocationEntry &RE) - : ELFRelocationEntry(RE.r_offset, RE.Index, RE.Type, RE.Symbol, - RE.r_addend, *RE.Fixup) {} - PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) : MCELFObjectTargetWriter(Is64Bit, OSABI, Is64Bit ? 
ELF::EM_PPC64 : ELF::EM_PPC, @@ -98,7 +81,7 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case PPC::fixup_ppc_brcond14: Type = ELF::R_PPC_ADDR14; // XXX: or BRNTAKEN?_ break; - case PPC::fixup_ppc_ha16: + case PPC::fixup_ppc_half16: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); case MCSymbolRefExpr::VK_PPC_TPREL16_HA: @@ -107,7 +90,7 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_DTPREL16_HA: Type = ELF::R_PPC64_DTPREL16_HA; break; - case MCSymbolRefExpr::VK_None: + case MCSymbolRefExpr::VK_PPC_ADDR16_HA: Type = ELF::R_PPC_ADDR16_HA; break; case MCSymbolRefExpr::VK_PPC_TOC16_HA: @@ -122,11 +105,6 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA: Type = ELF::R_PPC64_GOT_TLSLD16_HA; break; - } - break; - case PPC::fixup_ppc_lo16: - switch (Modifier) { - default: llvm_unreachable("Unsupported Modifier"); case MCSymbolRefExpr::VK_PPC_TPREL16_LO: Type = ELF::R_PPC_TPREL16_LO; break; @@ -134,6 +112,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, Type = ELF::R_PPC64_DTPREL16_LO; break; case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC_ADDR16; + break; + case MCSymbolRefExpr::VK_PPC_ADDR16_LO: Type = ELF::R_PPC_ADDR16_LO; break; case MCSymbolRefExpr::VK_PPC_TOC_ENTRY: @@ -150,12 +131,15 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, break; } break; - case PPC::fixup_ppc_lo16_ds: + case PPC::fixup_ppc_half16ds: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); case MCSymbolRefExpr::VK_None: Type = ELF::R_PPC64_ADDR16_DS; break; + case MCSymbolRefExpr::VK_PPC_ADDR16_LO: + Type = ELF::R_PPC64_ADDR16_LO_DS; + break; case MCSymbolRefExpr::VK_PPC_TOC_ENTRY: Type = ELF::R_PPC64_TOC16_DS; break; @@ -231,47 +215,6 @@ const MCSymbol *PPCELFObjectWriter::undefinedExplicitRelSym(const MCValue &Targe return NULL; } -void 
PPCELFObjectWriter:: -adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { - switch ((unsigned)Fixup.getKind()) { - case PPC::fixup_ppc_ha16: - case PPC::fixup_ppc_lo16: - case PPC::fixup_ppc_lo16_ds: - RelocOffset += 2; - break; - default: - break; - } -} - -// The standard sorter only sorts on the r_offset field, but PowerPC can -// have multiple relocations at the same offset. Sort secondarily on the -// relocation type to avoid nondeterminism. -void PPCELFObjectWriter::sortRelocs(const MCAssembler &Asm, - std::vector<ELFRelocationEntry> &Relocs) { - - // Copy to a temporary vector of relocation entries having a different - // sort function. - std::vector<PPCELFRelocationEntry> TmpRelocs; - - for (std::vector<ELFRelocationEntry>::iterator R = Relocs.begin(); - R != Relocs.end(); ++R) { - TmpRelocs.push_back(PPCELFRelocationEntry(*R)); - } - - // Sort in place by ascending r_offset and descending r_type. - array_pod_sort(TmpRelocs.begin(), TmpRelocs.end()); - - // Copy back to the original vector. - unsigned I = 0; - for (std::vector<PPCELFRelocationEntry>::iterator R = TmpRelocs.begin(); - R != TmpRelocs.end(); ++R, ++I) { - Relocs[I] = ELFRelocationEntry(R->r_offset, R->Index, R->Type, - R->Symbol, R->r_addend, *R->Fixup); - } -} - - MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS, bool Is64Bit, uint8_t OSABI) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 86c44f5..3ea59f0 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -25,17 +25,13 @@ enum Fixups { /// branches. fixup_ppc_brcond14, - /// fixup_ppc_lo16 - A 16-bit fixup corresponding to lo16(_foo) for instrs - /// like 'li'. - fixup_ppc_lo16, + /// fixup_ppc_half16 - A 16-bit fixup corresponding to lo16(_foo) + /// or ha16(_foo) for instrs like 'li' or 'addis'. 
+ fixup_ppc_half16, - /// fixup_ppc_ha16 - A 16-bit fixup corresponding to ha16(_foo) for instrs - /// like 'lis'. - fixup_ppc_ha16, - - /// fixup_ppc_lo16_ds - A 14-bit fixup corresponding to lo16(_foo) with + /// fixup_ppc_half16ds - A 14-bit fixup corresponding to lo16(_foo) with /// implied 2 zero bits for instrs like 'std'. - fixup_ppc_lo16_ds, + fixup_ppc_half16ds, /// fixup_ppc_tlsreg - Insert thread-pointer register number. fixup_ppc_tlsreg, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index a25d7fe..bb7ce6f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -59,6 +59,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { // Set up DWARF directives HasLEB128 = true; // Target asm supports leb128 directives (little-endian) + MinInstAlignment = 4; // Exceptions handling ExceptionsType = ExceptionHandling::DwarfCFI; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 2223cd6..31c73ae 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -48,10 +48,8 @@ public: SmallVectorImpl<MCFixup> &Fixups) const; unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const; - unsigned getHA16Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const; - unsigned getLO16Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getS16ImmEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const; unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo, @@ -136,25 +134,14 @@ unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo, return 0; } 
-unsigned PPCMCCodeEmitter::getHA16Encoding(const MCInst &MI, unsigned OpNo, +unsigned PPCMCCodeEmitter::getS16ImmEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(OpNo); if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); // Add a fixup for the branch target. - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_ha16)); - return 0; -} - -unsigned PPCMCCodeEmitter::getLO16Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { - const MCOperand &MO = MI.getOperand(OpNo); - if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); - - // Add a fixup for the branch target. - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_lo16)); + Fixups.push_back(MCFixup::Create(2, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_half16)); return 0; } @@ -170,8 +157,8 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo, return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits; // Add a fixup for the displacement field. - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_lo16)); + Fixups.push_back(MCFixup::Create(2, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_half16)); return RegBits; } @@ -185,11 +172,11 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, const MCOperand &MO = MI.getOperand(OpNo); if (MO.isImm()) - return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits; + return ((getMachineOpValue(MI, MO, Fixups) >> 2) & 0x3FFF) | RegBits; // Add a fixup for the displacement field. 
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_lo16_ds)); + Fixups.push_back(MCFixup::Create(2, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_half16ds)); return RegBits; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp new file mode 100644 index 0000000..f0613ff --- /dev/null +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -0,0 +1,108 @@ +//===-- PPCMCExpr.cpp - PPC specific MC expression classes ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ppcmcexpr" +#include "PPCMCExpr.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" + +using namespace llvm; + +const PPCMCExpr* +PPCMCExpr::Create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx) { + return new (Ctx) PPCMCExpr(Kind, Expr); +} + +void PPCMCExpr::PrintImpl(raw_ostream &OS) const { + switch (Kind) { + default: llvm_unreachable("Invalid kind!"); + case VK_PPC_HA16: OS << "ha16"; break; + case VK_PPC_LO16: OS << "lo16"; break; + } + + OS << '('; + getSubExpr()->print(OS); + OS << ')'; +} + +bool +PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout) const { + MCValue Value; + + if (!getSubExpr()->EvaluateAsRelocatable(Value, *Layout)) + return false; + + if (Value.isAbsolute()) { + int64_t Result = Value.getConstant(); + switch (Kind) { + default: + llvm_unreachable("Invalid kind!"); + case VK_PPC_HA16: + Result = ((Result >> 16) + ((Result & 0x8000) ? 
1 : 0)) & 0xffff; + break; + case VK_PPC_LO16: + Result = Result & 0xffff; + break; + } + Res = MCValue::get(Result); + } else { + MCContext &Context = Layout->getAssembler().getContext(); + const MCSymbolRefExpr *Sym = Value.getSymA(); + MCSymbolRefExpr::VariantKind Modifier = Sym->getKind(); + if (Modifier != MCSymbolRefExpr::VK_None) + return false; + switch (Kind) { + default: + llvm_unreachable("Invalid kind!"); + case VK_PPC_HA16: + Modifier = MCSymbolRefExpr::VK_PPC_ADDR16_HA; + break; + case VK_PPC_LO16: + Modifier = MCSymbolRefExpr::VK_PPC_ADDR16_LO; + break; + } + Sym = MCSymbolRefExpr::Create(&Sym->getSymbol(), Modifier, Context); + Res = MCValue::get(Sym, Value.getSymB(), Value.getConstant()); + } + + return true; +} + +// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps +// that method should be made public? +static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) { + switch (Value->getKind()) { + case MCExpr::Target: + llvm_unreachable("Can't handle nested target expr!"); + + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); + AddValueSymbols_(BE->getLHS(), Asm); + AddValueSymbols_(BE->getRHS(), Asm); + break; + } + + case MCExpr::SymbolRef: + Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); + break; + + case MCExpr::Unary: + AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm); + break; + } +} + +void PPCMCExpr::AddValueSymbols(MCAssembler *Asm) const { + AddValueSymbols_(getSubExpr(), Asm); +} diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h new file mode 100644 index 0000000..a080537 --- /dev/null +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -0,0 +1,78 @@ +//===-- PPCMCExpr.h - PPC specific MC expression classes --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// 
License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef PPCMCEXPR_H +#define PPCMCEXPR_H + +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCAsmLayout.h" + +namespace llvm { + +class PPCMCExpr : public MCTargetExpr { +public: + enum VariantKind { + VK_PPC_None, + VK_PPC_HA16, + VK_PPC_LO16 + }; + +private: + const VariantKind Kind; + const MCExpr *Expr; + + explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr) + : Kind(_Kind), Expr(_Expr) {} + +public: + /// @name Construction + /// @{ + + static const PPCMCExpr *Create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx); + + static const PPCMCExpr *CreateHa16(const MCExpr *Expr, MCContext &Ctx) { + return Create(VK_PPC_HA16, Expr, Ctx); + } + + static const PPCMCExpr *CreateLo16(const MCExpr *Expr, MCContext &Ctx) { + return Create(VK_PPC_LO16, Expr, Ctx); + } + + /// @} + /// @name Accessors + /// @{ + + /// getOpcode - Get the kind of this expression. + VariantKind getKind() const { return Kind; } + + /// getSubExpr - Get the child of this expression. + const MCExpr *getSubExpr() const { return Expr; } + + /// @} + + void PrintImpl(raw_ostream &OS) const; + bool EvaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout) const; + void AddValueSymbols(MCAssembler *) const; + const MCSection *FindAssociatedSection() const { + return getSubExpr()->FindAssociatedSection(); + } + + // There are no TLS PPCMCExprs at the moment. 
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {} + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Target; + } +}; +} // end namespace llvm + +#endif diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 2209f93..2da30f9 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -58,7 +58,7 @@ static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) { +static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); bool isPPC64 = TheTriple.getArch() == Triple::ppc64; @@ -69,9 +69,10 @@ static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) { MAI = new PPCLinuxMCAsmInfo(isPPC64); // Initial state of the frame pointer is R1. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(isPPC64? PPC::X1 : PPC::R1, 0); - MAI->addInitialFrameState(0, Dst, Src); + unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1; + MCCFIInstruction Inst = + MCCFIInstruction::createDefCfa(0, MRI.getDwarfRegNum(Reg, true), 0); + MAI->addInitialFrameState(Inst); return MAI; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index 444758c..3ab9005 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -32,7 +32,8 @@ namespace PPC { PRED_GT = (1 << 5) | 12, PRED_NE = (2 << 5) | 4, PRED_UN = (3 << 5) | 12, - PRED_NU = (3 << 5) | 4 + PRED_NU = (3 << 5) | 4, + PRED_BAD = 0 }; /// Invert the specified predicate. != -> ==, < -> >=. 
diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile index 1617b26..6666694 100644 --- a/lib/Target/PowerPC/Makefile +++ b/lib/Target/PowerPC/Makefile @@ -12,12 +12,12 @@ LIBRARYNAME = LLVMPowerPCCodeGen TARGET = PPC # Make sure that tblgen is run, first thing. -BUILT_SOURCES = PPCGenRegisterInfo.inc \ +BUILT_SOURCES = PPCGenRegisterInfo.inc PPCGenAsmMatcher.inc \ PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \ PPCGenInstrInfo.inc PPCGenDAGISel.inc \ PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \ PPCGenMCCodeEmitter.inc -DIRS = InstPrinter TargetInfo MCTargetDesc +DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index b4be51a..2e79610 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -30,7 +30,10 @@ namespace llvm { class AsmPrinter; class MCInst; - FunctionPass *createPPCCTRLoops(); + FunctionPass *createPPCCTRLoops(PPCTargetMachine &TM); +#ifndef NDEBUG + FunctionPass *createPPCCTRLoopsVerify(); +#endif FunctionPass *createPPCEarlyReturnPass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 649ffc1..eb73c67 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -268,9 +268,14 @@ def PPCAsmWriter : AsmWriter { bit isMCAsmWriter = 1; } +def PPCAsmParser : AsmParser { + let ShouldEmitMatchRegisterName = 0; +} + def PPC : Target { // Information about the instructions. 
let InstructionSet = PPCInstrInfo; let AssemblyWriters = [PPCAsmWriter]; + let AssemblyParsers = [PPCAsmParser]; } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 3c7cc4e..c43b5c9 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -20,6 +20,7 @@ #include "PPC.h" #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCPredicates.h" +#include "MCTargetDesc/PPCMCExpr.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/ADT/MapVector.h" @@ -910,6 +911,9 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext); + const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext); + const MCExpr *Sub = + MCBinaryExpr::CreateSub(LazyPtrExpr, Anon, OutContext); // mflr r0 OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0)); @@ -919,21 +923,20 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // mflr r11 OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11)); // addis r11, r11, ha16(LazyPtr - AnonSymbol) - const MCExpr *Sub = - MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(LazyPtr, OutContext), - Anon, OutContext); + const MCExpr *SubHa16 = PPCMCExpr::CreateHa16(Sub, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS) .addReg(PPC::R11) .addReg(PPC::R11) - .addExpr(Sub)); + .addExpr(SubHa16)); // mtlr r0 OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTLR).addReg(PPC::R0)); // ldu r12, lo16(LazyPtr - AnonSymbol)(r11) // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11) + const MCExpr *SubLo16 = PPCMCExpr::CreateLo16(Sub, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? 
PPC::LDU : PPC::LWZU) .addReg(PPC::R12) - .addExpr(Sub).addExpr(Sub) + .addExpr(SubLo16).addExpr(SubLo16) .addReg(PPC::R11)); // mtctr r12 OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12)); @@ -967,24 +970,22 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { MCSymbol *Stub = Stubs[i].first; MCSymbol *RawSym = Stubs[i].second.getPointer(); MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext); + const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext); OutStreamer.SwitchSection(StubSection); EmitAlignment(4); OutStreamer.EmitLabel(Stub); OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); + // lis r11, ha16(LazyPtr) - const MCExpr *LazyPtrHa16 = - MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_HA16, - OutContext); + const MCExpr *LazyPtrHa16 = PPCMCExpr::CreateHa16(LazyPtrExpr, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS) .addReg(PPC::R11) .addExpr(LazyPtrHa16)); - const MCExpr *LazyPtrLo16 = - MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_LO16, - OutContext); // ldu r12, lo16(LazyPtr)(r11) // lwzu r12, lo16(LazyPtr)(r11) + const MCExpr *LazyPtrLo16 = PPCMCExpr::CreateLo16(LazyPtrExpr, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? 
PPC::LDU : PPC::LWZU) .addReg(PPC::R12) .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16) diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index bd1c378..3e608ca 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -112,15 +112,21 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { unsigned MBBStartOffset = 0; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { - if (I->getOpcode() != PPC::BCC || I->getOperand(2).isImm()) { + MachineBasicBlock *Dest = 0; + if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm()) + Dest = I->getOperand(2).getMBB(); + else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ || + I->getOpcode() == PPC::BDZ8 || I->getOpcode() == PPC::BDZ) && + !I->getOperand(0).isImm()) + Dest = I->getOperand(0).getMBB(); + + if (!Dest) { MBBStartOffset += TII->GetInstSizeInBytes(I); continue; } // Determine the offset from the current branch to the destination // block. - MachineBasicBlock *Dest = I->getOperand(2).getMBB(); - int BranchSize; if (Dest->getNumber() <= MBB.getNumber()) { // If this is a backwards branch, the delta is the offset from the diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 81a54d7..08247c2 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -9,767 +9,619 @@ // // This pass identifies loops where we can generate the PPC branch instructions // that decrement and test the count register (CTR) (bdnz and friends). -// This pass is based on the HexagonHardwareLoops pass. // // The pattern that defines the induction variable can changed depending on // prior optimizations. For example, the IndVarSimplify phase run by 'opt' // normalizes induction variables, and the Loop Strength Reduction pass // run by 'llc' may also make changes to the induction variable. 
-// The pattern detected by this phase is due to running Strength Reduction. // // Criteria for CTR loops: // - Countable loops (w/ ind. var for a trip count) -// - Assumes loops are normalized by IndVarSimplify // - Try inner-most loops first // - No nested CTR loops. // - No function calls in loops. // -// Note: As with unconverted loops, PPCBranchSelector must be run after this -// pass in order to convert long-displacement jumps into jump pairs. -// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ctrloops" -#include "PPC.h" -#include "MCTargetDesc/PPCPredicates.h" -#include "PPCTargetMachine.h" -#include "llvm/ADT/DenseMap.h" + +#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "PPCTargetMachine.h" +#include "PPC.h" + +#ifndef NDEBUG +#include "llvm/CodeGen/MachineDominators.h" +#include 
"llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#endif + #include <algorithm> +#include <vector> using namespace llvm; +#ifndef NDEBUG +static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1)); +#endif + STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops"); namespace llvm { void initializePPCCTRLoopsPass(PassRegistry&); +#ifndef NDEBUG + void initializePPCCTRLoopsVerifyPass(PassRegistry&); +#endif } namespace { - class CountValue; - struct PPCCTRLoops : public MachineFunctionPass { - MachineLoopInfo *MLI; - MachineRegisterInfo *MRI; - const TargetInstrInfo *TII; + struct PPCCTRLoops : public FunctionPass { + +#ifndef NDEBUG + static int Counter; +#endif public: - static char ID; // Pass identification, replacement for typeid + static char ID; - PPCCTRLoops() : MachineFunctionPass(ID) { + PPCCTRLoops() : FunctionPass(ID), TM(0) { + initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); + } + PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { return "PPC CTR Loops"; } + virtual bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<MachineDominatorTree>(); - AU.addPreserved<MachineDominatorTree>(); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); - MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); + AU.addRequired<DominatorTree>(); + AU.addPreserved<DominatorTree>(); + AU.addRequired<ScalarEvolution>(); } private: - /// getCanonicalInductionVariable - Check to see if the loop has a canonical - /// induction variable. - /// Should be defined in MachineLoop. Based upon version in class Loop. 
- void getCanonicalInductionVariable(MachineLoop *L, - SmallVector<MachineInstr *, 4> &IVars, - SmallVector<MachineInstr *, 4> &IOps) const; - - /// getTripCount - Return a loop-invariant LLVM register indicating the - /// number of times the loop will be executed. If the trip-count cannot - /// be determined, this return null. - CountValue *getTripCount(MachineLoop *L, - SmallVector<MachineInstr *, 2> &OldInsts) const; - - /// isInductionOperation - Return true if the instruction matches the - /// pattern for an opertion that defines an induction variable. - bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const; - - /// isInvalidOperation - Return true if the instruction is not valid within - /// a CTR loop. - bool isInvalidLoopOperation(const MachineInstr *MI) const; - - /// containsInavlidInstruction - Return true if the loop contains an - /// instruction that inhibits using the CTR loop. - bool containsInvalidInstruction(MachineLoop *L) const; - - /// converToCTRLoop - Given a loop, check if we can convert it to a - /// CTR loop. If so, then perform the conversion and return true. - bool convertToCTRLoop(MachineLoop *L); - - /// isDead - Return true if the instruction is now dead. - bool isDead(const MachineInstr *MI, - SmallVector<MachineInstr *, 1> &DeadPhis) const; - - /// removeIfDead - Remove the instruction if it is now dead. - void removeIfDead(MachineInstr *MI); + bool mightUseCTR(const Triple &TT, BasicBlock *BB); + bool convertToCTRLoop(Loop *L); + + private: + PPCTargetMachine *TM; + LoopInfo *LI; + ScalarEvolution *SE; + DataLayout *TD; + DominatorTree *DT; + const TargetLibraryInfo *LibInfo; }; char PPCCTRLoops::ID = 0; +#ifndef NDEBUG + int PPCCTRLoops::Counter = 0; +#endif - - // CountValue class - Abstraction for a trip count of a loop. A - // smaller vesrsion of the MachineOperand class without the concerns - // of changing the operand representation. 
- class CountValue { +#ifndef NDEBUG + struct PPCCTRLoopsVerify : public MachineFunctionPass { public: - enum CountValueType { - CV_Register, - CV_Immediate - }; - private: - CountValueType Kind; - union Values { - unsigned RegNum; - int64_t ImmVal; - Values(unsigned r) : RegNum(r) {} - Values(int64_t i) : ImmVal(i) {} - } Contents; - bool isNegative; + static char ID; - public: - CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r), - isNegative(neg) {} - explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i), - isNegative(i < 0) {} - CountValueType getType() const { return Kind; } - bool isReg() const { return Kind == CV_Register; } - bool isImm() const { return Kind == CV_Immediate; } - bool isNeg() const { return isNegative; } - - unsigned getReg() const { - assert(isReg() && "Wrong CountValue accessor"); - return Contents.RegNum; - } - void setReg(unsigned Val) { - Contents.RegNum = Val; - } - int64_t getImm() const { - assert(isImm() && "Wrong CountValue accessor"); - if (isNegative) { - return -Contents.ImmVal; - } - return Contents.ImmVal; - } - void setImm(int64_t Val) { - Contents.ImmVal = Val; + PPCCTRLoopsVerify() : MachineFunctionPass(ID) { + initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry()); } - void print(raw_ostream &OS, const TargetMachine *TM = 0) const { - if (isReg()) { OS << PrintReg(getReg()); } - if (isImm()) { OS << getImm(); } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + MachineDominatorTree *MDT; }; + + char PPCCTRLoopsVerify::ID = 0; +#endif // NDEBUG } // end anonymous namespace INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) 
+INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) -/// isCompareEquals - Returns true if the instruction is a compare equals -/// instruction with an immediate operand. -static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp, - bool &Int64Cmp) { - if (MI->getOpcode() == PPC::CMPWI) { - SignedCmp = true; - Int64Cmp = false; - return true; - } else if (MI->getOpcode() == PPC::CMPDI) { - SignedCmp = true; - Int64Cmp = true; - return true; - } else if (MI->getOpcode() == PPC::CMPLWI) { - SignedCmp = false; - Int64Cmp = false; - return true; - } else if (MI->getOpcode() == PPC::CMPLDI) { - SignedCmp = false; - Int64Cmp = true; - return true; - } - - return false; +FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) { + return new PPCCTRLoops(TM); } +#ifndef NDEBUG +INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", + "PowerPC CTR Loops Verify", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", + "PowerPC CTR Loops Verify", false, false) -/// createPPCCTRLoops - Factory for creating -/// the CTR loop phase. 
-FunctionPass *llvm::createPPCCTRLoops() { - return new PPCCTRLoops(); +FunctionPass *llvm::createPPCCTRLoopsVerify() { + return new PPCCTRLoopsVerify(); } +#endif // NDEBUG +bool PPCCTRLoops::runOnFunction(Function &F) { + LI = &getAnalysis<LoopInfo>(); + SE = &getAnalysis<ScalarEvolution>(); + DT = &getAnalysis<DominatorTree>(); + TD = getAnalysisIfAvailable<DataLayout>(); + LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>(); -bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "********* PPC CTR Loops *********\n"); - - bool Changed = false; + bool MadeChange = false; - // get the loop information - MLI = &getAnalysis<MachineLoopInfo>(); - // get the register information - MRI = &MF.getRegInfo(); - // the target specific instructio info. - TII = MF.getTarget().getInstrInfo(); - - for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) { - MachineLoop *L = *I; - if (!L->getParentLoop()) { - Changed |= convertToCTRLoop(L); - } + Loop *L = *I; + if (!L->getParentLoop()) + MadeChange |= convertToCTRLoop(L); } - return Changed; + return MadeChange; } -/// getCanonicalInductionVariable - Check to see if the loop has a canonical -/// induction variable. We check for a simple recurrence pattern - an -/// integer recurrence that decrements by one each time through the loop and -/// ends at zero. If so, return the phi node that corresponds to it. -/// -/// Based upon the similar code in LoopInfo except this code is specific to -/// the machine. -/// This method assumes that the IndVarSimplify pass has been run by 'opt'. 
-/// -void -PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L, - SmallVector<MachineInstr *, 4> &IVars, - SmallVector<MachineInstr *, 4> &IOps) const { - MachineBasicBlock *TopMBB = L->getTopBlock(); - MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); - assert(PI != TopMBB->pred_end() && - "Loop must have more than one incoming edge!"); - MachineBasicBlock *Backedge = *PI++; - if (PI == TopMBB->pred_end()) return; // dead loop - MachineBasicBlock *Incoming = *PI++; - if (PI != TopMBB->pred_end()) return; // multiple backedges? - - // make sure there is one incoming and one backedge and determine which - // is which. - if (L->contains(Incoming)) { - if (L->contains(Backedge)) - return; - std::swap(Incoming, Backedge); - } else if (!L->contains(Backedge)) - return; - - // Loop over all of the PHI nodes, looking for a canonical induction variable: - // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2". - // - The recurrence comes from the backedge. - // - the definition is an induction operatio.n - for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end(); - I != E && I->isPHI(); ++I) { - MachineInstr *MPhi = &*I; - unsigned DefReg = MPhi->getOperand(0).getReg(); - for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) { - // Check each operand for the value from the backedge. - MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB(); - if (L->contains(MBB)) { // operands comes from the backedge - // Check if the definition is an induction operation. - MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg()); - if (isInductionOperation(DI, DefReg)) { - IOps.push_back(DI); - IVars.push_back(MPhi); +bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { + for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); + J != JE; ++J) { + if (CallInst *CI = dyn_cast<CallInst>(J)) { + if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) { + // Inline ASM is okay, unless it clobbers the ctr register. 
+ InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints(); + for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) { + InlineAsm::ConstraintInfo &C = CIV[i]; + if (C.Type != InlineAsm::isInput) + for (unsigned j = 0, je = C.Codes.size(); j < je; ++j) + if (StringRef(C.Codes[j]).equals_lower("{ctr}")) + return true; } - } - } - } - return; -} -/// getTripCount - Return a loop-invariant LLVM value indicating the -/// number of times the loop will be executed. The trip count can -/// be either a register or a constant value. If the trip-count -/// cannot be determined, this returns null. -/// -/// We find the trip count from the phi instruction that defines the -/// induction variable. We follow the links to the CMP instruction -/// to get the trip count. -/// -/// Based upon getTripCount in LoopInfo. -/// -CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, - SmallVector<MachineInstr *, 2> &OldInsts) const { - MachineBasicBlock *LastMBB = L->getExitingBlock(); - // Don't generate a CTR loop if the loop has more than one exit. - if (LastMBB == 0) - return 0; - - MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); - if (LastI->getOpcode() != PPC::BCC) - return 0; - - // We need to make sure that this compare is defining the condition - // register actually used by the terminating branch. - - unsigned PredReg = LastI->getOperand(1).getReg(); - DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI); - - unsigned PredCond = LastI->getOperand(0).getImm(); - if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE) - return 0; - - // Check that the loop has a induction variable. 
- SmallVector<MachineInstr *, 4> IVars, IOps; - getCanonicalInductionVariable(L, IVars, IOps); - for (unsigned i = 0; i < IVars.size(); ++i) { - MachineInstr *IOp = IOps[i]; - MachineInstr *IV_Inst = IVars[i]; - - // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm', - // if Imm is 0, get the count from the PHI opnd - // if Imm is -M, than M is the count - // Otherwise, Imm is the count - MachineOperand *IV_Opnd; - const MachineOperand *InitialValue; - if (!L->contains(IV_Inst->getOperand(2).getMBB())) { - InitialValue = &IV_Inst->getOperand(1); - IV_Opnd = &IV_Inst->getOperand(3); - } else { - InitialValue = &IV_Inst->getOperand(3); - IV_Opnd = &IV_Inst->getOperand(1); - } + continue; + } - DEBUG(dbgs() << "Considering:\n"); - DEBUG(dbgs() << " induction operation: " << *IOp); - DEBUG(dbgs() << " induction variable: " << *IV_Inst); - DEBUG(dbgs() << " initial value: " << *InitialValue << "\n"); - - // Look for the cmp instruction to determine if we - // can get a useful trip count. The trip count can - // be either a register or an immediate. The location - // of the value depends upon the type (reg or imm). 
- for (MachineRegisterInfo::reg_iterator - RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); - RI != RE; ++RI) { - IV_Opnd = &RI.getOperand(); - bool SignedCmp, Int64Cmp; - MachineInstr *MI = IV_Opnd->getParent(); - if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) && - MI->getOperand(0).getReg() == PredReg) { - - OldInsts.push_back(MI); - OldInsts.push_back(IOp); - - DEBUG(dbgs() << " compare: " << *MI); - - const MachineOperand &MO = MI->getOperand(2); - assert(MO.isImm() && "IV Cmp Operand should be an immediate"); - - int64_t ImmVal; - if (SignedCmp) - ImmVal = (short) MO.getImm(); - else - ImmVal = MO.getImm(); - - const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg()); - assert(L->contains(IV_DefInstr->getParent()) && - "IV definition should occurs in loop"); - int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm(); - - assert(InitialValue->isReg() && "Expecting register for init value"); - unsigned InitialValueReg = InitialValue->getReg(); - - MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); - - // Here we need to look for an immediate load (an li or lis/ori pair). - if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 || - DefInstr->getOpcode() == PPC::ORI)) { - int64_t start = DefInstr->getOperand(2).getImm(); - MachineInstr *DefInstr2 = - MRI->getVRegDef(DefInstr->getOperand(1).getReg()); - if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 || - DefInstr2->getOpcode() == PPC::LIS)) { - DEBUG(dbgs() << " initial constant: " << *DefInstr); - DEBUG(dbgs() << " initial constant: " << *DefInstr2); - - start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16; - - int64_t count = ImmVal - start; - if ((count % iv_value) != 0) { - return 0; - } - - OldInsts.push_back(DefInstr); - OldInsts.push_back(DefInstr2); - - // count/iv_value, the trip count, should be positive here. If it - // is negative, that indicates that the counter will wrap. 
- if (Int64Cmp) - return new CountValue(count/iv_value); - else - return new CountValue(uint32_t(count/iv_value)); - } - } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 || - DefInstr->getOpcode() == PPC::LI)) { - DEBUG(dbgs() << " initial constant: " << *DefInstr); - - int64_t count = ImmVal - - int64_t(short(DefInstr->getOperand(1).getImm())); - if ((count % iv_value) != 0) { - return 0; + if (!TM) + return true; + const TargetLowering *TLI = TM->getTargetLowering(); + + if (Function *F = CI->getCalledFunction()) { + // Most intrinsics don't become function calls, but some might. + // sin, cos, exp and log are always calls. + unsigned Opcode; + if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { + switch (F->getIntrinsicID()) { + default: continue; + +// VisualStudio defines setjmp as _setjmp +#if defined(_MSC_VER) && defined(setjmp) && \ + !defined(setjmp_undefined_for_msvc) +# pragma push_macro("setjmp") +# undef setjmp +# define setjmp_undefined_for_msvc +#endif + + case Intrinsic::setjmp: + +#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc) + // let's return it to _setjmp state +# pragma pop_macro("setjmp") +# undef setjmp_undefined_for_msvc +#endif + + case Intrinsic::longjmp: + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memset: + case Intrinsic::powi: + case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::pow: + case Intrinsic::sin: + case Intrinsic::cos: + return true; + case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; + case Intrinsic::floor: Opcode = ISD::FFLOOR; break; + case Intrinsic::ceil: Opcode = ISD::FCEIL; break; + case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; + case Intrinsic::rint: Opcode = ISD::FRINT; break; + case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; } + } - OldInsts.push_back(DefInstr); - - if (Int64Cmp) - return new CountValue(count/iv_value); - else - return new 
CountValue(uint32_t(count/iv_value)); - } else if (iv_value == 1 || iv_value == -1) { - // We can't determine a constant starting value. - if (ImmVal == 0) { - return new CountValue(InitialValueReg, iv_value > 0); + // PowerPC does not use [US]DIVREM or other library calls for + // operations on regular types which are not otherwise library calls + // (i.e. soft float or atomics). If adapting for targets that do, + // additional care is required here. + + LibFunc::Func Func; + if (!F->hasLocalLinkage() && F->hasName() && LibInfo && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) { + // Non-read-only functions are never treated as intrinsics. + if (!CI->onlyReadsMemory()) + return true; + + // Conversion happens only for FP calls. + if (!CI->getArgOperand(0)->getType()->isFloatingPointTy()) + return true; + + switch (Func) { + default: return true; + case LibFunc::copysign: + case LibFunc::copysignf: + case LibFunc::copysignl: + continue; // ISD::FCOPYSIGN is never a library call. + case LibFunc::fabs: + case LibFunc::fabsf: + case LibFunc::fabsl: + continue; // ISD::FABS is never a library call. + case LibFunc::sqrt: + case LibFunc::sqrtf: + case LibFunc::sqrtl: + Opcode = ISD::FSQRT; break; + case LibFunc::floor: + case LibFunc::floorf: + case LibFunc::floorl: + Opcode = ISD::FFLOOR; break; + case LibFunc::nearbyint: + case LibFunc::nearbyintf: + case LibFunc::nearbyintl: + Opcode = ISD::FNEARBYINT; break; + case LibFunc::ceil: + case LibFunc::ceilf: + case LibFunc::ceill: + Opcode = ISD::FCEIL; break; + case LibFunc::rint: + case LibFunc::rintf: + case LibFunc::rintl: + Opcode = ISD::FRINT; break; + case LibFunc::trunc: + case LibFunc::truncf: + case LibFunc::truncl: + Opcode = ISD::FTRUNC; break; } - // FIXME: handle non-zero end value. 
+ + MVT VTy = + TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true); + if (VTy == MVT::Other) + return true; + + if (TLI->isOperationLegalOrCustom(Opcode, VTy)) + continue; + else if (VTy.isVector() && + TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType())) + continue; + + return true; } - // FIXME: handle non-unit increments (we might not want to introduce - // division but we can handle some 2^n cases with shifts). - } - } - } - return 0; -} - -/// isInductionOperation - return true if the operation is matches the -/// pattern that defines an induction variable: -/// addi iv, c -/// -bool -PPCCTRLoops::isInductionOperation(const MachineInstr *MI, - unsigned IVReg) const { - return ((MI->getOpcode() == PPC::ADDI || MI->getOpcode() == PPC::ADDI8) && - MI->getOperand(1).isReg() && // could be a frame index instead - MI->getOperand(1).getReg() == IVReg); -} -/// isInvalidOperation - Return true if the operation is invalid within -/// CTR loop. -bool -PPCCTRLoops::isInvalidLoopOperation(const MachineInstr *MI) const { - - // call is not allowed because the callee may use a CTR loop - if (MI->getDesc().isCall()) { - return true; - } - // check if the instruction defines a CTR loop register - // (this will also catch nested CTR loops) - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDef() && - (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) { return true; - } - } - return false; -} + } else if (isa<BinaryOperator>(J) && + J->getType()->getScalarType()->isPPC_FP128Ty()) { + // Most operations on ppc_f128 values become calls. 
+ return true; + } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) || + isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) { + CastInst *CI = cast<CastInst>(J); + if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || + CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || + (TT.isArch32Bit() && + (CI->getSrcTy()->getScalarType()->isIntegerTy(64) || + CI->getDestTy()->getScalarType()->isIntegerTy(64)) + )) + return true; + } else if (TT.isArch32Bit() && + J->getType()->getScalarType()->isIntegerTy(64) && + (J->getOpcode() == Instruction::UDiv || + J->getOpcode() == Instruction::SDiv || + J->getOpcode() == Instruction::URem || + J->getOpcode() == Instruction::SRem)) { + return true; + } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) { + // On PowerPC, indirect jumps use the counter register. + return true; + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) { + if (!TM) + return true; + const TargetLowering *TLI = TM->getTargetLowering(); -/// containsInvalidInstruction - Return true if the loop contains -/// an instruction that inhibits the use of the CTR loop function. -/// -bool PPCCTRLoops::containsInvalidInstruction(MachineLoop *L) const { - const std::vector<MachineBasicBlock*> Blocks = L->getBlocks(); - for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { - MachineBasicBlock *MBB = Blocks[i]; - for (MachineBasicBlock::iterator - MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { - const MachineInstr *MI = &*MII; - if (isInvalidLoopOperation(MI)) { + if (TLI->supportJumpTables() && + SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries()) return true; - } } } + return false; } -/// isDead returns true if the instruction is dead -/// (this was essentially copied from DeadMachineInstructionElim::isDead, but -/// with special cases for inline asm, physical registers and instructions with -/// side effects removed) -bool PPCCTRLoops::isDead(const MachineInstr *MI, - SmallVector<MachineInstr *, 1> &DeadPhis) const { - // Examine each operand. 
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDef()) { - unsigned Reg = MO.getReg(); - if (!MRI->use_nodbg_empty(Reg)) { - // This instruction has users, but if the only user is the phi node for - // the parent block, and the only use of that phi node is this - // instruction, then this instruction is dead: both it (and the phi - // node) can be removed. - MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg); - if (llvm::next(I) == MRI->use_end() && - I.getOperand().getParent()->isPHI()) { - MachineInstr *OnePhi = I.getOperand().getParent(); - - for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) { - const MachineOperand &OPO = OnePhi->getOperand(j); - if (OPO.isReg() && OPO.isDef()) { - unsigned OPReg = OPO.getReg(); - - MachineRegisterInfo::use_iterator nextJ; - for (MachineRegisterInfo::use_iterator J = MRI->use_begin(OPReg), - E = MRI->use_end(); J!=E; J=nextJ) { - nextJ = llvm::next(J); - MachineOperand& Use = J.getOperand(); - MachineInstr *UseMI = Use.getParent(); - - if (MI != UseMI) { - // The phi node has a user that is not MI, bail... - return false; - } - } - } - } +bool PPCCTRLoops::convertToCTRLoop(Loop *L) { + bool MadeChange = false; - DeadPhis.push_back(OnePhi); - } else { - // This def has a non-debug use. Don't delete the instruction! - return false; - } - } - } + Triple TT = Triple(L->getHeader()->getParent()->getParent()-> + getTargetTriple()); + if (!TT.isArch32Bit() && !TT.isArch64Bit()) + return MadeChange; // Unknown arch. type. + + // Process nested loops first. + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { + MadeChange |= convertToCTRLoop(*I); } - // If there are no defs with uses, the instruction is dead. - return true; -} + // If a nested loop has been converted, then we can't convert this loop. + if (MadeChange) + return MadeChange; + +#ifndef NDEBUG + // Stop trying after reaching the limit (if any). 
+ int Limit = CTRLoopLimit; + if (Limit >= 0) { + if (Counter >= CTRLoopLimit) + return false; + Counter++; + } +#endif + + // We don't want to spill/restore the counter register, and so we don't + // want to use the counter register if the loop contains calls. + for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); + I != IE; ++I) + if (mightUseCTR(TT, *I)) + return MadeChange; + + SmallVector<BasicBlock*, 4> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + BasicBlock *CountedExitBlock = 0; + const SCEV *ExitCount = 0; + BranchInst *CountedExitBranch = 0; + for (SmallVector<BasicBlock*, 4>::iterator I = ExitingBlocks.begin(), + IE = ExitingBlocks.end(); I != IE; ++I) { + const SCEV *EC = SE->getExitCount(L, *I); + DEBUG(dbgs() << "Exit Count for " << *L << " from block " << + (*I)->getName() << ": " << *EC << "\n"); + if (isa<SCEVCouldNotCompute>(EC)) + continue; + if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) { + if (ConstEC->getValue()->isZero()) + continue; + } else if (!SE->isLoopInvariant(EC, L)) + continue; + + // We now have a loop-invariant count of loop iterations (which is not the + // constant zero) for which we know that this loop will not exit via this + // exisiting block. + + // We need to make sure that this block will run on every loop iteration. + // For this to be true, we must dominate all blocks with backedges. Such + // blocks are in-loop predecessors to the header block. 
+ bool NotAlways = false; + for (pred_iterator PI = pred_begin(L->getHeader()), + PIE = pred_end(L->getHeader()); PI != PIE; ++PI) { + if (!L->contains(*PI)) + continue; -void PPCCTRLoops::removeIfDead(MachineInstr *MI) { - // This procedure was essentially copied from DeadMachineInstructionElim + if (!DT->dominates(*I, *PI)) { + NotAlways = true; + break; + } + } - SmallVector<MachineInstr *, 1> DeadPhis; - if (isDead(MI, DeadPhis)) { - DEBUG(dbgs() << "CTR looping will remove: " << *MI); + if (NotAlways) + continue; - // It is possible that some DBG_VALUE instructions refer to this - // instruction. Examine each def operand for such references; - // if found, mark the DBG_VALUE as undef (but don't delete it). - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef()) + // Make sure this blocks ends with a conditional branch. + Instruction *TI = (*I)->getTerminator(); + if (!TI) + continue; + + if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + if (!BI->isConditional()) continue; - unsigned Reg = MO.getReg(); - MachineRegisterInfo::use_iterator nextI; - for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), - E = MRI->use_end(); I!=E; I=nextI) { - nextI = llvm::next(I); // I is invalidated by the setReg - MachineOperand& Use = I.getOperand(); - MachineInstr *UseMI = Use.getParent(); - if (UseMI==MI) - continue; - if (Use.isDebug()) // this might also be a instr -> phi -> instr case - // which can also be removed. - UseMI->getOperand(0).setReg(0U); - } - } - MI->eraseFromParent(); - for (unsigned i = 0; i < DeadPhis.size(); ++i) { - DeadPhis[i]->eraseFromParent(); - } + CountedExitBranch = BI; + } else + continue; + + // Note that this block may not be the loop latch block, even if the loop + // has a latch block. 
+ CountedExitBlock = *I; + ExitCount = EC; + break; } + + if (!CountedExitBlock) + return MadeChange; + + BasicBlock *Preheader = L->getLoopPreheader(); + + // If we don't have a preheader, then insert one. If we already have a + // preheader, then we can use it (except if the preheader contains a use of + // the CTR register because some such uses might be reordered by the + // selection DAG after the mtctr instruction). + if (!Preheader || mightUseCTR(TT, Preheader)) + Preheader = InsertPreheaderForLoop(L, this); + if (!Preheader) + return MadeChange; + + DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n"); + + // Insert the count into the preheader and replace the condition used by the + // selected branch. + MadeChange = true; + + SCEVExpander SCEVE(*SE, "loopcnt"); + LLVMContext &C = SE->getContext(); + Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) : + Type::getInt32Ty(C); + if (!ExitCount->getType()->isPointerTy() && + ExitCount->getType() != CountType) + ExitCount = SE->getZeroExtendExpr(ExitCount, CountType); + ExitCount = SE->getAddExpr(ExitCount, + SE->getConstant(CountType, 1)); + Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType, + Preheader->getTerminator()); + + IRBuilder<> CountBuilder(Preheader->getTerminator()); + Module *M = Preheader->getParent()->getParent(); + Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, + CountType); + CountBuilder.CreateCall(MTCTRFunc, ECValue); + + IRBuilder<> CondBuilder(CountedExitBranch); + Value *DecFunc = + Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero); + Value *NewCond = CondBuilder.CreateCall(DecFunc); + Value *OldCond = CountedExitBranch->getCondition(); + CountedExitBranch->setCondition(NewCond); + + // The false branch must exit the loop. 
+ if (!L->contains(CountedExitBranch->getSuccessor(0))) + CountedExitBranch->swapSuccessors(); + + // The old condition may be dead now, and may have even created a dead PHI + // (the original induction variable). + RecursivelyDeleteTriviallyDeadInstructions(OldCond); + DeleteDeadPHIs(CountedExitBlock); + + ++NumCTRLoops; + return MadeChange; } -/// converToCTRLoop - check if the loop is a candidate for -/// converting to a CTR loop. If so, then perform the -/// transformation. -/// -/// This function works on innermost loops first. A loop can -/// be converted if it is a counting loop; either a register -/// value or an immediate. -/// -/// The code makes several assumptions about the representation -/// of the loop in llvm. -bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { - bool Changed = false; - // Process nested loops first. - for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) { - Changed |= convertToCTRLoop(*I); - } - // If a nested loop has been converted, then we can't convert this loop. - if (Changed) { - return Changed; +#ifndef NDEBUG +static bool clobbersCTR(const MachineInstr *MI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg()) { + if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) + return true; + } else if (MO.isRegMask()) { + if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8)) + return true; + } } - SmallVector<MachineInstr *, 2> OldInsts; - // Are we able to determine the trip count for the loop? 
- CountValue *TripCount = getTripCount(L, OldInsts); - if (TripCount == 0) { - DEBUG(dbgs() << "failed to get trip count!\n"); - return false; - } + return false; +} - if (TripCount->isImm()) { - DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n"); +static bool verifyCTRBranch(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I) { + MachineBasicBlock::iterator BI = I; + SmallSet<MachineBasicBlock *, 16> Visited; + SmallVector<MachineBasicBlock *, 8> Preds; + bool CheckPreds; + + if (I == MBB->begin()) { + Visited.insert(MBB); + goto queue_preds; + } else + --I; + +check_block: + Visited.insert(MBB); + if (I == MBB->end()) + goto queue_preds; + + CheckPreds = true; + for (MachineBasicBlock::iterator IE = MBB->begin();; --I) { + unsigned Opc = I->getOpcode(); + if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) { + CheckPreds = false; + break; + } - // FIXME: We currently can't form 64-bit constants - // (including 32-bit unsigned constants) - if (!isInt<32>(TripCount->getImm())) + if (I != BI && clobbersCTR(I)) { + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " (" << + MBB->getFullName() << ") instruction " << *I << + " clobbers CTR, invalidating " << "BB#" << + BI->getParent()->getNumber() << " (" << + BI->getParent()->getFullName() << ") instruction " << + *BI << "\n"); return false; - } + } - // Does the loop contain any invalid instructions? - if (containsInvalidInstruction(L)) { - return false; + if (I == IE) + break; } - MachineBasicBlock *Preheader = L->getLoopPreheader(); - // No preheader means there's not place for the loop instr. - if (Preheader == 0) { - return false; - } - MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); - DebugLoc dl; - if (InsertPos != Preheader->end()) - dl = InsertPos->getDebugLoc(); + if (!CheckPreds && Preds.empty()) + return true; - MachineBasicBlock *LastMBB = L->getExitingBlock(); - // Don't generate CTR loop if the loop has more than one exit. 
- if (LastMBB == 0) { - return false; - } - MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); - - // Determine the loop start. - MachineBasicBlock *LoopStart = L->getTopBlock(); - if (L->getLoopLatch() != LastMBB) { - // When the exit and latch are not the same, use the latch block as the - // start. - // The loop start address is used only after the 1st iteration, and the loop - // latch may contains instrs. that need to be executed after the 1st iter. - LoopStart = L->getLoopLatch(); - // Make sure the latch is a successor of the exit, otherwise it won't work. - if (!LastMBB->isSuccessor(LoopStart)) { + if (CheckPreds) { +queue_preds: + if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) { + DEBUG(dbgs() << "Unable to find a MTCTR instruction for BB#" << + BI->getParent()->getNumber() << " (" << + BI->getParent()->getFullName() << ") instruction " << + *BI << "\n"); return false; } + + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PIE = MBB->pred_end(); PI != PIE; ++PI) + Preds.push_back(*PI); } - // Convert the loop to a CTR loop - DEBUG(dbgs() << "Change to CTR loop at "; L->dump()); - - MachineFunction *MF = LastMBB->getParent(); - const PPCSubtarget &Subtarget = MF->getTarget().getSubtarget<PPCSubtarget>(); - bool isPPC64 = Subtarget.isPPC64(); - - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; - - unsigned CountReg; - if (TripCount->isReg()) { - // Create a copy of the loop count register. - const TargetRegisterClass *SrcRC = - MF->getRegInfo().getRegClass(TripCount->getReg()); - CountReg = MF->getRegInfo().createVirtualRegister(RC); - unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ? 
- (unsigned) PPC::EXTSW_32_64 : - (unsigned) TargetOpcode::COPY; - BuildMI(*Preheader, InsertPos, dl, - TII->get(CopyOp), CountReg).addReg(TripCount->getReg()); - if (TripCount->isNeg()) { - unsigned CountReg1 = CountReg; - CountReg = MF->getRegInfo().createVirtualRegister(RC); - BuildMI(*Preheader, InsertPos, dl, - TII->get(isPPC64 ? PPC::NEG8 : PPC::NEG), - CountReg).addReg(CountReg1); + do { + MBB = Preds.pop_back_val(); + if (!Visited.count(MBB)) { + I = MBB->getLastNonDebugInstr(); + goto check_block; } - } else { - assert(TripCount->isImm() && "Expecting immedate vaule for trip count"); - // Put the trip count in a register for transfer into the count register. - - int64_t CountImm = TripCount->getImm(); - if (TripCount->isNeg()) - CountImm = -CountImm; - - CountReg = MF->getRegInfo().createVirtualRegister(RC); - if (abs64(CountImm) > 0x7FFF) { - BuildMI(*Preheader, InsertPos, dl, - TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), - CountReg).addImm((CountImm >> 16) & 0xFFFF); - unsigned CountReg1 = CountReg; - CountReg = MF->getRegInfo().createVirtualRegister(RC); - BuildMI(*Preheader, InsertPos, dl, - TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI), - CountReg).addReg(CountReg1).addImm(CountImm & 0xFFFF); - } else { - BuildMI(*Preheader, InsertPos, dl, - TII->get(isPPC64 ? PPC::LI8 : PPC::LI), - CountReg).addImm(CountImm); - } - } + } while (!Preds.empty()); - // Add the mtctr instruction to the beginning of the loop. - BuildMI(*Preheader, InsertPos, dl, - TII->get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(CountReg, - TripCount->isImm() ? RegState::Kill : 0); - - // Make sure the loop start always has a reference in the CFG. We need to - // create a BlockAddress operand to get this mechanism to work both the - // MachineBasicBlock and BasicBlock objects need the flag set. 
- LoopStart->setHasAddressTaken(); - // This line is needed to set the hasAddressTaken flag on the BasicBlock - // object - BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock())); - - // Replace the loop branch with a bdnz instruction. - dl = LastI->getDebugLoc(); - const std::vector<MachineBasicBlock*> Blocks = L->getBlocks(); - for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { - MachineBasicBlock *MBB = Blocks[i]; - if (MBB != Preheader) - MBB->addLiveIn(isPPC64 ? PPC::CTR8 : PPC::CTR); - } + return true; +} - // The loop ends with either: - // - a conditional branch followed by an unconditional branch, or - // - a conditional branch to the loop start. - assert(LastI->getOpcode() == PPC::BCC && - "loop end must start with a BCC instruction"); - // Either the BCC branches to the beginning of the loop, or it - // branches out of the loop and there is an unconditional branch - // to the start of the loop. - MachineBasicBlock *BranchTarget = LastI->getOperand(2).getMBB(); - BuildMI(*LastMBB, LastI, dl, - TII->get((BranchTarget == LoopStart) ? - (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : - (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(BranchTarget); - - // Conditional branch; just delete it. - DEBUG(dbgs() << "Removing old branch: " << *LastI); - LastMBB->erase(LastI); - - delete TripCount; - - // The induction operation (add) and the comparison (cmpwi) may now be - // unneeded. If these are unneeded, then remove them. - for (unsigned i = 0; i < OldInsts.size(); ++i) - removeIfDead(OldInsts[i]); +bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) { + MDT = &getAnalysis<MachineDominatorTree>(); + + // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before + // any other instructions that might clobber the ctr register. 
+ for (MachineFunction::iterator I = MF.begin(), IE = MF.end(); + I != IE; ++I) { + MachineBasicBlock *MBB = I; + if (!MDT->isReachableFromEntry(MBB)) + continue; + + for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(), + MIIE = MBB->end(); MII != MIIE; ++MII) { + unsigned Opc = MII->getOpcode(); + if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ || + Opc == PPC::BDZ8 || Opc == PPC::BDZ) + if (!verifyCTRBranch(MBB, MII)) + llvm_unreachable("Invalid PPC CTR loop!"); + } + } - ++NumCTRLoops; - return true; + return false; } +#endif // NDEBUG diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index 6478718..40e4968 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -64,8 +64,7 @@ namespace { unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const; - unsigned getHA16Encoding(const MachineInstr &MI, unsigned OpNo) const; - unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getS16ImmEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const; @@ -194,21 +193,19 @@ unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI, return 0; } -unsigned PPCCodeEmitter::getHA16Encoding(const MachineInstr &MI, - unsigned OpNo) const { +unsigned PPCCodeEmitter::getS16ImmEncoding(const MachineInstr &MI, + unsigned OpNo) const { const MachineOperand &MO = MI.getOperand(OpNo); if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); - MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_high)); - return 0; -} + unsigned RelocID; + switch (MO.getTargetFlags() & PPCII::MO_ACCESS_MASK) { + default: llvm_unreachable("Unsupported target operand flags!"); 
+ case PPCII::MO_HA16: RelocID = PPC::reloc_absolute_high; break; + case PPCII::MO_LO16: RelocID = PPC::reloc_absolute_low; break; + } -unsigned PPCCodeEmitter::getLO16Encoding(const MachineInstr &MI, - unsigned OpNo) const { - const MachineOperand &MO = MI.getOperand(OpNo); - if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); - - MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low)); + MCE.addRelocation(GetRelocation(MO, RelocID)); return 0; } @@ -237,7 +234,7 @@ unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI, const MachineOperand &MO = MI.getOperand(OpNo); if (MO.isImm()) - return (getMachineOpValue(MI, MO) & 0x3FFF) | RegBits; + return ((getMachineOpValue(MI, MO) >> 2) & 0x3FFF) | RegBits; MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix)); return RegBits; diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 9ec10f6..dabe613 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -334,6 +334,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo()); MachineModuleInfo &MMI = MF.getMMI(); + const MCRegisterInfo &MRI = MMI.getContext().getRegisterInfo(); DebugLoc dl; bool needsFrameMoves = MMI.hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry(); @@ -400,13 +401,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { if (HasFP) BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) .addReg(PPC::X31) - .addImm(FPOffset/4) + .addImm(FPOffset) .addReg(PPC::X1); if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) .addReg(PPC::X0) - .addImm(LROffset / 4) + .addImm(LROffset) .addReg(PPC::X1); if (!MustSaveCRs.empty()) @@ -500,7 +501,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { } else if (isInt<16>(NegFrameSize)) { BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1) .addReg(PPC::X1) - .addImm(NegFrameSize / 4) + 
.addImm(NegFrameSize) .addReg(PPC::X1); } else { BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0) @@ -515,8 +516,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { } } - std::vector<MachineMove> &Moves = MMI.getFrameMoves(); - // Add the "machine moves" for the instructions we generated above, but in // reverse order. if (needsFrameMoves) { @@ -525,25 +524,22 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel); // Show update of SP. - if (NegFrameSize) { - MachineLocation SPDst(MachineLocation::VirtualFP); - MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize); - Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); - } else { - MachineLocation SP(isPPC64 ? PPC::X31 : PPC::R31); - Moves.push_back(MachineMove(FrameLabel, SP, SP)); - } + assert(NegFrameSize); + MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(FrameLabel, NegFrameSize)); if (HasFP) { - MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset); - MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31); - Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc)); + unsigned Reg = isPPC64 ? PPC::X31 : PPC::R31; + Reg = MRI.getDwarfRegNum(Reg, true); + MMI.addFrameInst( + MCCFIInstruction::createOffset(FrameLabel, Reg, FPOffset)); } if (MustSaveLR) { - MachineLocation LRDst(MachineLocation::VirtualFP, LROffset); - MachineLocation LRSrc(isPPC64 ? PPC::LR8 : PPC::LR); - Moves.push_back(MachineMove(FrameLabel, LRDst, LRSrc)); + unsigned Reg = isPPC64 ? PPC::LR8 : PPC::LR; + Reg = MRI.getDwarfRegNum(Reg, true); + MMI.addFrameInst( + MCCFIInstruction::createOffset(FrameLabel, Reg, LROffset)); } } @@ -567,10 +563,10 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // Mark effective beginning of when frame pointer is ready. BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel); - MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) : - (isPPC64 ? 
PPC::X1 : PPC::R1)); - MachineLocation FPSrc(MachineLocation::VirtualFP); - Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc)); + unsigned Reg = HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) + : (isPPC64 ? PPC::X1 : PPC::R1); + Reg = MRI.getDwarfRegNum(Reg, true); + MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(ReadyLabel, Reg)); } } @@ -600,16 +596,14 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { if (Subtarget.isSVR4ABI() && Subtarget.isPPC64() && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { - MachineLocation CSDst(PPC::X1, 8); - MachineLocation CSSrc(PPC::CR2); - Moves.push_back(MachineMove(Label, CSDst, CSSrc)); + MMI.addFrameInst(MCCFIInstruction::createOffset( + Label, MRI.getDwarfRegNum(PPC::CR2, true), 8)); continue; } int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); - MachineLocation CSDst(MachineLocation::VirtualFP, Offset); - MachineLocation CSSrc(Reg); - Moves.push_back(MachineMove(Label, CSDst, CSSrc)); + MMI.addFrameInst(MCCFIInstruction::createOffset( + Label, MRI.getDwarfRegNum(Reg, true), Offset)); } } } @@ -747,7 +741,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, if (isPPC64) { if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0) - .addImm(LROffset/4).addReg(PPC::X1); + .addImm(LROffset).addReg(PPC::X1); if (!MustSaveCRs.empty()) BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), PPC::X12) @@ -755,7 +749,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, if (HasFP) BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31) - .addImm(FPOffset/4).addReg(PPC::X1); + .addImm(FPOffset).addReg(PPC::X1); if (!MustSaveCRs.empty()) for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) @@ -1170,6 +1164,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, FuncInfo->addMustSaveCR(Reg); } else { CRSpilled = true; + FuncInfo->setSpillsCR(); // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 4bf1e33..0df50e1 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -71,8 +71,8 @@ void PPCScoreboardHazardRecognizer::Reset() { // 3. Handling of the esoteric cases in "Resource-based Instruction Grouping". // -PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii) - : TII(tii) { +PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetMachine &TM) + : TM(TM) { EndDispatchGroup(); } @@ -91,7 +91,7 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode, bool &isFirst, bool &isSingle, bool &isCracked, bool &isLoad, bool &isStore) { - const MCInstrDesc &MCID = TII.get(Opcode); + const MCInstrDesc &MCID = TM.getInstrInfo()->get(Opcode); isLoad = MCID.mayLoad(); isStore = MCID.mayStore(); diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index 55b45d0..84b8e6d 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -43,7 +43,7 @@ public: /// setting the CTR register then branching through it within a dispatch group), /// or storing then loading from the same address within a dispatch group. class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { - const TargetInstrInfo &TII; + const TargetMachine &TM; unsigned NumIssued; // Number of insts issued, including advanced cycles. 
@@ -64,7 +64,7 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { unsigned NumStores; public: - PPCHazardRecognizer970(const TargetInstrInfo &TII); + PPCHazardRecognizer970(const TargetMachine &TM); virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void EmitInstruction(SUnit *SU); virtual void AdvanceCycle(); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index aed0fbb..e006945 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -110,13 +110,13 @@ namespace { /// SelectCC - Select a comparison of the specified values with the /// specified condition code, returning the CR# of the expression. - SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, DebugLoc dl); + SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDLoc dl); /// SelectAddrImm - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement [r+imm]. bool SelectAddrImm(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG); + return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG, false); } /// SelectAddrImmOffs - Return true if the operand is valid for a preinc @@ -145,11 +145,11 @@ namespace { return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG); } - /// SelectAddrImmShift - Returns true if the address N can be represented by - /// a base register plus a signed 14-bit displacement [r+imm*4]. Suitable - /// for use by STD and friends. - bool SelectAddrImmShift(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG); + /// SelectAddrImmX4 - Returns true if the address N can be represented by + /// a base register plus a signed 16-bit displacement that is a multiple of 4. + /// Suitable for use by STD and friends. 
+ bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) { + return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG, true); } // Select an address into a single register. @@ -332,17 +332,17 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { if (isShiftedMask_32(Val)) { // look for the first non-zero bit - MB = CountLeadingZeros_32(Val); + MB = countLeadingZeros(Val); // look for the first zero bit after the run of ones - ME = CountLeadingZeros_32((Val - 1) ^ Val); + ME = countLeadingZeros((Val - 1) ^ Val); return true; } else { Val = ~Val; // invert mask if (isShiftedMask_32(Val)) { // effectively look for the first zero bit - ME = CountLeadingZeros_32(Val) - 1; + ME = countLeadingZeros(Val) - 1; // effectively look for the first one bit after the run of zeros - MB = CountLeadingZeros_32((Val - 1) ^ Val) + 1; + MB = countLeadingZeros((Val - 1) ^ Val) + 1; return true; } } @@ -397,7 +397,7 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); APInt LKZ, LKO, RKZ, RKO; CurDAG->ComputeMaskedBits(Op0, LKZ, LKO); @@ -466,7 +466,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { /// SelectCC - Select a comparison of the specified values with the specified /// condition code, returning the CR# of the expression. SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, - ISD::CondCode CC, DebugLoc dl) { + ISD::CondCode CC, SDLoc dl) { // Always select the LHS. 
unsigned Opc; @@ -710,7 +710,7 @@ static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) { SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned Imm; ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(); @@ -894,7 +894,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. SDNode *PPCDAGToDAGISel::Select(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (N->isMachineOpcode()) return NULL; // Already selected. @@ -912,7 +912,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { // If it can't be represented as a 32 bit value. if (!isInt<32>(Imm)) { - Shift = CountTrailingZeros_64(Imm); + Shift = countTrailingZeros<uint64_t>(Imm); int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift; // If the shifted value fits 32 bits. @@ -1242,6 +1242,15 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { getI32Imm(BROpc) }; return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4); } + case PPCISD::BDNZ: + case PPCISD::BDZ: { + bool IsPPC64 = PPCSubTarget.isPPC64(); + SDValue Ops[] = { N->getOperand(1), N->getOperand(0) }; + return CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ? + (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : + (IsPPC64 ? PPC::BDZ8 : PPC::BDZ), + MVT::Other, Ops, 2); + } case PPCISD::COND_BRANCH: { // Op #0 is the Chain. // Op #1 is the PPC::PRED_* number. @@ -1519,7 +1528,7 @@ void PPCDAGToDAGISel::PostprocessISelDAG() { // immediate operand, add it now. 
if (ReplaceFlags) { if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { - DebugLoc dl = GA->getDebugLoc(); + SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags); } else if (ConstantPoolSDNode *CP = diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 3fcafdc..e2433e7 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -16,6 +16,7 @@ #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCTargetMachine.h" +#include "PPCTargetObjectFile.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -36,21 +37,6 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; -static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); -static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); -static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); - static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); @@ -64,14 +50,15 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { if (TM.getSubtargetImpl()->isDarwin()) return new TargetLoweringObjectFileMachO(); + if (TM.getSubtargetImpl()->isSVR4ABI()) + return new PPC64LinuxTargetObjectFile(); + return new TargetLoweringObjectFileELF(); } PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) { const PPCSubtarget *Subtarget = 
&TM.getSubtarget<PPCSubtarget>(); - PPCRegInfo = TM.getRegisterInfo(); - PPCII = TM.getInstrInfo(); setPow2DivIsCheap(); @@ -309,6 +296,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + // To handle counter-based loop conditions. + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); + // Comparisons that require checking two conditions. setCondCodeAction(ISD::SETULT, MVT::f32, Expand); setCondCodeAction(ISD::SETULT, MVT::f64, Expand); @@ -529,9 +519,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::SINT_TO_FP); + setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::BR_CC); setTargetDAGCombine(ISD::BSWAP); + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); // Use reciprocal estimates. if (TM.Options.UnsafeFPMath) { @@ -642,6 +634,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::LARX: return "PPCISD::LARX"; case PPCISD::STCX: return "PPCISD::STCX"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; + case PPCISD::BDNZ: return "PPCISD::BDNZ"; + case PPCISD::BDZ: return "PPCISD::BDZ"; case PPCISD::MFFS: return "PPCISD::MFFS"; case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; @@ -662,10 +656,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; + case PPCISD::SC: return "PPCISD::SC"; } } -EVT PPCTargetLowering::getSetCCResultType(EVT VT) const { +EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { if (!VT.isVector()) return MVT::i32; return 
VT.changeVectorElementTypeToInteger(); @@ -1038,20 +1033,23 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, /// Returns true if the address N can be represented by a base register plus /// a signed 16-bit displacement [r+imm], and if it is not better -/// represented as reg+reg. +/// represented as reg+reg. If Aligned is true, only accept displacements +/// suitable for STD and friends, i.e. multiples of 4. bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, - SelectionDAG &DAG) const { + SelectionDAG &DAG, + bool Aligned) const { // FIXME dl should come from parent load or store, not from address - DebugLoc dl = N.getDebugLoc(); + SDLoc dl(N); // If this can be more profitably realized as r+r, fail. if (SelectAddressRegReg(N, Disp, Base, DAG)) return false; if (N.getOpcode() == ISD::ADD) { short imm = 0; - if (isIntS16Immediate(N.getOperand(1), imm)) { - Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32); + if (isIntS16Immediate(N.getOperand(1), imm) && + (!Aligned || (imm & 3) == 0)) { + Disp = DAG.getTargetConstant(imm, N.getValueType()); if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); } else { @@ -1072,7 +1070,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, } } else if (N.getOpcode() == ISD::OR) { short imm = 0; - if (isIntS16Immediate(N.getOperand(1), imm)) { + if (isIntS16Immediate(N.getOperand(1), imm) && + (!Aligned || (imm & 3) == 0)) { // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. @@ -1083,7 +1082,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // If all of the bits are known zero on the LHS or RHS, the add won't // carry. 
Base = N.getOperand(0); - Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32); + Disp = DAG.getTargetConstant(imm, N.getValueType()); return true; } } @@ -1093,7 +1092,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // If this address fits entirely in a 16-bit sext immediate field, codegen // this as "d, 0" short Imm; - if (isIntS16Immediate(CN, Imm)) { + if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); @@ -1101,8 +1100,9 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, } // Handle 32-bit sext immediates with LIS + addr mode. - if (CN->getValueType(0) == MVT::i32 || - (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) { + if ((CN->getValueType(0) == MVT::i32 || + (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && + (!Aligned || (CN->getZExtValue() & 3) == 0)) { int Addr = (int)CN->getZExtValue(); // Otherwise, break this down into an LIS + disp. @@ -1150,92 +1150,6 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, return true; } -/// SelectAddressRegImmShift - Returns true if the address N can be -/// represented by a base register plus a signed 14-bit displacement -/// [r+imm*4]. Suitable for use by STD and friends. -bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, - SDValue &Base, - SelectionDAG &DAG) const { - // FIXME dl should come from the parent load or store, not the address - DebugLoc dl = N.getDebugLoc(); - // If this can be more profitably realized as r+r, fail. 
- if (SelectAddressRegReg(N, Disp, Base, DAG)) - return false; - - if (N.getOpcode() == ISD::ADD) { - short imm = 0; - if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) { - Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32); - if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { - Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); - } else { - Base = N.getOperand(0); - } - return true; // [r+i] - } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { - // Match LOAD (ADD (X, Lo(G))). - assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() - && "Cannot handle constant offsets yet!"); - Disp = N.getOperand(1).getOperand(0); // The global address. - assert(Disp.getOpcode() == ISD::TargetGlobalAddress || - Disp.getOpcode() == ISD::TargetConstantPool || - Disp.getOpcode() == ISD::TargetJumpTable); - Base = N.getOperand(0); - return true; // [&g+r] - } - } else if (N.getOpcode() == ISD::OR) { - short imm = 0; - if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) { - // If this is an or of disjoint bitfields, we can codegen this as an add - // (for better address arithmetic) if the LHS and RHS of the OR are - // provably disjoint. - APInt LHSKnownZero, LHSKnownOne; - DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); - if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { - // If all of the bits are known zero on the LHS or RHS, the add won't - // carry. - Base = N.getOperand(0); - Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32); - return true; - } - } - } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { - // Loading from a constant address. Verify low two bits are clear. 
- if ((CN->getZExtValue() & 3) == 0) { - // If this address fits entirely in a 14-bit sext immediate field, codegen - // this as "d, 0" - short Imm; - if (isIntS16Immediate(CN, Imm)) { - Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy()); - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, - CN->getValueType(0)); - return true; - } - - // Fold the low-part of 32-bit absolute addresses into addr mode. - if (CN->getValueType(0) == MVT::i32 || - (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) { - int Addr = (int)CN->getZExtValue(); - - // Otherwise, break this down into an LIS + disp. - Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32); - Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32); - unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; - Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0); - return true; - } - } - } - - Disp = DAG.getTargetConstant(0, getPointerTy()); - if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) - Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); - else - Base = N; - return true; // [r+0] -} - - /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. @@ -1288,18 +1202,16 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, return true; } - // LDU/STU use reg+imm*4, others use reg+imm. + // LDU/STU can only handle immediates that are a multiple of 4. if (VT != MVT::i64) { - // reg + imm - if (!SelectAddressRegImm(Ptr, Offset, Base, DAG)) + if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false)) return false; } else { // LDU/STU need an address with at least 4-byte alignment. if (Alignment < 4) return false; - // reg + imm * 4. 
- if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG)) + if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true)) return false; } @@ -1355,7 +1267,7 @@ static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG) { EVT PtrVT = HiPart.getValueType(); SDValue Zero = DAG.getConstant(0, PtrVT); - DebugLoc DL = HiPart.getDebugLoc(); + SDLoc DL(HiPart); SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); @@ -1380,7 +1292,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, // The actual address of the GlobalValue is stored in the TOC. if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); - return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA, + return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA, DAG.getRegister(PPC::X2, MVT::i64)); } @@ -1401,7 +1313,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { // The actual address of the GlobalValue is stored in the TOC. 
if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); - return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA, + return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA, DAG.getRegister(PPC::X2, MVT::i64)); } @@ -1429,7 +1341,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); - DebugLoc dl = GA->getDebugLoc(); + SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(); bool is64bit = PPCSubTarget.isPPC64(); @@ -1515,7 +1427,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); - DebugLoc DL = GSDN->getDebugLoc(); + SDLoc DL(GSDN); const GlobalValue *GV = GSDN->getGlobal(); // 64-bit SVR4 ABI code is always position-independent. @@ -1546,7 +1458,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // If we're comparing for equality to zero, expose the fact that this is // implented as a ctlz/srl pair on ppc, so that the dag combiner can @@ -1595,7 +1507,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, SDValue InChain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); @@ -1706,7 +1618,7 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function SDValue Nest = Op.getOperand(3); // 'nest' parameter value - DebugLoc dl 
= Op.getDebugLoc(); + SDLoc dl(Op); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = (PtrVT == MVT::i64); @@ -1748,7 +1660,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, MachineFunction &MF = DAG.getMachineFunction(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the @@ -1842,18 +1754,18 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, #include "PPCGenCallingConv.inc" -static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { +bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { return true; } -static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { +bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { static const uint16_t ArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, @@ -1876,11 +1788,11 @@ static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, return false; } -static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { +bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { static const uint16_t ArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 @@ 
-1931,7 +1843,7 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { if (PPCSubTarget.isSVR4ABI()) { @@ -1953,7 +1865,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { // 32-bit SVR4 ABI Stack Frame Layout: @@ -2170,14 +2082,14 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG, SDValue ArgVal, - DebugLoc dl) const { + SDLoc dl) const { if (Flags.isSExt()) ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); else if (Flags.isZExt()) ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); - + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); } @@ -2213,7 +2125,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // @@ -2502,7 +2414,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // @@ -2600,17 +2512,17 @@ PPCTargetLowering::LowerFormalArguments_Darwin( SmallVector<SDValue, 8> MemOps; unsigned nAltivecParamsAtEnd = 0; - // FIXME: FuncArg and Ins[ArgNo] must reference the same argument. 
- // When passing anonymous aggregates, this is currently not true. - // See LowerFormalArguments_64SVR4 for a fix. Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); - for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) { + unsigned CurArgIdx = 0; + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; unsigned ObjSize = ObjectVT.getSizeInBits()/8; unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; + std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); + CurArgIdx = Ins[ArgNo].OrigArgIndex; unsigned CurArgOffset = ArgOffset; @@ -3004,7 +2916,7 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs, SmallVector<SDValue, 8> &MemOpChains, - DebugLoc dl) { + SDLoc dl) { for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { SDValue Arg = TailCallArgs[i].Arg; SDValue FIN = TailCallArgs[i].FrameIdxOp; @@ -3026,7 +2938,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, int SPDiff, bool isPPC64, bool isDarwinABI, - DebugLoc dl) { + SDLoc dl) { if (SPDiff) { // Calculate the new stack slot for the return address. int SlotSize = isPPC64 ? 8 : 4; @@ -3083,7 +2995,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, SDValue &LROpOut, SDValue &FPOpOut, bool isDarwinABI, - DebugLoc dl) const { + SDLoc dl) const { if (SPDiff) { // Load the LR and FP stack slot for later adjusting. EVT VT = PPCSubTarget.isPPC64() ? 
MVT::i64 : MVT::i32; @@ -3113,7 +3025,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, - DebugLoc dl) { + SDLoc dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), false, false, MachinePointerInfo(0), @@ -3128,7 +3040,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVector<SDValue, 8> &MemOpChains, SmallVector<TailCallArgumentInfo, 8> &TailCallArguments, - DebugLoc dl) { + SDLoc dl) { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); if (!isTailCall) { if (isVector) { @@ -3149,7 +3061,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, - DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, + SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, bool isDarwinABI, SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) { MachineFunction &MF = DAG.getMachineFunction(); @@ -3171,13 +3083,13 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, // Emit callseq_end just before tailcall node. 
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag); + DAG.getIntPtrConstant(0, true), InFlag, dl); InFlag = Chain.getValue(1); } static unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, - SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall, + SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall, SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys, const PPCSubtarget &PPCSubTarget) { @@ -3363,7 +3275,7 @@ SDValue PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { SmallVector<CCValAssign, 16> RVLocs; @@ -3406,7 +3318,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, } SDValue -PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, +PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall, bool isVarArg, SelectionDAG &DAG, SmallVector<std::pair<unsigned, SDValue>, 8> @@ -3493,7 +3405,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(BytesCalleePops, true), - InFlag); + InFlag, dl); if (!Ins.empty()) InFlag = Chain.getValue(1); @@ -3505,7 +3417,7 @@ SDValue PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { SelectionDAG &DAG = CLI.DAG; - DebugLoc &dl = CLI.DL; + SDLoc &dl = CLI.DL; SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; SmallVector<SDValue, 32> &OutVals = CLI.OutVals; SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; @@ -3542,7 +3454,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const 
SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description // of the 32-bit SVR4 ABI stack frame layout. @@ -3628,7 +3540,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), + dl); SDValue CallSeqStart = Chain; // Load the return address and frame pointer so it can be moved somewhere else @@ -3679,7 +3592,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, // This must go outside the CALLSEQ_START..END. SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, - CallSeqStart.getNode()->getOperand(1)); + CallSeqStart.getNode()->getOperand(1), + SDLoc(MemcpyCall)); DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode()); Chain = CallSeqStart = NewCallSeqStart; @@ -3755,13 +3669,14 @@ PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, - DebugLoc dl) const { + SDLoc dl) const { SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, CallSeqStart.getNode()->getOperand(0), Flags, DAG, dl); // The MEMCPY must go outside the CALLSEQ_START..END. 
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, - CallSeqStart.getNode()->getOperand(1)); + CallSeqStart.getNode()->getOperand(1), + SDLoc(MemcpyCall)); DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode()); return NewCallSeqStart; @@ -3774,7 +3689,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { unsigned NumOps = Outs.size(); @@ -3815,7 +3730,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), + dl); SDValue CallSeqStart = Chain; // Load the return address and frame pointer so it can be move somewhere else @@ -3940,7 +3856,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // register. // FIXME: The memcpy seems to produce pretty awful code for // small aggregates, particularly for packed ones. - // FIXME: It would be preferable to use the slot in the + // FIXME: It would be preferable to use the slot in the // parameter save area instead of a new local variable. 
SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); @@ -4145,7 +4061,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { unsigned NumOps = Outs.size(); @@ -4186,7 +4102,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), + dl); SDValue CallSeqStart = Chain; // Load the return address and frame pointer so it can be move somewhere else @@ -4502,7 +4419,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { + SDLoc dl, SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), @@ -4551,7 +4468,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain, SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const { // When we pop the dynamic allocation we need to restore the SP link. - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Get the corect type for pointers. EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -4636,7 +4553,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, // Get the inputs. 
SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Get the corect type for pointers. EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -4653,7 +4570,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), Op.getOperand(1)); @@ -4661,7 +4578,7 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0), Op.getOperand(1)); } @@ -4687,7 +4604,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT CmpVT = Op.getOperand(0).getValueType(); SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // If the RHS of the comparison is a 0.0, we don't need to do the // subtraction at all. @@ -4768,7 +4685,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // FIXME: Split this code up when LegalizeDAGTypes lands. SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, - DebugLoc dl) const { + SDLoc dl) const { assert(Op.getOperand(0).getValueType().isFloatingPoint()); SDValue Src = Op.getOperand(0); if (Src.getValueType() == MVT::f32) @@ -4827,7 +4744,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Don't handle ppc_fp128 here; let it be lowered to a libcall. 
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); @@ -4961,7 +4878,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); /* The rounding mode is in bits 30:31 of FPSR, and has the following settings: @@ -5027,7 +4944,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() && "Unexpected SHL!"); @@ -5055,7 +4972,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned BitWidth = VT.getSizeInBits(); assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() && @@ -5083,7 +5000,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { } SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); assert(Op.getNumOperands() == 3 && @@ -5118,7 +5035,7 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { /// BuildSplatI - Build a canonical splati of Val with an element size of /// SplatSize. Cast the result to VT. static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, - SelectionDAG &DAG, DebugLoc dl) { + SelectionDAG &DAG, SDLoc dl) { assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); static const EVT VTys[] = { // canonical VT to use for each size. 
@@ -5142,10 +5059,20 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res); } +/// BuildIntrinsicOp - Return a unary operator intrinsic node with the +/// specified intrinsic ID. +static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, + SelectionDAG &DAG, SDLoc dl, + EVT DestVT = MVT::Other) { + if (DestVT == MVT::Other) DestVT = Op.getValueType(); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, + DAG.getConstant(IID, MVT::i32), Op); +} + /// BuildIntrinsicOp - Return a binary operator intrinsic node with the /// specified intrinsic ID. static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, - SelectionDAG &DAG, DebugLoc dl, + SelectionDAG &DAG, SDLoc dl, EVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = LHS.getValueType(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, @@ -5156,7 +5083,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, /// specified intrinsic ID. static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, SDValue Op2, SelectionDAG &DAG, - DebugLoc dl, EVT DestVT = MVT::Other) { + SDLoc dl, EVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = Op0.getValueType(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2); @@ -5166,7 +5093,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified /// amount. The result has the specified value type. static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, - EVT VT, SelectionDAG &DAG, DebugLoc dl) { + EVT VT, SelectionDAG &DAG, SDLoc dl) { // Force LHS/RHS to be the right type. 
LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); @@ -5185,7 +5112,7 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, // sequence of ops that should be used. SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); @@ -5341,7 +5268,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, /// the specified operations to build the shuffle. static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, - DebugLoc dl) { + SDLoc dl) { unsigned OpNum = (PFEntry >> 26) & 0x0F; unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); @@ -5420,7 +5347,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, /// lowered into a vperm. SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); @@ -5587,7 +5514,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { // If this is a lowered altivec predicate compare, CompareOpc is set to the // opcode number of the comparison. - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); int CompareOpc; bool isDot; if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) @@ -5651,7 +5578,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Create a stack slot that is 16-byte aligned. 
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); @@ -5668,7 +5595,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, } SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (Op.getValueType() == MVT::v4i32) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); @@ -5755,7 +5682,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, - Op.getDebugLoc()); + SDLoc(Op)); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); @@ -5772,6 +5699,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); + // For counter-based loop handling. + case ISD::INTRINSIC_W_CHAIN: return SDValue(); + // Frame & Return address. 
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); @@ -5782,10 +5712,26 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, SelectionDAG &DAG) const { const TargetMachine &TM = getTargetMachine(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); switch (N->getOpcode()) { default: llvm_unreachable("Do not know how to custom type legalize this operation!"); + case ISD::INTRINSIC_W_CHAIN: { + if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != + Intrinsic::ppc_is_decremented_ctr_nonzero) + break; + + assert(N->getValueType(0) == MVT::i1 && + "Unexpected result type for CTR decrement intrinsic"); + EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0)); + SDVTList VTs = DAG.getVTList(SVT, MVT::Other); + SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), + N->getOperand(1)); + + Results.push_back(NewInt); + Results.push_back(NewInt.getValue(1)); + break; + } case ISD::VAARG: { if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI() || TM.getSubtarget<PPCSubtarget>().isPPC64()) @@ -6101,7 +6047,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) { MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) .addReg(PPC::X2) - .addImm(TOCOffset / 4) + .addImm(TOCOffset) .addReg(BufReg); MIB.setMemRefs(MMOBegin, MMOEnd); @@ -6109,7 +6055,9 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); - MIB.addRegMask(PPCRegInfo->getNoPreservedMask()); + const PPCRegisterInfo *TRI = + static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo()); + MIB.addRegMask(TRI->getNoPreservedMask()); BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); @@ -6129,7 +6077,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, if (PPCSubTarget.isPPC64()) { MIB = BuildMI(mainMBB, DL, 
TII->get(PPC::STD)) .addReg(LabelReg) - .addImm(LabelOffset / 4) + .addImm(LabelOffset) .addReg(BufReg); } else { MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) @@ -6202,7 +6150,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, // Reload IP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) - .addImm(LabelOffset / 4) + .addImm(LabelOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) @@ -6214,7 +6162,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, // Reload SP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) - .addImm(SPOffset / 4) + .addImm(SPOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) @@ -6229,7 +6177,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, // Reload TOC if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) - .addImm(TOCOffset / 4) + .addImm(TOCOffset) .addReg(BufReg); MIB.setMemRefs(MMOBegin, MMOEnd); @@ -6272,8 +6220,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, Cond.push_back(MI->getOperand(1)); DebugLoc dl = MI->getDebugLoc(); - PPCII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), Cond, - MI->getOperand(2).getReg(), MI->getOperand(3).getReg()); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), + Cond, MI->getOperand(2).getReg(), + MI->getOperand(3).getReg()); } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || MI->getOpcode() == PPC::SELECT_CC_I8 || MI->getOpcode() == PPC::SELECT_CC_F4 || @@ -6717,7 +6667,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op, ++Iterations; SelectionDAG &DAG = DCI.DAG; - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue FPOne = DAG.getConstantFP(1.0, VT.getScalarType()); @@ -6779,7 +6729,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op, ++Iterations; SelectionDAG &DAG = 
DCI.DAG; - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue FPThreeHalves = DAG.getConstantFP(1.5, VT.getScalarType()); @@ -6823,11 +6773,120 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op, return SDValue(); } +// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does +// not enforce equality of the chain operands. +static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base, + unsigned Bytes, int Dist, + SelectionDAG &DAG) { + EVT VT = LS->getMemoryVT(); + if (VT.getSizeInBits() / 8 != Bytes) + return false; + + SDValue Loc = LS->getBasePtr(); + SDValue BaseLoc = Base->getBasePtr(); + if (Loc.getOpcode() == ISD::FrameIndex) { + if (BaseLoc.getOpcode() != ISD::FrameIndex) + return false; + const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); + int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); + int FS = MFI->getObjectSize(FI); + int BFS = MFI->getObjectSize(BFI); + if (FS != BFS || FS != (int)Bytes) return false; + return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); + } + + // Handle X+C + if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && + cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) + return true; + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const GlobalValue *GV1 = NULL; + const GlobalValue *GV2 = NULL; + int64_t Offset1 = 0; + int64_t Offset2 = 0; + bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); + bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); + if (isGA1 && isGA2 && GV1 == GV2) + return Offset1 == (Offset2 + Dist*Bytes); + return false; +} + +// Return true is there is a nearyby consecutive load to the one provided +// (regardless of alignment). We search up and down the chain, looking though +// token factors and other loads (but nothing else). 
As a result, a true +// results indicates that it is safe to create a new consecutive load adjacent +// to the load provided. +static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { + SDValue Chain = LD->getChain(); + EVT VT = LD->getMemoryVT(); + + SmallSet<SDNode *, 16> LoadRoots; + SmallVector<SDNode *, 8> Queue(1, Chain.getNode()); + SmallSet<SDNode *, 16> Visited; + + // First, search up the chain, branching to follow all token-factor operands. + // If we find a consecutive load, then we're done, otherwise, record all + // nodes just above the top-level loads and token factors. + while (!Queue.empty()) { + SDNode *ChainNext = Queue.pop_back_val(); + if (!Visited.insert(ChainNext)) + continue; + + if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) { + if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) + return true; + + if (!Visited.count(ChainLD->getChain().getNode())) + Queue.push_back(ChainLD->getChain().getNode()); + } else if (ChainNext->getOpcode() == ISD::TokenFactor) { + for (SDNode::op_iterator O = ChainNext->op_begin(), + OE = ChainNext->op_end(); O != OE; ++O) + if (!Visited.count(O->getNode())) + Queue.push_back(O->getNode()); + } else + LoadRoots.insert(ChainNext); + } + + // Second, search down the chain, starting from the top-level nodes recorded + // in the first phase. These top-level nodes are the nodes just above all + // loads and token factors. Starting with their uses, recursively look though + // all loads (just the chain uses) and token factors to find a consecutive + // load. 
+ Visited.clear(); + Queue.clear(); + + for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(), + IE = LoadRoots.end(); I != IE; ++I) { + Queue.push_back(*I); + + while (!Queue.empty()) { + SDNode *LoadRoot = Queue.pop_back_val(); + if (!Visited.insert(LoadRoot)) + continue; + + if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot)) + if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) + return true; + + for (SDNode::use_iterator UI = LoadRoot->use_begin(), + UE = LoadRoot->use_end(); UI != UE; ++UI) + if (((isa<LoadSDNode>(*UI) && + cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) || + UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) + Queue.push_back(*UI); + } + } + + return false; +} + SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { const TargetMachine &TM = getTargetMachine(); SelectionDAG &DAG = DCI.DAG; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); switch (N->getOpcode()) { default: break; case PPCISD::SHL: @@ -6868,7 +6927,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DCI); if (RV.getNode() != 0) { DCI.AddToWorklist(RV.getNode()); - RV = DAG.getNode(ISD::FP_EXTEND, N->getOperand(1).getDebugLoc(), + RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)), N->getValueType(0), RV); DCI.AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), @@ -6881,7 +6940,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DCI); if (RV.getNode() != 0) { DCI.AddToWorklist(RV.getNode()); - RV = DAG.getNode(ISD::FP_ROUND, N->getOperand(1).getDebugLoc(), + RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)), N->getValueType(0), RV, N->getOperand(1).getOperand(1)); DCI.AddToWorklist(RV.getNode()); @@ -6999,6 +7058,157 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, cast<StoreSDNode>(N)->getMemOperand()); } break; + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(N); + EVT VT = LD->getValueType(0); + Type *Ty = 
LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty); + if (ISD::isNON_EXTLoad(N) && VT.isVector() && + TM.getSubtarget<PPCSubtarget>().hasAltivec() && + DCI.getDAGCombineLevel() == AfterLegalizeTypes && + LD->getAlignment() < ABIAlignment) { + // This is a type-legal unaligned Altivec load. + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + + // This implements the loading of unaligned vectors as described in + // the venerable Apple Velocity Engine overview. Specifically: + // https://developer.apple.com/hardwaredrivers/ve/alignment.html + // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html + // + // The general idea is to expand a sequence of one or more unaligned + // loads into a alignment-based permutation-control instruction (lvsl), + // a series of regular vector loads (which always truncate their + // input address to an aligned address), and a series of permutations. + // The results of these permutations are the requested loaded values. + // The trick is that the last "extra" load is not taken from the address + // you might suspect (sizeof(vector) bytes after the last requested + // load), but rather sizeof(vector) - 1 bytes after the last + // requested vector. The point of this is to avoid a page fault if the + // base address happend to be aligned. This works because if the base + // address is aligned, then adding less than a full vector length will + // cause the last vector in the sequence to be (re)loaded. Otherwise, + // the next vector will be fetched as you might suspect was necessary. + + // We might be able to reuse the permutation generation from + // a different base address offset from this one by an aligned amount. + // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this + // optimization later. 
+ SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr, + DAG, dl, MVT::v16i8); + + // Refine the alignment of the original load (a "new" load created here + // which was identical to the first except for the alignment would be + // merged with the existing node regardless). + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(LD->getPointerInfo(), + LD->getMemOperand()->getFlags(), + LD->getMemoryVT().getStoreSize(), + ABIAlignment); + LD->refineAlignment(MMO); + SDValue BaseLoad = SDValue(LD, 0); + + // Note that the value of IncOffset (which is provided to the next + // load's pointer info offset value, and thus used to calculate the + // alignment), and the value of IncValue (which is actually used to + // increment the pointer value) are different! This is because we + // require the next load to appear to be aligned, even though it + // is actually offset from the base pointer by a lesser amount. + int IncOffset = VT.getSizeInBits() / 8; + int IncValue = IncOffset; + + // Walk (both up and down) the chain looking for another load at the real + // (aligned) offset (the alignment of the other load does not matter in + // this case). If found, then do not use the offset reduction trick, as + // that will prevent the loads from being later combined (as they would + // otherwise be duplicates). 
+ if (!findConsecutiveLoad(LD, DAG)) + --IncValue; + + SDValue Increment = DAG.getConstant(IncValue, getPointerTy()); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + + SDValue ExtraLoad = + DAG.getLoad(VT, dl, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncOffset), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), ABIAlignment); + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + BaseLoad.getValue(1), ExtraLoad.getValue(1)); + + if (BaseLoad.getValueType() != MVT::v4i32) + BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad); + + if (ExtraLoad.getValueType() != MVT::v4i32) + ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad); + + SDValue Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm, + BaseLoad, ExtraLoad, PermCntl, DAG, dl); + + if (VT != MVT::v4i32) + Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm); + + // Now we need to be really careful about how we update the users of the + // original load. We cannot just call DCI.CombineTo (or + // DAG.ReplaceAllUsesWith for that matter), because the load still has + // uses created here (the permutation for example) that need to stay. + SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + while (UI != UE) { + SDUse &Use = UI.getUse(); + SDNode *User = *UI; + // Note: BaseLoad is checked here because it might not be N, but a + // bitcast of N. + if (User == Perm.getNode() || User == BaseLoad.getNode() || + User == TF.getNode() || Use.getResNo() > 1) { + ++UI; + continue; + } + + SDValue To = Use.getResNo() ? 
TF : Perm; + ++UI; + + SmallVector<SDValue, 8> Ops; + for (SDNode::op_iterator O = User->op_begin(), + OE = User->op_end(); O != OE; ++O) { + if (*O == Use) + Ops.push_back(To); + else + Ops.push_back(*O); + } + + DAG.UpdateNodeOperands(User, Ops.data(), Ops.size()); + } + + return SDValue(N, 0); + } + } + break; + case ISD::INTRINSIC_WO_CHAIN: + if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == + Intrinsic::ppc_altivec_lvsl && + N->getOperand(1)->getOpcode() == ISD::ADD) { + SDValue Add = N->getOperand(1); + + if (DAG.MaskedValueIsZero(Add->getOperand(1), + APInt::getAllOnesValue(4 /* 16 byte alignment */).zext( + Add.getValueType().getScalarType().getSizeInBits()))) { + SDNode *BasePtr = Add->getOperand(0).getNode(); + for (SDNode::use_iterator UI = BasePtr->use_begin(), + UE = BasePtr->use_end(); UI != UE; ++UI) { + if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && + cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == + Intrinsic::ppc_altivec_lvsl) { + // We've found another LVSL, and this address if an aligned + // multiple of that one. The results will be the same, so use the + // one we've just found instead. + + return SDValue(*UI, 0); + } + } + } + } case ISD::BSWAP: // Turn BSWAP (LOAD) -> lhbrx/lwbrx. if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && @@ -7097,6 +7307,39 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // compare down to code that is difficult to reassemble. ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); + + // Sometimes the promoted value of the intrinsic is ANDed by some non-zero + // value. If so, pass-through the AND to get to the intrinsic. 
+ if (LHS.getOpcode() == ISD::AND && + LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && + cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() == + Intrinsic::ppc_is_decremented_ctr_nonzero && + isa<ConstantSDNode>(LHS.getOperand(1)) && + !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()-> + isZero()) + LHS = LHS.getOperand(0); + + if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && + cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == + Intrinsic::ppc_is_decremented_ctr_nonzero && + isa<ConstantSDNode>(RHS)) { + assert((CC == ISD::SETEQ || CC == ISD::SETNE) && + "Counter decrement comparison is not EQ or NE"); + + unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); + bool isBDNZ = (CC == ISD::SETEQ && Val) || + (CC == ISD::SETNE && !Val); + + // We now need to make the intrinsic dead (it cannot be instruction + // selected). + DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); + assert(LHS.getNode()->hasOneUse() && + "Counter decrement has more than one use"); + + return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, + N->getOperand(0), N->getOperand(4)); + } + int CompareOpc; bool isDot; @@ -7406,25 +7649,13 @@ bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, return true; } -/// isLegalAddressImmediate - Return true if the integer value can be used -/// as the offset of the target addressing mode for load / store of the -/// given type. -bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{ - // PPC allows a sign-extended 16-bit immediate field. 
- return (V > -(1 << 16) && V < (1 << 16)-1); -} - -bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const { - return false; -} - SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setReturnAddressIsTaken(true); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); // Make sure the function does not optimize away the store of the RA to @@ -7454,7 +7685,7 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 423e983..e85f96c 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -20,6 +20,7 @@ #include "PPCRegisterInfo.h" #include "PPCSubtarget.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/Target/TargetLowering.h" namespace llvm { @@ -146,6 +147,10 @@ namespace llvm { /// an optional input flag argument. COND_BRANCH, + /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based + /// loops. + BDNZ, BDZ, + /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding /// towards zero. Used only as part of the long double-to-int /// conversion sequence. @@ -175,61 +180,61 @@ namespace llvm { /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec /// TLS model, produces an ADDIS8 instruction that adds the GOT - /// base to sym@got@tprel@ha. + /// base to sym\@got\@tprel\@ha. 
ADDIS_GOT_TPREL_HA, /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec /// TLS model, produces a LD instruction with base register G8RReg - /// and offset sym@got@tprel@l. This completes the addition that + /// and offset sym\@got\@tprel\@l. This completes the addition that /// finds the offset of "sym" relative to the thread pointer. LD_GOT_TPREL_L, /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS /// model, produces an ADD instruction that adds the contents of /// G8RReg to the thread pointer. Symbol contains a relocation - /// sym@tls which is to be replaced by the thread pointer and + /// sym\@tls which is to be replaced by the thread pointer and /// identifies to the linker that the instruction is part of a /// TLS sequence. ADD_TLS, /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base - /// register to sym@got@tlsgd@ha. + /// register to sym\@got\@tlsgd\@ha. ADDIS_TLSGD_HA, /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym@got@tlsgd@l. + /// sym\@got\@tlsgd\@l. ADDI_TLSGD_L, /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS - /// model, produces a call to __tls_get_addr(sym@tlsgd). + /// model, produces a call to __tls_get_addr(sym\@tlsgd). GET_TLS_ADDR, /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base - /// register to sym@got@tlsld@ha. + /// register to sym\@got\@tlsld\@ha. ADDIS_TLSLD_HA, /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym@got@tlsld@l. + /// sym\@got\@tlsld\@l. ADDI_TLSLD_L, /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS - /// model, produces a call to __tls_get_addr(sym@tlsld). + /// model, produces a call to __tls_get_addr(sym\@tlsld). 
GET_TLSLD_ADDR, /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the /// local-dynamic TLS model, produces an ADDIS8 instruction - /// that adds X3 to sym@dtprel@ha. The Chain operand is needed + /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed /// to tie this in place following a copy to %X3 from the result /// of a GET_TLSLD_ADDR. ADDIS_DTPREL_HA, /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym@got@dtprel@l. + /// sym\@got\@dtprel\@l. ADDI_DTPREL_L, /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded @@ -238,6 +243,10 @@ namespace llvm { /// optimizations due to constant folding. VADD_SPLAT, + /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned + /// operand identifies the operating system entry point. + SC, + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or @@ -266,16 +275,16 @@ namespace llvm { /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model, /// produces an ADDIS8 instruction that adds the TOC base register to - /// sym@toc@ha. + /// sym\@toc\@ha. ADDIS_TOC_HA, /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model, /// produces a LD instruction with base register G8RReg and offset - /// sym@toc@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. + /// sym\@toc\@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. LD_TOC_L, /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces - /// an ADDI8 instruction that adds G8RReg to sym@toc@l. + /// an ADDI8 instruction that adds G8RReg to sym\@toc\@l. /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. 
ADDI_TOC_L }; @@ -327,8 +336,6 @@ namespace llvm { class PPCTargetLowering : public TargetLowering { const PPCSubtarget &PPCSubTarget; - const PPCRegisterInfo *PPCRegInfo; - const PPCInstrInfo *PPCII; public: explicit PPCTargetLowering(PPCTargetMachine &TM); @@ -340,7 +347,7 @@ namespace llvm { virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } /// getSetCCResultType - Return the ISD::SETCC ValueType - virtual EVT getSetCCResultType(EVT VT) const; + virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const; /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address @@ -358,21 +365,16 @@ namespace llvm { /// SelectAddressRegImm - Returns true if the address N can be represented /// by a base register plus a signed 16-bit displacement [r+imm], and if it - /// is not better represented as reg+reg. + /// is not better represented as reg+reg. If Aligned is true, only accept + /// displacements suitable for STD and friends, i.e. multiples of 4. bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, - SelectionDAG &DAG) const; + SelectionDAG &DAG, bool Aligned) const; /// SelectAddressRegRegOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const; - /// SelectAddressRegImmShift - Returns true if the address N can be - /// represented by a base register plus a signed 14-bit displacement - /// [r+imm*4]. Suitable for use by STD and friends. - bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base, - SelectionDAG &DAG) const; - Sched::Preference getSchedulingPreference(SDNode *N) const; /// LowerOperation - Provide custom lowering hooks for some operations. @@ -436,15 +438,6 @@ namespace llvm { /// by AM is legal for this target, for a load/store of the specified type. 
virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; - /// isLegalAddressImmediate - Return true if the integer value can be used - /// as the offset of the target addressing mode for load / store of the - /// given type. - virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const; - - /// isLegalAddressImmediate - Return true if the GlobalValue can be used as - /// the offset of the target addressing mode. - virtual bool isLegalAddressImmediate(GlobalValue *GV) const; - virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; /// getOptimalMemOpType - Returns the target specific optimal type for load @@ -459,7 +452,7 @@ namespace llvm { /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. virtual EVT - getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; @@ -490,7 +483,7 @@ namespace llvm { SDValue &LROpOut, SDValue &FPOpOut, bool isDarwinABI, - DebugLoc dl) const; + SDLoc dl) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; @@ -511,7 +504,7 @@ namespace llvm { SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const; + SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; @@ -526,9 +519,9 @@ namespace llvm { SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - 
DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; - SDValue FinishCall(CallingConv::ID CallConv, DebugLoc dl, bool isTailCall, + SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall, bool isVarArg, SelectionDAG &DAG, SmallVector<std::pair<unsigned, SDValue>, 8> @@ -543,7 +536,7 @@ namespace llvm { LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; virtual SDValue @@ -561,11 +554,11 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - DebugLoc dl, SelectionDAG &DAG) const; + SDLoc dl, SelectionDAG &DAG) const; SDValue extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG, - SDValue ArgVal, DebugLoc dl) const; + SDValue ArgVal, SDLoc dl) const; void setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG, @@ -576,25 +569,25 @@ namespace llvm { LowerFormalArguments_Darwin(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue LowerFormalArguments_64SVR4(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue LowerFormalArguments_32SVR4(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags, - SelectionDAG &DAG, DebugLoc dl) const; + SelectionDAG 
&DAG, SDLoc dl) const; SDValue LowerCall_Darwin(SDValue Chain, SDValue Callee, @@ -603,7 +596,7 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee, @@ -612,7 +605,7 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, @@ -620,7 +613,7 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; @@ -629,6 +622,23 @@ namespace llvm { SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const; SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const; }; + + bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); + + bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); + + bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); } #endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index e5d0b91..0245ba7 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ 
b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -17,17 +17,12 @@ // def s16imm64 : Operand<i64> { let PrintMethod = "printS16ImmOperand"; + let EncoderMethod = "getS16ImmEncoding"; + let ParserMatchClass = PPCS16ImmAsmOperand; } def u16imm64 : Operand<i64> { let PrintMethod = "printU16ImmOperand"; -} -def symbolHi64 : Operand<i64> { - let PrintMethod = "printSymbolHi"; - let EncoderMethod = "getHA16Encoding"; -} -def symbolLo64 : Operand<i64> { - let PrintMethod = "printSymbolLo"; - let EncoderMethod = "getLO16Encoding"; + let ParserMatchClass = PPCU16ImmAsmOperand; } def tocentry : Operand<iPTR> { let MIOperandInfo = (ops i64imm:$imm); @@ -289,6 +284,12 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS), "mtctr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } +let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR8] in { +let Pattern = [(int_ppc_mtctr i64:$rS)] in +def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS), + "mtctr $rS", SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} let Pattern = [(set i64:$rT, readcyclecounter)] in def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins), @@ -325,10 +326,10 @@ let Interpretation64Bit = 1 in { let neverHasSideEffects = 1 in { let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { -def LI8 : DForm_2_r0<14, (outs g8rc:$rD), (ins symbolLo64:$imm), +def LI8 : DForm_2_r0<14, (outs g8rc:$rD), (ins s16imm64:$imm), "li $rD, $imm", IntSimple, - [(set i64:$rD, immSExt16:$imm)]>; -def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins symbolHi64:$imm), + [(set i64:$rD, imm64SExt16:$imm)]>; +def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins s16imm64:$imm), "lis $rD, $imm", IntSimple, [(set i64:$rD, imm16ShiftedSExt:$imm)]>; } @@ -400,18 +401,18 @@ defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), let Defs = [CARRY] in def ADDIC8 : DForm_2<12, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), "addic $rD, $rA, $imm", IntGeneral, - [(set i64:$rD, (addc i64:$rA, 
immSExt16:$imm))]>; -def ADDI8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, symbolLo64:$imm), + [(set i64:$rD, (addc i64:$rA, imm64SExt16:$imm))]>; +def ADDI8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set i64:$rD, (add i64:$rA, immSExt16:$imm))]>; -def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, symbolHi64:$imm), + [(set i64:$rD, (add i64:$rA, imm64SExt16:$imm))]>; +def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$imm), "addis $rD, $rA, $imm", IntSimple, [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>; let Defs = [CARRY] in { def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), "subfic $rD, $rA, $imm", IntGeneral, - [(set i64:$rD, (subc immSExt16:$imm, i64:$rA))]>; + [(set i64:$rD, (subc imm64SExt16:$imm, i64:$rA))]>; defm SUBFC8 : XOForm_1r<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), "subfc", "$rT, $rA, $rB", IntGeneral, [(set i64:$rT, (subc i64:$rB, i64:$rA))]>, @@ -746,25 +747,25 @@ def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), (PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64; // Support for thread-local storage. 
-def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp), +def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDISgotTprelHA", [(set i64:$rD, (PPCaddisGotTprelHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins symbolLo64:$disp, g8rc_nox0:$reg), +def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg), "#LDgotTprelL", [(set i64:$rD, (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>, isPPC64; def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g), (ADD8TLS $in, tglobaltlsaddr:$g)>; -def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp), +def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDIStlsgdHA", [(set i64:$rD, (PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp), +def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDItlsgdL", [(set i64:$rD, (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>, @@ -774,12 +775,12 @@ def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), [(set i64:$rD, (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>, isPPC64; -def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp), +def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDIStlsldHA", [(set i64:$rD, (PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp), +def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDItlsldL", [(set i64:$rD, (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>, @@ -789,13 +790,13 @@ def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), [(set i64:$rD, (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>, isPPC64; -def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins 
g8rc_nox0:$reg, symbolHi64:$disp), +def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDISdtprelHA", [(set i64:$rD, (PPCaddisDtprelHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp), +def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDIdtprelL", [(set i64:$rD, (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>, diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 41b4e01..a244058 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -145,6 +145,19 @@ class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk, let Inst{31} = lk; } +// 1.7.3 SC-Form +class SCForm<bits<6> opcode, bits<1> xo, + dag OOL, dag IOL, string asmstr, InstrItinClass itin, + list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<7> LEV; + + let Pattern = pattern; + + let Inst{20-26} = LEV; + let Inst{30} = xo; +} + // 1.7.4 D-Form class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> @@ -365,6 +378,12 @@ class XForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>; +class XForm_1a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let RST = 0; +} + class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> { diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 847bd22..a3eeb20 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -47,7 +47,7 @@ cl::desc("Disable 
compare instruction optimization"), cl::Hidden); PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP), - TM(tm), RI(*TM.getSubtargetImpl(), *this) {} + TM(tm), RI(*TM.getSubtargetImpl()) {} /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for /// this target when scheduling the DAG. @@ -74,10 +74,9 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( // Most subtargets use a PPC970 recognizer. if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 && Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) { - const TargetInstrInfo *TII = TM.getInstrInfo(); - assert(TII && "No InstrInfo?"); + assert(TM.getInstrInfo() && "No InstrInfo?"); - return new PPCHazardRecognizer970(*TII); + return new PPCHazardRecognizer970(TM); } return new PPCScoreboardHazardRecognizer(II, DAG); @@ -1096,8 +1095,11 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, int OpC = CmpInstr->getOpcode(); unsigned CRReg = CmpInstr->getOperand(0).getReg(); - bool isFP = OpC == PPC::FCMPUS || OpC == PPC::FCMPUD; - unsigned CRRecReg = isFP ? PPC::CR1 : PPC::CR0; + + // FP record forms set CR1 based on the exception status bits, not a + // comparison with zero. + if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD) + return false; // The record forms set the condition register based on a signed comparison // with zero (so says the ISA manual). 
This is not as straightforward as it @@ -1140,9 +1142,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, equalityOnly = true; } else return false; - } else if (!isFP) + } else equalityOnly = is64BitUnsignedCompare; - } else if (!isFP) + } else equalityOnly = is32BitUnsignedCompare; if (equalityOnly) { @@ -1153,25 +1155,19 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, MachineInstr *UseMI = &*I; if (UseMI->getOpcode() == PPC::BCC) { unsigned Pred = UseMI->getOperand(0).getImm(); - if (Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE) - continue; - - return false; + if (Pred != PPC::PRED_EQ && Pred != PPC::PRED_NE) + return false; } else if (UseMI->getOpcode() == PPC::ISEL || UseMI->getOpcode() == PPC::ISEL8) { unsigned SubIdx = UseMI->getOperand(3).getSubReg(); - if (SubIdx == PPC::sub_eq) - continue; - - return false; + if (SubIdx != PPC::sub_eq) + return false; } else return false; } } - // Get ready to iterate backward from CmpInstr. - MachineBasicBlock::iterator I = CmpInstr, E = MI, - B = CmpInstr->getParent()->begin(); + MachineBasicBlock::iterator I = CmpInstr; // Scan forward to find the first use of the compare. for (MachineBasicBlock::iterator EL = CmpInstr->getParent()->end(); @@ -1188,9 +1184,6 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, break; } - // Early exit if we're at the beginning of the BB. - if (I == B) return false; - // There are two possible candidates which can be changed to set CR[01]. // One is MI, the other is a SUB instruction. // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). @@ -1210,13 +1203,18 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, // Search for Sub. const TargetRegisterInfo *TRI = &getRegisterInfo(); --I; + + // Get ready to iterate backward from CmpInstr. 
+ MachineBasicBlock::iterator E = MI, + B = CmpInstr->getParent()->begin(); + for (; I != E && !noSub; --I) { const MachineInstr &Instr = *I; unsigned IOpC = Instr.getOpcode(); if (&*I != CmpInstr && ( - Instr.modifiesRegister(CRRecReg, TRI) || - Instr.readsRegister(CRRecReg, TRI))) + Instr.modifiesRegister(PPC::CR0, TRI) || + Instr.readsRegister(PPC::CR0, TRI))) // This instruction modifies or uses the record condition register after // the one we want to change. While we could do this transformation, it // would likely not be profitable. This transformation removes one @@ -1236,15 +1234,6 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, break; } - if (isFP && (IOpC == PPC::FSUB || IOpC == PPC::FSUBS) && - ((Instr.getOperand(1).getReg() == SrcReg && - Instr.getOperand(2).getReg() == SrcReg2) || - (Instr.getOperand(1).getReg() == SrcReg2 && - Instr.getOperand(2).getReg() == SrcReg))) { - Sub = &*I; - break; - } - if (I == B) // The 'and' is below the comparison instruction. return false; @@ -1290,8 +1279,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, // The operands to subf are the opposite of sub, so only in the fixed-point // case, invert the order. - if (!isFP) - ShouldSwap = !ShouldSwap; + ShouldSwap = !ShouldSwap; } if (ShouldSwap) @@ -1330,7 +1318,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, MachineBasicBlock::iterator MII = MI; BuildMI(*MI->getParent(), llvm::next(MII), MI->getDebugLoc(), get(TargetOpcode::COPY), CRReg) - .addReg(CRRecReg, MIOpC != NewOpC ? RegState::Kill : 0); + .addReg(PPC::CR0, MIOpC != NewOpC ? 
RegState::Kill : 0); if (MIOpC != NewOpC) { // We need to be careful here: we're replacing one instruction with diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 7d3540e..1b7ea93 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -162,6 +162,10 @@ def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPSideEffect]>; +def SDT_PPCsc : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def PPCsc : SDNode<"PPCISD::SC", SDT_PPCsc, + [SDNPHasChain, SDNPSideEffect]>; + def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>; @@ -246,13 +250,15 @@ def maskimm32 : PatLeaf<(imm), [{ return false; }]>; -def immSExt16 : PatLeaf<(imm), [{ - // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended - // field. Used by instructions like 'addi'. - if (N->getValueType(0) == MVT::i32) - return (int32_t)N->getZExtValue() == (short)N->getZExtValue(); - else - return (int64_t)N->getZExtValue() == (short)N->getZExtValue(); +def imm32SExt16 : Operand<i32>, ImmLeaf<i32, [{ + // imm32SExt16 predicate - True if the i32 immediate fits in a 16-bit + // sign extended field. Used by instructions like 'addi'. + return (int32_t)Imm == (short)Imm; +}]>; +def imm64SExt16 : Operand<i64>, ImmLeaf<i64, [{ + // imm64SExt16 predicate - True if the i64 immediate fits in a 16-bit + // sign extended field. Used by instructions like 'addi'. + return (int64_t)Imm == (short)Imm; }]>; def immZExt16 : PatLeaf<(imm), [{ // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended @@ -283,7 +289,7 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{ }], HI16>; // Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require -// restricted memrix (offset/4) constants are alignment sensitive. If these +// restricted memrix (4-aligned) constants are alignment sensitive. 
If these // offsets are hidden behind TOC entries than the values of the lower-order // bits cannot be checked directly. As a result, we need to also incorporate // an alignment check into the relevant patterns. @@ -342,30 +348,102 @@ class NoEncode<string E> { // all their register operands. // For this purpose, we define one RegisterOperand for each RegisterClass, // using the same name as the class, just in lower case. -def gprc : RegisterOperand<GPRC>; -def g8rc : RegisterOperand<G8RC>; -def gprc_nor0 : RegisterOperand<GPRC_NOR0>; -def g8rc_nox0 : RegisterOperand<G8RC_NOX0>; -def f8rc : RegisterOperand<F8RC>; -def f4rc : RegisterOperand<F4RC>; -def vrrc : RegisterOperand<VRRC>; -def crbitrc : RegisterOperand<CRBITRC>; -def crrc : RegisterOperand<CRRC>; +def PPCRegGPRCAsmOperand : AsmOperandClass { + let Name = "RegGPRC"; let PredicateMethod = "isRegNumber"; +} +def gprc : RegisterOperand<GPRC> { + let ParserMatchClass = PPCRegGPRCAsmOperand; +} +def PPCRegG8RCAsmOperand : AsmOperandClass { + let Name = "RegG8RC"; let PredicateMethod = "isRegNumber"; +} +def g8rc : RegisterOperand<G8RC> { + let ParserMatchClass = PPCRegG8RCAsmOperand; +} +def PPCRegGPRCNoR0AsmOperand : AsmOperandClass { + let Name = "RegGPRCNoR0"; let PredicateMethod = "isRegNumber"; +} +def gprc_nor0 : RegisterOperand<GPRC_NOR0> { + let ParserMatchClass = PPCRegGPRCNoR0AsmOperand; +} +def PPCRegG8RCNoX0AsmOperand : AsmOperandClass { + let Name = "RegG8RCNoX0"; let PredicateMethod = "isRegNumber"; +} +def g8rc_nox0 : RegisterOperand<G8RC_NOX0> { + let ParserMatchClass = PPCRegG8RCNoX0AsmOperand; +} +def PPCRegF8RCAsmOperand : AsmOperandClass { + let Name = "RegF8RC"; let PredicateMethod = "isRegNumber"; +} +def f8rc : RegisterOperand<F8RC> { + let ParserMatchClass = PPCRegF8RCAsmOperand; +} +def PPCRegF4RCAsmOperand : AsmOperandClass { + let Name = "RegF4RC"; let PredicateMethod = "isRegNumber"; +} +def f4rc : RegisterOperand<F4RC> { + let ParserMatchClass = PPCRegF4RCAsmOperand; +} +def 
PPCRegVRRCAsmOperand : AsmOperandClass { + let Name = "RegVRRC"; let PredicateMethod = "isRegNumber"; +} +def vrrc : RegisterOperand<VRRC> { + let ParserMatchClass = PPCRegVRRCAsmOperand; +} +def PPCRegCRBITRCAsmOperand : AsmOperandClass { + let Name = "RegCRBITRC"; let PredicateMethod = "isRegNumber"; +} +def crbitrc : RegisterOperand<CRBITRC> { + let ParserMatchClass = PPCRegCRBITRCAsmOperand; +} +def PPCRegCRRCAsmOperand : AsmOperandClass { + let Name = "RegCRRC"; let PredicateMethod = "isCCRegNumber"; +} +def crrc : RegisterOperand<CRRC> { + let ParserMatchClass = PPCRegCRRCAsmOperand; +} + +def PPCS5ImmAsmOperand : AsmOperandClass { + let Name = "S5Imm"; let PredicateMethod = "isS5Imm"; + let RenderMethod = "addImmOperands"; +} def s5imm : Operand<i32> { let PrintMethod = "printS5ImmOperand"; + let ParserMatchClass = PPCS5ImmAsmOperand; +} +def PPCU5ImmAsmOperand : AsmOperandClass { + let Name = "U5Imm"; let PredicateMethod = "isU5Imm"; + let RenderMethod = "addImmOperands"; } def u5imm : Operand<i32> { let PrintMethod = "printU5ImmOperand"; + let ParserMatchClass = PPCU5ImmAsmOperand; +} +def PPCU6ImmAsmOperand : AsmOperandClass { + let Name = "U6Imm"; let PredicateMethod = "isU6Imm"; + let RenderMethod = "addImmOperands"; } def u6imm : Operand<i32> { let PrintMethod = "printU6ImmOperand"; + let ParserMatchClass = PPCU6ImmAsmOperand; +} +def PPCS16ImmAsmOperand : AsmOperandClass { + let Name = "S16Imm"; let PredicateMethod = "isS16Imm"; + let RenderMethod = "addImmOperands"; } def s16imm : Operand<i32> { let PrintMethod = "printS16ImmOperand"; + let EncoderMethod = "getS16ImmEncoding"; + let ParserMatchClass = PPCS16ImmAsmOperand; +} +def PPCU16ImmAsmOperand : AsmOperandClass { + let Name = "U16Imm"; let PredicateMethod = "isU16Imm"; + let RenderMethod = "addImmOperands"; } def u16imm : Operand<i32> { let PrintMethod = "printU16ImmOperand"; + let ParserMatchClass = PPCU16ImmAsmOperand; } def directbrtarget : Operand<OtherVT> { let PrintMethod = 
"printBranchOperand"; @@ -381,24 +459,44 @@ def calltarget : Operand<iPTR> { def aaddr : Operand<iPTR> { let PrintMethod = "printAbsAddrOperand"; } -def symbolHi: Operand<i32> { - let PrintMethod = "printSymbolHi"; - let EncoderMethod = "getHA16Encoding"; -} -def symbolLo: Operand<i32> { - let PrintMethod = "printSymbolLo"; - let EncoderMethod = "getLO16Encoding"; +def PPCCRBitMaskOperand : AsmOperandClass { + let Name = "CRBitMask"; let PredicateMethod = "isCRBitMask"; } def crbitm: Operand<i8> { let PrintMethod = "printcrbitm"; let EncoderMethod = "get_crbitm_encoding"; + let ParserMatchClass = PPCCRBitMaskOperand; } // Address operands // A version of ptr_rc which excludes R0 (or X0 in 64-bit mode). -def ptr_rc_nor0 : PointerLikeRegClass<1>; +def PPCRegGxRCNoR0Operand : AsmOperandClass { + let Name = "RegGxRCNoR0"; let PredicateMethod = "isRegNumber"; +} +def ptr_rc_nor0 : Operand<iPTR>, PointerLikeRegClass<1> { + let ParserMatchClass = PPCRegGxRCNoR0Operand; +} +// A version of ptr_rc usable with the asm parser. 
+def PPCRegGxRCOperand : AsmOperandClass { + let Name = "RegGxRC"; let PredicateMethod = "isRegNumber"; +} +def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> { + let ParserMatchClass = PPCRegGxRCOperand; +} -def dispRI : Operand<iPTR>; -def dispRIX : Operand<iPTR>; +def PPCDispRIOperand : AsmOperandClass { + let Name = "DispRI"; let PredicateMethod = "isS16Imm"; + let RenderMethod = "addImmOperands"; +} +def dispRI : Operand<iPTR> { + let ParserMatchClass = PPCDispRIOperand; +} +def PPCDispRIXOperand : AsmOperandClass { + let Name = "DispRIX"; let PredicateMethod = "isS16ImmX4"; + let RenderMethod = "addImmOperands"; +} +def dispRIX : Operand<iPTR> { + let ParserMatchClass = PPCDispRIXOperand; +} def memri : Operand<iPTR> { let PrintMethod = "printMemRegImm"; @@ -407,10 +505,10 @@ def memri : Operand<iPTR> { } def memrr : Operand<iPTR> { let PrintMethod = "printMemRegReg"; - let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc:$offreg); + let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg); } -def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. - let PrintMethod = "printMemRegImmShifted"; +def memrix : Operand<iPTR> { // memri where the imm is 4-aligned. + let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIXEncoding"; } @@ -431,7 +529,7 @@ def pred : Operand<OtherVT> { def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>; def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>; def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>; -def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std" +def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std" // The address in a single register. This is used with the SjLj // pseudo-instructions. @@ -888,6 +986,12 @@ let isBranch = 1, isTerminator = 1 in { "#EH_SjLj_Setup\t$dst", []>; } +// System call. 
+let PPC970_Unit = 7 in { + def SC : SCForm<17, 1, (outs), (ins i32imm:$lev), + "sc $lev", BrB, [(PPCsc (i32 imm:$lev))]>; +} + // DCB* instructions. def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>, @@ -1290,41 +1394,41 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins), // let PPC970_Unit = 1 in { // FXU Operations. -def ADDI : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolLo:$imm), +def ADDI : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set i32:$rD, (add i32:$rA, immSExt16:$imm))]>; + [(set i32:$rD, (add i32:$rA, imm32SExt16:$imm))]>; let BaseName = "addic" in { let Defs = [CARRY] in def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), "addic $rD, $rA, $imm", IntGeneral, - [(set i32:$rD, (addc i32:$rA, immSExt16:$imm))]>, + [(set i32:$rD, (addc i32:$rA, imm32SExt16:$imm))]>, RecFormRel, PPC970_DGroup_Cracked; let Defs = [CARRY, CR0] in def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), "addic. 
$rD, $rA, $imm", IntGeneral, []>, isDOT, RecFormRel; } -def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolHi:$imm), +def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$imm), "addis $rD, $rA, $imm", IntSimple, [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>; let isCodeGenOnly = 1 in -def LA : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolLo:$sym), +def LA : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$sym), "la $rD, $sym($rA)", IntGeneral, [(set i32:$rD, (add i32:$rA, (PPClo tglobaladdr:$sym, 0)))]>; def MULLI : DForm_2< 7, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), "mulli $rD, $rA, $imm", IntMulLI, - [(set i32:$rD, (mul i32:$rA, immSExt16:$imm))]>; + [(set i32:$rD, (mul i32:$rA, imm32SExt16:$imm))]>; let Defs = [CARRY] in def SUBFIC : DForm_2< 8, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), "subfic $rD, $rA, $imm", IntGeneral, - [(set i32:$rD, (subc immSExt16:$imm, i32:$rA))]>; + [(set i32:$rD, (subc imm32SExt16:$imm, i32:$rA))]>; let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { - def LI : DForm_2_r0<14, (outs gprc:$rD), (ins symbolLo:$imm), + def LI : DForm_2_r0<14, (outs gprc:$rD), (ins s16imm:$imm), "li $rD, $imm", IntSimple, - [(set i32:$rD, immSExt16:$imm)]>; - def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins symbolHi:$imm), + [(set i32:$rD, imm32SExt16:$imm)]>; + def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins s16imm:$imm), "lis $rD, $imm", IntSimple, [(set i32:$rD, imm16ShiftedSExt:$imm)]>; } @@ -1591,6 +1695,12 @@ def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS), "mtctr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } +let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in { +let Pattern = [(int_ppc_mtctr i32:$rS)] in +def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS), + "mtctr $rS", SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} let Defs = [LR] in { def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS), @@ -1905,7 +2015,7 
@@ def : Pat<(or i32:$in, imm:$imm), def : Pat<(xor i32:$in, imm:$imm), (XORIS (XORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>; // SUBFIC -def : Pat<(sub immSExt16:$imm, i32:$in), +def : Pat<(sub imm32SExt16:$imm, i32:$in), (SUBFIC $in, imm:$imm)>; // SHL/SRL @@ -2012,3 +2122,82 @@ def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B), include "PPCInstrAltivec.td" include "PPCInstr64Bit.td" + + +//===----------------------------------------------------------------------===// +// PowerPC Instructions used for assembler/disassembler only +// + +def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins), + "isync", SprISYNC, []>; + +def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src), + "icbi $src", LdStICBI, []>; + +//===----------------------------------------------------------------------===// +// PowerPC Assembler Instruction Aliases +// + +// Pseudo-instructions for alternate assembly syntax (never used by codegen). +// These are aliases that require C++ handling to convert to the target +// instruction, while InstAliases can be handled directly by tblgen. 
+class PPCAsmPseudo<string asm, dag iops> + : Instruction { + let Namespace = "PPC"; + bit PPC64 = 0; // Default value, override with isPPC64 + + let OutOperandList = (outs); + let InOperandList = iops; + let Pattern = []; + let AsmString = asm; + let isAsmParserOnly = 1; + let isPseudo = 1; +} + +def : InstAlias<"sc", (SC 0)>; + +def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>; + +def SLWI : PPCAsmPseudo<"slwi $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def SRWI : PPCAsmPseudo<"srwi $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def SLDI : PPCAsmPseudo<"sldi $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; +def SRDI : PPCAsmPseudo<"srdi $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; + +multiclass BranchExtendedMnemonic<string name, int bibo> { + def : InstAlias<"b"#name#" $cc, $dst", + (BCC bibo, crrc:$cc, condbrtarget:$dst)>; + def : InstAlias<"b"#name#" $dst", + (BCC bibo, CR0, condbrtarget:$dst)>; + + def : InstAlias<"b"#name#"lr $cc", + (BCLR bibo, crrc:$cc)>; + def : InstAlias<"b"#name#"lr", + (BCLR bibo, CR0)>; + + def : InstAlias<"b"#name#"ctr $cc", + (BCCTR bibo, crrc:$cc)>; + def : InstAlias<"b"#name#"ctr", + (BCCTR bibo, CR0)>; + + def : InstAlias<"b"#name#"ctrl $cc", + (BCCTRL bibo, crrc:$cc)>; + def : InstAlias<"b"#name#"ctrl", + (BCCTRL bibo, CR0)>; +} +defm : BranchExtendedMnemonic<"lt", 12>; +defm : BranchExtendedMnemonic<"gt", 44>; +defm : BranchExtendedMnemonic<"eq", 76>; +defm : BranchExtendedMnemonic<"un", 108>; +defm : BranchExtendedMnemonic<"so", 108>; +defm : BranchExtendedMnemonic<"ge", 4>; +defm : BranchExtendedMnemonic<"nl", 4>; +defm : BranchExtendedMnemonic<"le", 36>; +defm : BranchExtendedMnemonic<"ng", 36>; +defm : BranchExtendedMnemonic<"ne", 68>; +defm : BranchExtendedMnemonic<"nu", 100>; +defm : BranchExtendedMnemonic<"ns", 100>; + diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index f8cf3a5..ba7efc1 100644 --- 
a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "PPC.h" +#include "MCTargetDesc/PPCMCExpr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" @@ -110,32 +111,32 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, unsigned access = MO.getTargetFlags() & PPCII::MO_ACCESS_MASK; - switch (access) { - case PPCII::MO_HA16: RefKind = isDarwin ? - MCSymbolRefExpr::VK_PPC_DARWIN_HA16 : - MCSymbolRefExpr::VK_PPC_GAS_HA16; - break; - case PPCII::MO_LO16: RefKind = isDarwin ? - MCSymbolRefExpr::VK_PPC_DARWIN_LO16 : - MCSymbolRefExpr::VK_PPC_GAS_LO16; - break; - case PPCII::MO_TPREL16_HA: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_HA; - break; - case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO; - break; - case PPCII::MO_DTPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_DTPREL16_LO; - break; - case PPCII::MO_TLSLD16_LO: RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO; - break; - case PPCII::MO_TOC16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TOC16_LO; - break; - } + if (!isDarwin) { + switch (access) { + case PPCII::MO_HA16: + RefKind = MCSymbolRefExpr::VK_PPC_ADDR16_HA; + break; + case PPCII::MO_LO16: + RefKind = MCSymbolRefExpr::VK_PPC_ADDR16_LO; + break; + case PPCII::MO_TPREL16_HA: + RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_HA; + break; + case PPCII::MO_TPREL16_LO: + RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO; + break; + case PPCII::MO_DTPREL16_LO: + RefKind = MCSymbolRefExpr::VK_PPC_DTPREL16_LO; + break; + case PPCII::MO_TLSLD16_LO: + RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO; + break; + case PPCII::MO_TOC16_LO: + RefKind = MCSymbolRefExpr::VK_PPC_TOC16_LO; + break; + } + } - // FIXME: This isn't right, but we don't have a good way to express this in - // the MC Level, see below. 
- if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) - RefKind = MCSymbolRefExpr::VK_None; - const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx); if (!MO.isJTI() && MO.getOffset()) @@ -149,10 +150,20 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, const MCExpr *PB = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx); Expr = MCBinaryExpr::CreateSub(Expr, PB, Ctx); - // FIXME: We have no way to make the result be VK_PPC_LO16/VK_PPC_HA16, - // since it is not a symbol! } - + + // Add Darwin ha16() / lo16() markers if required. + if (isDarwin) { + switch (access) { + case PPCII::MO_HA16: + Expr = PPCMCExpr::CreateHa16(Expr, Ctx); + break; + case PPCII::MO_LO16: + Expr = PPCMCExpr::CreateLo16(Expr, Ctx); + break; + } + } + return MCOperand::CreateExpr(Expr); } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 2be6324..a4e328e 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -48,12 +48,11 @@ using namespace llvm; -PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, - const TargetInstrInfo &tii) +PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST) : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, ST.isPPC64() ? 0 : 1, ST.isPPC64() ? 0 : 1), - Subtarget(ST), TII(tii) { + Subtarget(ST) { ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX; ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX; ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX; @@ -136,6 +135,11 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::FP); Reserved.set(PPC::FP8); + // The counter registers must be reserved so that counter-based loops can + // be correctly formed (and the mtctr instructions are not DCE'd). 
+ Reserved.set(PPC::CTR); + Reserved.set(PPC::CTR8); + Reserved.set(PPC::R1); Reserved.set(PPC::LR); Reserved.set(PPC::LR8); @@ -214,6 +218,8 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { MachineFunction &MF = *MBB.getParent(); // Get the frame info. MachineFrameInfo *MFI = MF.getFrameInfo(); + // Get the instruction info. + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); // Determine whether 64-bit pointers are used. bool LP64 = Subtarget.isPPC64(); DebugLoc dl = MI.getDebugLoc(); @@ -307,6 +313,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); bool LP64 = Subtarget.isPPC64(); @@ -350,6 +357,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); bool LP64 = Subtarget.isPPC64(); @@ -391,6 +399,7 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; @@ -415,6 +424,7 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II, // Get the instruction's basic block. 
MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; @@ -454,9 +464,8 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, return false; } -// Figure out if the offset in the instruction is shifted right two bits. This -// is true for instructions like "STD", which the machine implicitly adds two -// low zeros to. +// Figure out if the offset in the instruction must be a multiple of 4. +// This is true for instructions like "STD". static bool usesIXAddr(const MachineInstr &MI) { unsigned OpC = MI.getOpcode(); @@ -493,6 +502,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineBasicBlock &MBB = *MI.getParent(); // Get the basic block's function. MachineFunction &MF = *MBB.getParent(); + // Get the instruction info. + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); // Get the frame info. MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); @@ -549,10 +560,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Now add the frame object offset to the offset from r1. int Offset = MFI->getObjectOffset(FrameIndex); - if (!isIXAddr) - Offset += MI.getOperand(OffsetOperandNo).getImm(); - else - Offset += MI.getOperand(OffsetOperandNo).getImm() << 2; + Offset += MI.getOperand(OffsetOperandNo).getImm(); // If we're not using a Frame Pointer that has been set to the value of the // SP before having the stack size subtracted from it, then add the stack size @@ -572,8 +580,6 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm (!noImmForm && isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { - if (isIXAddr) - Offset >>= 2; // The actual encoded value has the low two bits zero. 
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); return; } @@ -650,11 +656,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { } unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum); - - if (!usesIXAddr(*MI)) - Offset += MI->getOperand(OffsetOperandNo).getImm(); - else - Offset += MI->getOperand(OffsetOperandNo).getImm() << 2; + Offset += MI->getOperand(OffsetOperandNo).getImm(); // It's the load/store FI references that cause issues, as it can be difficult // to materialize the offset if it won't fit in the literal field. Estimate @@ -711,9 +713,10 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, if (Ins != MBB->end()) DL = Ins->getDebugLoc(); + const MachineFunction &MF = *MBB->getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); const MCInstrDesc &MCID = TII.get(ADDriOpc); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - const MachineFunction &MF = *MBB->getParent(); MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); BuildMI(*MBB, Ins, DL, MCID, BaseReg) @@ -734,17 +737,7 @@ PPCRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false); unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum); - - bool isIXAddr = usesIXAddr(MI); - if (!isIXAddr) - Offset += MI.getOperand(OffsetOperandNo).getImm(); - else - Offset += MI.getOperand(OffsetOperandNo).getImm() << 2; - - // Figure out if the offset in the instruction is shifted right two bits. - if (isIXAddr) - Offset >>= 2; // The actual encoded value has the low two bits zero. 
- + Offset += MI.getOperand(OffsetOperandNo).getImm(); MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 7a48b4b..93626a9 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -29,9 +29,8 @@ class Type; class PPCRegisterInfo : public PPCGenRegisterInfo { DenseMap<unsigned, unsigned> ImmToIdxMap; const PPCSubtarget &Subtarget; - const TargetInstrInfo &TII; public: - PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii); + PPCRegisterInfo(const PPCSubtarget &SubTarget); /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 57a25f5..b1b4f06 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -11,11 +11,11 @@ //===----------------------------------------------------------------------===// let Namespace = "PPC" in { -def sub_lt : SubRegIndex; -def sub_gt : SubRegIndex; -def sub_eq : SubRegIndex; -def sub_un : SubRegIndex; -def sub_32 : SubRegIndex; +def sub_lt : SubRegIndex<1>; +def sub_gt : SubRegIndex<1, 1>; +def sub_eq : SubRegIndex<1, 2>; +def sub_un : SubRegIndex<1, 3>; +def sub_32 : SubRegIndex<32>; } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 14dc794..da03b4c 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -48,6 +48,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, // The binutils for the BG/P are too old for CFI. 
if (Subtarget.isBGP()) setMCUseCFI(false); + initAsmInfo(); } void PPC32TargetMachine::anchor() { } @@ -90,7 +91,7 @@ public: return *getPPCTargetMachine().getSubtargetImpl(); } - virtual bool addPreRegAlloc(); + virtual bool addPreISel(); virtual bool addILPOpts(); virtual bool addInstSelector(); virtual bool addPreSched2(); @@ -102,9 +103,9 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { return new PPCPassConfig(this, PM); } -bool PPCPassConfig::addPreRegAlloc() { +bool PPCPassConfig::addPreISel() { if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) - addPass(createPPCCTRLoops()); + addPass(createPPCCTRLoops(getPPCTargetMachine())); return false; } @@ -121,6 +122,12 @@ bool PPCPassConfig::addILPOpts() { bool PPCPassConfig::addInstSelector() { // Install an instruction selector. addPass(createPPCISelDag(getPPCTargetMachine())); + +#ifndef NDEBUG + if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) + addPass(createPPCCTRLoopsVerify()); +#endif + return false; } diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/lib/Target/PowerPC/PPCTargetObjectFile.cpp new file mode 100644 index 0000000..90e4f15 --- /dev/null +++ b/lib/Target/PowerPC/PPCTargetObjectFile.cpp @@ -0,0 +1,57 @@ +//===-- PPCTargetObjectFile.cpp - PPC Object Info -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "PPCTargetObjectFile.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +void +PPC64LinuxTargetObjectFile:: +Initialize(MCContext &Ctx, const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); +} + +const MCSection * PPC64LinuxTargetObjectFile:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + + const MCSection *DefaultSection = + TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM); + + if (DefaultSection != ReadOnlySection) + return DefaultSection; + + // Here override ReadOnlySection to DataRelROSection for PPC64 SVR4 ABI + // when we have a constant that contains global relocations. This is + // necessary because of this ABI's handling of pointers to functions in + // a shared library. The address of a function is actually the address + // of a function descriptor, which resides in the .opd section. Generated + // code uses the descriptor directly rather than going via the GOT as some + // other ABIs do, which means that initialized function pointers must + // reference the descriptor. The linker must convert copy relocs of + // pointers to functions in shared libraries into dynamic relocations, + // because of an ordering problem with initialization of copy relocs and + // PLT entries. The dynamic relocation will be initialized by the dynamic + // linker, so we must use DataRelROSection instead of ReadOnlySection. + // For more information, see the description of ELIMINATE_COPY_RELOCS in + // GNU ld. 
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + + if (GVar && GVar->isConstant() && + (GVar->getInitializer()->getRelocationInfo() == + Constant::GlobalRelocations)) + return DataRelROSection; + + return DefaultSection; +} diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.h b/lib/Target/PowerPC/PPCTargetObjectFile.h new file mode 100644 index 0000000..9203e23 --- /dev/null +++ b/lib/Target/PowerPC/PPCTargetObjectFile.h @@ -0,0 +1,32 @@ +//===-- PPCTargetObjectFile.h - PPC Object Info -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_PPC_TARGETOBJECTFILE_H +#define LLVM_TARGET_PPC_TARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + + /// PPC64LinuxTargetObjectFile - This implementation is used for + /// 64-bit PowerPC Linux. + class PPC64LinuxTargetObjectFile : public TargetLoweringObjectFileELF { + + virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + + virtual const MCSection * + SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const; + }; + +} // end namespace llvm + +#endif |