author:    Stephen Hines <srhines@google.com>  2013-08-07 15:07:10 -0700
committer: Stephen Hines <srhines@google.com>  2013-08-07 15:07:10 -0700
commit:    fab2daa4a1127ecb217abe2b07c1769122b6fee1 (patch)
tree:      268ebfd1963fd98ba412e76819afdf95a7d4267b /lib/Target/SystemZ
parent:    8197ac1c1a0a91baa70c4dea8cb488f254ef974c (diff)
parent:    10251753b6897adcd22cc981c0cc42f348c109de (diff)
Merge commit '10251753b6897adcd22cc981c0cc42f348c109de' into merge-20130807
Conflicts:
lib/Archive/ArchiveReader.cpp
lib/Support/Unix/PathV2.inc
Change-Id: I29d8c1e321a4a380b6013f00bac6a8e4b593cc4e
Diffstat (limited to 'lib/Target/SystemZ')
38 files changed, 3347 insertions, 806 deletions
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 7c28abd..58af2c4 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
@@ -29,19 +30,25 @@ static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue) {
 }
 
 namespace {
+enum RegisterKind {
+  GR32Reg,
+  GR64Reg,
+  GR128Reg,
+  ADDR32Reg,
+  ADDR64Reg,
+  FP32Reg,
+  FP64Reg,
+  FP128Reg
+};
+
+enum MemoryKind {
+  BDMem,
+  BDXMem,
+  BDLMem
+};
+
 class SystemZOperand : public MCParsedAsmOperand {
 public:
-  enum RegisterKind {
-    GR32Reg,
-    GR64Reg,
-    GR128Reg,
-    ADDR32Reg,
-    ADDR64Reg,
-    FP32Reg,
-    FP64Reg,
-    FP128Reg
-  };
-
 private:
   enum OperandKind {
     KindInvalid,
@@ -77,12 +84,15 @@ private:
   // Base + Disp + Index, where Base and Index are LLVM registers or 0.
   // RegKind says what type the registers have (ADDR32Reg or ADDR64Reg).
+  // Length is the operand length for D(L,B)-style operands, otherwise
+  // it is null.
   struct MemOp {
     unsigned Base : 8;
     unsigned Index : 8;
     unsigned RegKind : 8;
     unsigned Unused : 8;
     const MCExpr *Disp;
+    const MCExpr *Length;
   };
 
   union {
@@ -139,12 +149,14 @@ public:
   }
   static SystemZOperand *createMem(RegisterKind RegKind, unsigned Base,
                                    const MCExpr *Disp, unsigned Index,
-                                   SMLoc StartLoc, SMLoc EndLoc) {
+                                   const MCExpr *Length, SMLoc StartLoc,
+                                   SMLoc EndLoc) {
     SystemZOperand *Op = new SystemZOperand(KindMem, StartLoc, EndLoc);
     Op->Mem.RegKind = RegKind;
     Op->Mem.Base = Base;
     Op->Mem.Index = Index;
     Op->Mem.Disp = Disp;
+    Op->Mem.Length = Length;
     return Op;
   }
@@ -191,16 +203,20 @@ public:
   virtual bool isMem() const LLVM_OVERRIDE {
     return Kind == KindMem;
   }
-  bool isMem(RegisterKind RegKind, bool HasIndex) const {
+  bool isMem(RegisterKind RegKind, MemoryKind MemKind) const {
     return (Kind == KindMem &&
             Mem.RegKind == RegKind &&
-            (HasIndex || !Mem.Index));
+            (MemKind == BDXMem || !Mem.Index) &&
+            (MemKind == BDLMem) == (Mem.Length != 0));
+  }
+  bool isMemDisp12(RegisterKind RegKind, MemoryKind MemKind) const {
+    return isMem(RegKind, MemKind) && inRange(Mem.Disp, 0, 0xfff);
   }
-  bool isMemDisp12(RegisterKind RegKind, bool HasIndex) const {
-    return isMem(RegKind, HasIndex) && inRange(Mem.Disp, 0, 0xfff);
+  bool isMemDisp20(RegisterKind RegKind, MemoryKind MemKind) const {
+    return isMem(RegKind, MemKind) && inRange(Mem.Disp, -524288, 524287);
   }
-  bool isMemDisp20(RegisterKind RegKind, bool HasIndex) const {
-    return isMem(RegKind, HasIndex) && inRange(Mem.Disp, -524288, 524287);
+  bool isMemDisp12Len8(RegisterKind RegKind) const {
+    return isMemDisp12(RegKind, BDLMem) && inRange(Mem.Length, 1, 0x100);
   }
 
   // Override MCParsedAsmOperand.
@@ -236,6 +252,13 @@ public:
     addExpr(Inst, Mem.Disp);
     Inst.addOperand(MCOperand::CreateReg(Mem.Index));
   }
+  void addBDLAddrOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 3 && "Invalid number of operands");
+    assert(Kind == KindMem && "Invalid operand type");
+    Inst.addOperand(MCOperand::CreateReg(Mem.Base));
+    addExpr(Inst, Mem.Disp);
+    addExpr(Inst, Mem.Length);
+  }
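The new `isMemDisp12Len8` predicate above limits the length to 1..256 because the machine field stores `L - 1` in eight bits, alongside a 12-bit unsigned displacement (see the encoder later in this patch). A minimal standalone sketch of those constraints; `BDLOperand` and `isValidBDL` are invented names, not part of the patch:

```cpp
#include <cassert>
#include <cstdint>

struct BDLOperand {
  unsigned Base;    // 0 or a GPR number
  int64_t Disp;     // displacement
  uint64_t Length;  // byte count as written in assembly
};

// D(L,B) operands use a 12-bit unsigned displacement, and the length is
// encoded in 8 bits as L-1, so the assembler accepts lengths 1..256.
bool isValidBDL(const BDLOperand &Op) {
  return Op.Disp >= 0 && Op.Disp <= 0xfff &&
         Op.Length >= 1 && Op.Length <= 0x100;
}

int main() {
  assert(isValidBDL({15, 0, 256}));    // e.g. mvc 0(256,%r15),...
  assert(!isValidBDL({15, 0, 257}));   // length field would overflow
  assert(!isValidBDL({15, 4096, 8}));  // displacement needs 12 bits
}
```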
 
   // Used by the TableGen code to check for particular operand types.
   bool isGR32() const { return isReg(GR32Reg); }
@@ -247,12 +270,13 @@ public:
   bool isFP32() const { return isReg(FP32Reg); }
   bool isFP64() const { return isReg(FP64Reg); }
   bool isFP128() const { return isReg(FP128Reg); }
-  bool isBDAddr32Disp12() const { return isMemDisp12(ADDR32Reg, false); }
-  bool isBDAddr32Disp20() const { return isMemDisp20(ADDR32Reg, false); }
-  bool isBDAddr64Disp12() const { return isMemDisp12(ADDR64Reg, false); }
-  bool isBDAddr64Disp20() const { return isMemDisp20(ADDR64Reg, false); }
-  bool isBDXAddr64Disp12() const { return isMemDisp12(ADDR64Reg, true); }
-  bool isBDXAddr64Disp20() const { return isMemDisp20(ADDR64Reg, true); }
+  bool isBDAddr32Disp12() const { return isMemDisp12(ADDR32Reg, BDMem); }
+  bool isBDAddr32Disp20() const { return isMemDisp20(ADDR32Reg, BDMem); }
+  bool isBDAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDMem); }
+  bool isBDAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDMem); }
+  bool isBDXAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDXMem); }
+  bool isBDXAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDXMem); }
+  bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(ADDR64Reg); }
   bool isU4Imm() const { return isImm(0, 15); }
   bool isU6Imm() const { return isImm(0, 63); }
   bool isU8Imm() const { return isImm(0, 255); }
@@ -288,19 +312,16 @@ private:
   OperandMatchResultTy
   parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                RegisterGroup Group, const unsigned *Regs,
-                SystemZOperand::RegisterKind Kind,
-                bool IsAddress = false);
+                RegisterGroup Group, const unsigned *Regs, RegisterKind Kind);
 
   bool parseAddress(unsigned &Base, const MCExpr *&Disp,
-                    unsigned &Index, const unsigned *Regs,
-                    SystemZOperand::RegisterKind RegKind,
-                    bool HasIndex);
+                    unsigned &Index, const MCExpr *&Length,
+                    const unsigned *Regs, RegisterKind RegKind);
 
   OperandMatchResultTy
   parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-               const unsigned *Regs, SystemZOperand::RegisterKind RegKind,
-               bool HasIndex);
+               const unsigned *Regs, RegisterKind RegKind,
+               MemoryKind MemKind);
 
   bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
                     StringRef Mnemonic);
@@ -331,28 +352,23 @@ public:
   // Used by the TableGen code to parse particular operand types.
   OperandMatchResultTy
   parseGR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-    return parseRegister(Operands, RegGR, SystemZMC::GR32Regs,
-                         SystemZOperand::GR32Reg);
+    return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, GR32Reg);
   }
   OperandMatchResultTy
   parseGR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-    return parseRegister(Operands, RegGR, SystemZMC::GR64Regs,
-                         SystemZOperand::GR64Reg);
+    return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg);
   }
   OperandMatchResultTy
   parseGR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-    return parseRegister(Operands, RegGR, SystemZMC::GR128Regs,
-                         SystemZOperand::GR128Reg);
+    return parseRegister(Operands, RegGR, SystemZMC::GR128Regs, GR128Reg);
   }
   OperandMatchResultTy
   parseADDR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-    return parseRegister(Operands, RegGR, SystemZMC::GR32Regs,
-                         SystemZOperand::ADDR32Reg, true);
+    return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, ADDR32Reg);
  }
   OperandMatchResultTy
   parseADDR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-    return parseRegister(Operands, RegGR, SystemZMC::GR64Regs,
-                         SystemZOperand::ADDR64Reg, true);
+    return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, ADDR64Reg);
   }
   OperandMatchResultTy
   parseADDR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
@@ -360,33 +376,31 @@ public:
   }
   OperandMatchResultTy
   parseFP32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-    return parseRegister(Operands, RegFP, SystemZMC::FP32Regs,
-                         SystemZOperand::FP32Reg);
+    return parseRegister(Operands, RegFP, SystemZMC::FP32Regs, FP32Reg);
   }
   OperandMatchResultTy
   parseFP64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-    return parseRegister(Operands, RegFP, SystemZMC::FP64Regs,
-                         SystemZOperand::FP64Reg);
+    return parseRegister(Operands, RegFP, SystemZMC::FP64Regs, FP64Reg);
   }
   OperandMatchResultTy
   parseFP128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-    return parseRegister(Operands, RegFP, SystemZMC::FP128Regs,
-                         SystemZOperand::FP128Reg);
+    return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg);
   }
   OperandMatchResultTy
   parseBDAddr32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-    return parseAddress(Operands, SystemZMC::GR32Regs,
-                        SystemZOperand::ADDR32Reg, false);
+    return parseAddress(Operands, SystemZMC::GR32Regs, ADDR32Reg, BDMem);
   }
   OperandMatchResultTy
   parseBDAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-    return parseAddress(Operands, SystemZMC::GR64Regs,
-                        SystemZOperand::ADDR64Reg, false);
+    return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDMem);
   }
   OperandMatchResultTy
   parseBDXAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-    return parseAddress(Operands, SystemZMC::GR64Regs,
-                        SystemZOperand::ADDR64Reg, true);
+    return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDXMem);
+  }
+  OperandMatchResultTy
+  parseBDLAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+    return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDLMem);
   }
   OperandMatchResultTy
   parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -474,12 +488,12 @@ bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group,
 SystemZAsmParser::OperandMatchResultTy
 SystemZAsmParser::parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
                                 RegisterGroup Group, const unsigned *Regs,
-                                SystemZOperand::RegisterKind Kind,
-                                bool IsAddress) {
+                                RegisterKind Kind) {
   if (Parser.getTok().isNot(AsmToken::Percent))
     return MatchOperand_NoMatch;
 
   Register Reg;
+  bool IsAddress = (Kind == ADDR32Reg || Kind == ADDR64Reg);
   if (parseRegister(Reg, Group, Regs, IsAddress))
     return MatchOperand_ParseFail;
 
@@ -488,14 +502,13 @@ SystemZAsmParser::parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
   return MatchOperand_Success;
 }
 
-// Parse a memory operand into Base, Disp and Index.  Regs maps asm
-// register numbers to LLVM register numbers and RegKind says what kind
-// of address register we're using (ADDR32Reg or ADDR64Reg).  HasIndex
-// says whether the address allows index registers.
+// Parse a memory operand into Base, Disp, Index and Length.
+// Regs maps asm register numbers to LLVM register numbers and RegKind
+// says what kind of address register we're using (ADDR32Reg or ADDR64Reg).
 bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp,
-                                    unsigned &Index, const unsigned *Regs,
-                                    SystemZOperand::RegisterKind RegKind,
-                                    bool HasIndex) {
+                                    unsigned &Index, const MCExpr *&Length,
+                                    const unsigned *Regs,
+                                    RegisterKind RegKind) {
   // Parse the displacement, which must always be present.
   if (getParser().parseExpression(Disp))
     return true;
@@ -503,27 +516,33 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp,
   // Parse the optional base and index.
   Index = 0;
   Base = 0;
+  Length = 0;
   if (getLexer().is(AsmToken::LParen)) {
     Parser.Lex();
 
-    // Parse the first register.
-    Register Reg;
-    if (parseRegister(Reg, RegGR, Regs, RegKind))
-      return true;
+    if (getLexer().is(AsmToken::Percent)) {
+      // Parse the first register and decide whether it's a base or an index.
+      Register Reg;
+      if (parseRegister(Reg, RegGR, Regs, RegKind))
+        return true;
+      if (getLexer().is(AsmToken::Comma))
+        Index = Reg.Num;
+      else
+        Base = Reg.Num;
+    } else {
+      // Parse the length.
+      if (getParser().parseExpression(Length))
+        return true;
+    }
 
-    // Check whether there's a second register.  If so, the one that we
-    // just parsed was the index.
+    // Check whether there's a second register.  It's the base if so.
     if (getLexer().is(AsmToken::Comma)) {
       Parser.Lex();
-
-      if (!HasIndex)
-        return Error(Reg.StartLoc, "invalid use of indexed addressing");
-
-      Index = Reg.Num;
+      Register Reg;
       if (parseRegister(Reg, RegGR, Regs, RegKind))
         return true;
+      Base = Reg.Num;
     }
-    Base = Reg.Num;
 
     // Consume the closing bracket.
     if (getLexer().isNot(AsmToken::RParen))
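After the opening parenthesis, the token now decides the operand's shape: `%` introduces a register (an index if a comma follows, otherwise the base), anything else is parsed as a length expression, and a register after the comma is always the base. A toy classifier restating that rule (illustrative only, not the parser itself):

```cpp
#include <string>

// Operand shapes: "10(%r2)" = base only, "10(%r3,%r2)" = index+base,
// "10(8,%r2)" = length+base (the D(L,B) form used by MVC and friends).
enum class AddrShape { BD, BDX, BDL };

AddrShape classify(const std::string &FirstTok, bool HasComma) {
  bool IsReg = !FirstTok.empty() && FirstTok[0] == '%';
  if (IsReg)
    return HasComma ? AddrShape::BDX : AddrShape::BD;
  return AddrShape::BDL;  // a plain expression can only be a length
}
```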
@@ -537,19 +556,37 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp,
 // are as above.
 SystemZAsmParser::OperandMatchResultTy
 SystemZAsmParser::parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                               const unsigned *Regs,
-                               SystemZOperand::RegisterKind RegKind,
-                               bool HasIndex) {
+                               const unsigned *Regs, RegisterKind RegKind,
+                               MemoryKind MemKind) {
   SMLoc StartLoc = Parser.getTok().getLoc();
   unsigned Base, Index;
   const MCExpr *Disp;
-  if (parseAddress(Base, Disp, Index, Regs, RegKind, HasIndex))
+  const MCExpr *Length;
+  if (parseAddress(Base, Disp, Index, Length, Regs, RegKind))
     return MatchOperand_ParseFail;
 
+  if (Index && MemKind != BDXMem)
+  {
+    Error(StartLoc, "invalid use of indexed addressing");
+    return MatchOperand_ParseFail;
+  }
+
+  if (Length && MemKind != BDLMem)
+  {
+    Error(StartLoc, "invalid use of length addressing");
+    return MatchOperand_ParseFail;
+  }
+
+  if (!Length && MemKind == BDLMem)
+  {
+    Error(StartLoc, "missing length in address");
+    return MatchOperand_ParseFail;
+  }
+
   SMLoc EndLoc =
     SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
   Operands.push_back(SystemZOperand::createMem(RegKind, Base, Disp, Index,
-                                               StartLoc, EndLoc));
+                                               Length, StartLoc, EndLoc));
   return MatchOperand_Success;
 }
@@ -639,14 +676,13 @@ parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
   // so we treat any plain expression as an immediate.
   SMLoc StartLoc = Parser.getTok().getLoc();
   unsigned Base, Index;
-  const MCExpr *Expr;
-  if (parseAddress(Base, Expr, Index, SystemZMC::GR64Regs,
-                   SystemZOperand::ADDR64Reg, true))
+  const MCExpr *Expr, *Length;
+  if (parseAddress(Base, Expr, Index, Length, SystemZMC::GR64Regs, ADDR64Reg))
     return true;
 
   SMLoc EndLoc =
     SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
-  if (Base || Index)
+  if (Base || Index || Length)
     Operands.push_back(SystemZOperand::createInvalid(StartLoc, EndLoc));
   else
     Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
index edb679d..ab657f6 100644
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -15,6 +15,7 @@ add_llvm_target(SystemZCodeGen
   SystemZAsmPrinter.cpp
   SystemZCallingConv.cpp
   SystemZConstantPoolValue.cpp
+  SystemZElimCompare.cpp
   SystemZFrameLowering.cpp
   SystemZISelDAGToDAG.cpp
   SystemZISelLowering.cpp
@@ -22,11 +23,12 @@ add_llvm_target(SystemZCodeGen
   SystemZLongBranch.cpp
   SystemZMCInstLower.cpp
   SystemZRegisterInfo.cpp
+  SystemZSelectionDAGInfo.cpp
   SystemZSubtarget.cpp
   SystemZTargetMachine.cpp
   )
 
-add_dependencies(LLVMSystemZCodeGen intrinsics_gen)
+add_dependencies(LLVMSystemZCodeGen SystemZCommonTableGen intrinsics_gen)
 
 add_subdirectory(AsmParser)
 add_subdirectory(Disassembler)
diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 4e4816b..79469b6 100644
--- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -226,6 +226,18 @@ static DecodeStatus decodeBDXAddr20Operand(MCInst &Inst, uint64_t Field,
   return MCDisassembler::Success;
 }
 
+static DecodeStatus decodeBDLAddr12Len8Operand(MCInst &Inst, uint64_t Field,
+                                               const unsigned *Regs) {
+  uint64_t Length = Field >> 16;
+  uint64_t Base = (Field >> 12) & 0xf;
+  uint64_t Disp = Field & 0xfff;
+  assert(Length < 256 && "Invalid BDLAddr12Len8");
+  Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base]));
+  Inst.addOperand(MCOperand::CreateImm(Disp));
+  Inst.addOperand(MCOperand::CreateImm(Length + 1));
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus decodeBDAddr32Disp12Operand(MCInst &Inst, uint64_t Field,
                                                 uint64_t Address,
                                                 const void *Decoder) {
@@ -262,6 +274,13 @@ static DecodeStatus decodeBDXAddr64Disp20Operand(MCInst &Inst, uint64_t Field,
   return decodeBDXAddr20Operand(Inst, Field, SystemZMC::GR64Regs);
 }
 
+static DecodeStatus decodeBDLAddr64Disp12Len8Operand(MCInst &Inst,
+                                                     uint64_t Field,
+                                                     uint64_t Address,
+                                                     const void *Decoder) {
+  return decodeBDLAddr12Len8Operand(Inst, Field, SystemZMC::GR64Regs);
+}
+
 #include "SystemZGenDisassemblerTables.inc"
 
 DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
index 369802b..37ebff3 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -154,6 +154,17 @@ void SystemZInstPrinter::printBDXAddrOperand(const MCInst *MI, int OpNum,
                MI->getOperand(OpNum + 2).getReg(), O);
 }
 
+void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum,
+                                             raw_ostream &O) {
+  unsigned Base = MI->getOperand(OpNum).getReg();
+  uint64_t Disp = MI->getOperand(OpNum + 1).getImm();
+  uint64_t Length = MI->getOperand(OpNum + 2).getImm();
+  O << Disp << '(' << Length;
+  if (Base)
+    O << ",%" << getRegisterName(Base);
+  O << ')';
+}
+
 void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum,
                                            raw_ostream &O) {
   static const char *const CondNames[] = {
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index f77282e..30cdee5 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -48,6 +48,7 @@ private:
   void printOperand(const MCInst *MI, int OpNum, raw_ostream &O);
   void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
   void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
   void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
   void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
   void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
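The disassembler's `decodeBDLAddr12Len8Operand` above and the `getBDLAddr12Len8Encoding` emitter in the next file are inverses over the 24-bit D(L,B) field. A quick standalone round-trip check of the packing (a sketch, not LLVM code):

```cpp
#include <cassert>
#include <cstdint>

// Pack/unpack the 24-bit BDLAddr12Len8 field: length-1 in the top 8 bits,
// base register in the next 4, 12-bit displacement at the bottom.
uint64_t encodeBDL(uint64_t Base, uint64_t Disp, uint64_t Len) {
  return ((Len - 1) << 16) | (Base << 12) | Disp;
}

void decodeBDL(uint64_t Field, uint64_t &Base, uint64_t &Disp, uint64_t &Len) {
  Len = (Field >> 16) + 1;
  Base = (Field >> 12) & 0xf;
  Disp = Field & 0xfff;
}

int main() {
  // e.g. the first operand of "mvc 4095(256,%r15),0(%r1)"
  uint64_t B, D, L;
  decodeBDL(encodeBDL(15, 4095, 256), B, D, L);
  assert(B == 15 && D == 4095 && L == 256);
}
```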
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index 7721b1f..bda7714 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -49,7 +49,7 @@ private:
                              SmallVectorImpl<MCFixup> &Fixups) const;
 
   // Called by the TableGen code to get the binary encoding of an address.
-  // The index, if any, is encoded first, followed by the base,
+  // The index or length, if any, is encoded first, followed by the base,
   // followed by the displacement.  In a 20-bit displacement,
   // the low 12 bits are encoded before the high 8 bits.
   uint64_t getBDAddr12Encoding(const MCInst &MI, unsigned OpNum,
@@ -60,6 +60,8 @@ private:
                                SmallVectorImpl<MCFixup> &Fixups) const;
   uint64_t getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum,
                                 SmallVectorImpl<MCFixup> &Fixups) const;
+  uint64_t getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum,
+                                    SmallVectorImpl<MCFixup> &Fixups) const;
 
   // Operand OpNum of MI needs a PC-relative fixup of kind Kind at
   // Offset bytes from the start of MI.  Add the fixup to Fixups
@@ -112,7 +114,7 @@ uint64_t SystemZMCCodeEmitter::
 getMachineOpValue(const MCInst &MI, const MCOperand &MO,
                   SmallVectorImpl<MCFixup> &Fixups) const {
   if (MO.isReg())
-    return Ctx.getRegisterInfo().getEncodingValue(MO.getReg());
+    return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
   if (MO.isImm())
     return static_cast<uint64_t>(MO.getImm());
   llvm_unreachable("Unexpected operand type!");
@@ -157,6 +159,16 @@ getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum,
     | ((Disp & 0xff000) >> 12);
 }
 
+uint64_t SystemZMCCodeEmitter::
+getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum,
+                         SmallVectorImpl<MCFixup> &Fixups) const {
+  uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups);
+  uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups);
+  uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups) - 1;
+  assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<8>(Len));
+  return (Len << 16) | (Base << 12) | Disp;
+}
+
 uint64_t
 SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
                                        SmallVectorImpl<MCFixup> &Fixups,
diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt
index 55e9fc0..2782b63 100644
--- a/lib/Target/SystemZ/README.txt
+++ b/lib/Target/SystemZ/README.txt
@@ -118,11 +118,6 @@ such as ICM and STCM.
 
 --
 
-We could make more use of the ROTATE AND ... SELECTED BITS instructions.
-At the moment we only use RISBG, and only then for subword atomic operations.
-
---
-
 DAGCombiner can detect integer absolute, but there's not yet an associated
 ISD opcode.  We could add one and implement it using LOAD POSITIVE.
 Negated absolutes could use LOAD NEGATIVE.
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index 24612bb..eccc2aa 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -30,16 +30,28 @@ namespace llvm {
   const unsigned CCMASK_3 = 1 << 0;
   const unsigned CCMASK_ANY = CCMASK_0 | CCMASK_1 | CCMASK_2 | CCMASK_3;
 
-  // Condition-code mask assignments for floating-point comparisons.
+  // Condition-code mask assignments for integer and floating-point
+  // comparisons.
   const unsigned CCMASK_CMP_EQ = CCMASK_0;
   const unsigned CCMASK_CMP_LT = CCMASK_1;
   const unsigned CCMASK_CMP_GT = CCMASK_2;
-  const unsigned CCMASK_CMP_UO = CCMASK_3;
   const unsigned CCMASK_CMP_NE = CCMASK_CMP_LT | CCMASK_CMP_GT;
   const unsigned CCMASK_CMP_LE = CCMASK_CMP_EQ | CCMASK_CMP_LT;
   const unsigned CCMASK_CMP_GE = CCMASK_CMP_EQ | CCMASK_CMP_GT;
+
+  // Condition-code mask assignments for floating-point comparisons only.
+  const unsigned CCMASK_CMP_UO = CCMASK_3;
   const unsigned CCMASK_CMP_O = CCMASK_ANY ^ CCMASK_CMP_UO;
 
+  // All condition-code values produced by comparisons.
+  const unsigned CCMASK_ICMP = CCMASK_0 | CCMASK_1 | CCMASK_2;
+  const unsigned CCMASK_FCMP = CCMASK_0 | CCMASK_1 | CCMASK_2 | CCMASK_3;
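These masks follow the machine convention that condition code `n` selects bit `3 - n` of a 4-bit mask (so `CCMASK_0` is `1 << 3` and `CCMASK_3` is `1 << 0`, as above). A small model of how a mask is tested; `ccSatisfiesMask` is an invented name, not anything in the tree:

```cpp
#include <cassert>

// CC is the 2-bit condition code (0..3); Mask is a 4-bit CCMASK_* value.
bool ccSatisfiesMask(unsigned CC, unsigned Mask) {
  return (Mask >> (3 - CC)) & 1;
}

int main() {
  const unsigned CCMASK_CMP_EQ = 1 << 3;  // CC 0
  const unsigned CCMASK_CMP_LT = 1 << 2;  // CC 1
  const unsigned CCMASK_CMP_LE = CCMASK_CMP_EQ | CCMASK_CMP_LT;
  assert(ccSatisfiesMask(0, CCMASK_CMP_LE));   // equal satisfies "<="
  assert(ccSatisfiesMask(1, CCMASK_CMP_LE));   // less-than satisfies "<="
  assert(!ccSatisfiesMask(2, CCMASK_CMP_LE));  // greater-than does not
}
```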
+
+  // Condition-code mask assignments for CS.
+  const unsigned CCMASK_CS_EQ = CCMASK_0;
+  const unsigned CCMASK_CS_NE = CCMASK_1;
+  const unsigned CCMASK_CS = CCMASK_0 | CCMASK_1;
+
   // Return true if Val fits an LLILL operand.
   static inline bool isImmLL(uint64_t Val) {
     return (Val & ~0x000000000000ffffULL) == 0;
@@ -73,6 +85,7 @@ namespace llvm {
 
   FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
                                      CodeGenOpt::Level OptLevel);
+  FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
   FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
 } // end namespace llvm;
 #endif
diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td
index e03c32f..abf5c8e 100644
--- a/lib/Target/SystemZ/SystemZ.td
+++ b/lib/Target/SystemZ/SystemZ.td
@@ -14,13 +14,10 @@ include "llvm/Target/Target.td"
 //===----------------------------------------------------------------------===//
-// SystemZ supported processors
+// SystemZ supported processors and features
 //===----------------------------------------------------------------------===//
 
-class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
-
-def : Proc<"z10", []>;
+include "SystemZProcessors.td"
 
 //===----------------------------------------------------------------------===//
 // Register file description
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 1e15ab1..3a57ea0 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -100,7 +100,7 @@ void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
     for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
       OutStreamer.EmitLabel(Stubs[i].first);
       OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
-                                  TD->getPointerSize(0), 0);
+                                  TD->getPointerSize(0));
     }
     Stubs.clear();
   }
diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp
new file mode 100644
index 0000000..b8a77db
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -0,0 +1,471 @@
+//===-- SystemZElimCompare.cpp - Eliminate comparison instructions -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass:
+// (1) tries to remove compares if CC already contains the required information
+// (2) fuses compares and branches into COMPARE AND BRANCH instructions
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-elim-compare"
+
+#include "SystemZTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+STATISTIC(BranchOnCounts, "Number of branch-on-count instructions");
+STATISTIC(EliminatedComparisons, "Number of eliminated comparisons");
+STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions");
+
+namespace {
+  // Represents the references to a particular register in one or more
+  // instructions.
+  struct Reference {
+    Reference()
+      : Def(false), Use(false), IndirectDef(false), IndirectUse(false) {}
+
+    Reference &operator|=(const Reference &Other) {
+      Def |= Other.Def;
+      IndirectDef |= Other.IndirectDef;
+      Use |= Other.Use;
+      IndirectUse |= Other.IndirectUse;
+      return *this;
+    }
+
+    operator bool() const { return Def || Use; }
+
+    // True if the register is defined or used in some form, either directly or
+    // via a sub- or super-register.
+    bool Def;
+    bool Use;
+
+    // True if the register is defined or used indirectly, by a sub- or
+    // super-register.
+    bool IndirectDef;
+    bool IndirectUse;
+  };
+
+  class SystemZElimCompare : public MachineFunctionPass {
+  public:
+    static char ID;
+    SystemZElimCompare(const SystemZTargetMachine &tm)
+      : MachineFunctionPass(ID), TII(0), TRI(0) {}
+
+    virtual const char *getPassName() const {
+      return "SystemZ Comparison Elimination";
+    }
+
+    bool processBlock(MachineBasicBlock *MBB);
+    bool runOnMachineFunction(MachineFunction &F);
+
+  private:
+    Reference getRegReferences(MachineInstr *MI, unsigned Reg);
+    bool convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
+                       SmallVectorImpl<MachineInstr *> &CCUsers);
+    bool convertToLoadAndTest(MachineInstr *MI);
+    bool adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
+                               SmallVectorImpl<MachineInstr *> &CCUsers);
+    bool optimizeCompareZero(MachineInstr *Compare,
+                             SmallVectorImpl<MachineInstr *> &CCUsers);
+    bool fuseCompareAndBranch(MachineInstr *Compare,
+                              SmallVectorImpl<MachineInstr *> &CCUsers);
+
+    const SystemZInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+  };
+
+  char SystemZElimCompare::ID = 0;
+} // end of anonymous namespace
+
+FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) {
+  return new SystemZElimCompare(TM);
+}
+
+// Return true if CC is live out of MBB.
+static bool isCCLiveOut(MachineBasicBlock *MBB) {
+  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+         SE = MBB->succ_end(); SI != SE; ++SI)
+    if ((*SI)->isLiveIn(SystemZ::CC))
+      return true;
+  return false;
+}
+
+// Return true if any CC result of MI would reflect the value of subreg
+// SubReg of Reg.
+static bool resultTests(MachineInstr *MI, unsigned Reg, unsigned SubReg) {
+  if (MI->getNumOperands() > 0 &&
+      MI->getOperand(0).isReg() &&
+      MI->getOperand(0).isDef() &&
+      MI->getOperand(0).getReg() == Reg &&
+      MI->getOperand(0).getSubReg() == SubReg)
+    return true;
+
+  switch (MI->getOpcode()) {
+  case SystemZ::LR:
+  case SystemZ::LGR:
+  case SystemZ::LGFR:
+  case SystemZ::LTR:
+  case SystemZ::LTGR:
+  case SystemZ::LTGFR:
+  case SystemZ::LER:
+  case SystemZ::LDR:
+  case SystemZ::LXR:
+  case SystemZ::LTEBR:
+  case SystemZ::LTDBR:
+  case SystemZ::LTXBR:
+    if (MI->getOperand(1).getReg() == Reg &&
+        MI->getOperand(1).getSubReg() == SubReg)
+      return true;
+  }
+
+  return false;
+}
+
+// Describe the references to Reg in MI, including sub- and super-registers.
+Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) {
+  Reference Ref;
+  for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+    const MachineOperand &MO = MI->getOperand(I);
+    if (MO.isReg()) {
+      if (unsigned MOReg = MO.getReg()) {
+        if (MOReg == Reg || TRI->regsOverlap(MOReg, Reg)) {
+          if (MO.isUse()) {
+            Ref.Use = true;
+            Ref.IndirectUse |= (MOReg != Reg);
+          }
+          if (MO.isDef()) {
+            Ref.Def = true;
+            Ref.IndirectDef |= (MOReg != Reg);
+          }
+        }
+      }
+    }
+  }
+  return Ref;
+}
+
+// Compare compares the result of MI against zero.  If MI is an addition
+// of -1 and if CCUsers is a single branch on nonzero, eliminate the addition
+// and convert the branch to a BRCT(G).  Return true on success.
+bool
+SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
+                                  SmallVectorImpl<MachineInstr *> &CCUsers) {
+  // Check whether we have an addition of -1.
+  unsigned Opcode = MI->getOpcode();
+  unsigned BRCT;
+  if (Opcode == SystemZ::AHI)
+    BRCT = SystemZ::BRCT;
+  else if (Opcode == SystemZ::AGHI)
+    BRCT = SystemZ::BRCTG;
+  else
+    return false;
+  if (MI->getOperand(2).getImm() != -1)
+    return false;
+
+  // Check whether we have a single JLH.
+  if (CCUsers.size() != 1)
+    return false;
+  MachineInstr *Branch = CCUsers[0];
+  if (Branch->getOpcode() != SystemZ::BRC ||
+      Branch->getOperand(0).getImm() != SystemZ::CCMASK_ICMP ||
+      Branch->getOperand(1).getImm() != SystemZ::CCMASK_CMP_NE)
+    return false;
+
+  // We already know that there are no references to the register between
+  // MI and Compare.  Make sure that there are also no references between
+  // Compare and Branch.
+  unsigned SrcReg = Compare->getOperand(0).getReg();
+  MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
+  for (++MBBI; MBBI != MBBE; ++MBBI)
+    if (getRegReferences(MBBI, SrcReg))
+      return false;
+
+  // The transformation is OK.  Rebuild Branch as a BRCT(G).
+  MachineOperand Target(Branch->getOperand(2));
+  Branch->RemoveOperand(2);
+  Branch->RemoveOperand(1);
+  Branch->RemoveOperand(0);
+  Branch->setDesc(TII->get(BRCT));
+  MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
+    .addOperand(MI->getOperand(0))
+    .addOperand(MI->getOperand(1))
+    .addOperand(Target)
+    .addReg(SystemZ::CC, RegState::ImplicitDefine);
+  MI->removeFromParent();
+  return true;
+}
+
+// If MI is a load instruction, try to convert it into a LOAD AND TEST.
+// Return true on success.
+bool SystemZElimCompare::convertToLoadAndTest(MachineInstr *MI) {
+  unsigned Opcode = TII->getLoadAndTest(MI->getOpcode());
+  if (!Opcode)
+    return false;
+
+  MI->setDesc(TII->get(Opcode));
+  MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+    .addReg(SystemZ::CC, RegState::ImplicitDefine);
+  return true;
+}
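`convertToBRCT` is easiest to see in assembly terms: a decrement, a compare against zero, and a branch on not-equal collapse into a single BRANCH ON COUNT. A condensed model of the guards, with invented names; the real code above also verifies that nothing touches the register or CC in between:

```cpp
// ahi %r1,-1        (or aghi for the 64-bit BRCTG form)
// chi %r1,0                              =>   brct %r1,.Lloop
// jlh .Lloop
bool canUseBRCT(bool isHalfwordImmAdd, long long addend,
                unsigned numCCUsers, bool onlyUserIsJLH) {
  return isHalfwordImmAdd &&   // AHI or AGHI
         addend == -1 &&       // decrement by exactly one
         numCCUsers == 1 &&    // a single consumer of CC...
         onlyUserIsJLH;        // ...branching on "not equal"
}
```

`convertToLoadAndTest` is the analogous rewrite for loads, replacing a plain load plus compare-with-zero by the CC-setting LOAD AND TEST form.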
+
+// The CC users in CCUsers are testing the result of a comparison of some
+// value X against zero and we know that any CC value produced by MI
+// would also reflect the value of X.  Try to adjust CCUsers so that
+// they test the result of MI directly, returning true on success.
+// Leave everything unchanged on failure.
+bool SystemZElimCompare::
+adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
+                      SmallVectorImpl<MachineInstr *> &CCUsers) {
+  int Opcode = MI->getOpcode();
+  const MCInstrDesc &Desc = TII->get(Opcode);
+  unsigned MIFlags = Desc.TSFlags;
+
+  // See which compare-style condition codes are available.
+  unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags);
+
+  // For unsigned comparisons with zero, only equality makes sense.
+  unsigned CompareFlags = Compare->getDesc().TSFlags;
+  if (CompareFlags & SystemZII::IsLogical)
+    ReusableCCMask &= SystemZ::CCMASK_CMP_EQ;
+
+  if (ReusableCCMask == 0)
+    return false;
+
+  unsigned CCValues = SystemZII::getCCValues(MIFlags);
+  assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues");
+
+  // Now check whether these flags are enough for all users.
+  SmallVector<MachineOperand *, 4> AlterMasks;
+  for (unsigned int I = 0, E = CCUsers.size(); I != E; ++I) {
+    MachineInstr *MI = CCUsers[I];
+
+    // Fail if this isn't a use of CC that we understand.
+    unsigned Flags = MI->getDesc().TSFlags;
+    unsigned FirstOpNum;
+    if (Flags & SystemZII::CCMaskFirst)
+      FirstOpNum = 0;
+    else if (Flags & SystemZII::CCMaskLast)
+      FirstOpNum = MI->getNumExplicitOperands() - 2;
+    else
+      return false;
+
+    // Check whether the instruction predicate treats all CC values
+    // outside of ReusableCCMask in the same way.  In that case it
+    // doesn't matter what those CC values mean.
+    unsigned CCValid = MI->getOperand(FirstOpNum).getImm();
+    unsigned CCMask = MI->getOperand(FirstOpNum + 1).getImm();
+    unsigned OutValid = ~ReusableCCMask & CCValid;
+    unsigned OutMask = ~ReusableCCMask & CCMask;
+    if (OutMask != 0 && OutMask != OutValid)
+      return false;
+
+    AlterMasks.push_back(&MI->getOperand(FirstOpNum));
+    AlterMasks.push_back(&MI->getOperand(FirstOpNum + 1));
+  }
+
+  // All users are OK.  Adjust the masks for MI.
+  for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) {
+    AlterMasks[I]->setImm(CCValues);
+    unsigned CCMask = AlterMasks[I + 1]->getImm();
+    if (CCMask & ~ReusableCCMask)
+      AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) |
+                                (CCValues & ~ReusableCCMask));
+  }
+
+  // CC is now live after MI.
+  int CCDef = MI->findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI);
+  assert(CCDef >= 0 && "Couldn't find CC set");
+  MI->getOperand(CCDef).setIsDead(false);
+
+  // Clear any intervening kills of CC.
+  MachineBasicBlock::iterator MBBI = MI, MBBE = Compare;
+  for (++MBBI; MBBI != MBBE; ++MBBI)
+    MBBI->clearRegisterKills(SystemZ::CC, TRI);
+
+  return true;
+}
+
+// Return true if Compare is a comparison against zero.
+static bool isCompareZero(MachineInstr *Compare) {
+  switch (Compare->getOpcode()) {
+  case SystemZ::LTEBRCompare:
+  case SystemZ::LTDBRCompare:
+  case SystemZ::LTXBRCompare:
+    return true;
+
+  default:
+    return (Compare->getNumExplicitOperands() == 2 &&
+            Compare->getOperand(1).isImm() &&
+            Compare->getOperand(1).getImm() == 0);
+  }
+}
+
+// Try to optimize cases where comparison instruction Compare is testing
+// a value against zero.  Return true on success and if Compare should be
+// deleted as dead.  CCUsers is the list of instructions that use the CC
+// value produced by Compare.
+bool SystemZElimCompare::
+optimizeCompareZero(MachineInstr *Compare,
+                    SmallVectorImpl<MachineInstr *> &CCUsers) {
+  if (!isCompareZero(Compare))
+    return false;
+
+  // Search back for CC results that are based on the first operand.
+  unsigned SrcReg = Compare->getOperand(0).getReg();
+  unsigned SrcSubReg = Compare->getOperand(0).getSubReg();
+  MachineBasicBlock *MBB = Compare->getParent();
+  MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB->begin();
+  Reference CCRefs;
+  Reference SrcRefs;
+  while (MBBI != MBBE) {
+    --MBBI;
+    MachineInstr *MI = MBBI;
+    if (resultTests(MI, SrcReg, SrcSubReg)) {
+      // Try to remove both MI and Compare by converting a branch to BRCT(G).
+      // We don't care in this case whether CC is modified between MI and
+      // Compare.
+      if (!CCRefs.Use && !SrcRefs && convertToBRCT(MI, Compare, CCUsers)) {
+        BranchOnCounts += 1;
+        return true;
+      }
+      // Try to eliminate Compare by reusing a CC result from MI.
+      if ((!CCRefs && convertToLoadAndTest(MI)) ||
+          (!CCRefs.Def && adjustCCMasksForInstr(MI, Compare, CCUsers))) {
+        EliminatedComparisons += 1;
+        return true;
+      }
+    }
+    SrcRefs |= getRegReferences(MI, SrcReg);
+    if (SrcRefs.Def)
+      return false;
+    CCRefs |= getRegReferences(MI, SystemZ::CC);
+    if (CCRefs.Use && CCRefs.Def)
+      return false;
+  }
+  return false;
+}
+
+// Try to fuse comparison instruction Compare into a later branch.
+// Return true on success and if Compare is therefore redundant.
+bool SystemZElimCompare::
+fuseCompareAndBranch(MachineInstr *Compare,
+                     SmallVectorImpl<MachineInstr *> &CCUsers) {
+  // See whether we have a comparison that can be fused.
+  unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(),
+                                                  Compare);
+  if (!FusedOpcode)
+    return false;
+
+  // See whether we have a single branch with which to fuse.
+  if (CCUsers.size() != 1)
+    return false;
+  MachineInstr *Branch = CCUsers[0];
+  if (Branch->getOpcode() != SystemZ::BRC)
+    return false;
+
+  // Make sure that the operands are available at the branch.
+  unsigned SrcReg = Compare->getOperand(0).getReg();
+  unsigned SrcReg2 = (Compare->getOperand(1).isReg() ?
+                      Compare->getOperand(1).getReg() : 0);
+  MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
+  for (++MBBI; MBBI != MBBE; ++MBBI)
+    if (MBBI->modifiesRegister(SrcReg, TRI) ||
+        (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI)))
+      return false;
+
+  // Read the branch mask and target.
+  MachineOperand CCMask(MBBI->getOperand(1));
+  MachineOperand Target(MBBI->getOperand(2));
+  assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 &&
+         "Invalid condition-code mask for integer comparison");
+
+  // Clear out all current operands.
+  int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
+  assert(CCUse >= 0 && "BRC must use CC");
+  Branch->RemoveOperand(CCUse);
+  Branch->RemoveOperand(2);
+  Branch->RemoveOperand(1);
+  Branch->RemoveOperand(0);
+
+  // Rebuild Branch as a fused compare and branch.
+  Branch->setDesc(TII->get(FusedOpcode));
+  MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
+    .addOperand(Compare->getOperand(0))
+    .addOperand(Compare->getOperand(1))
+    .addOperand(CCMask)
+    .addOperand(Target)
+    .addReg(SystemZ::CC, RegState::ImplicitDefine);
+
+  // Clear any intervening kills of SrcReg and SrcReg2.
+  MBBI = Compare;
+  for (++MBBI; MBBI != MBBE; ++MBBI) {
+    MBBI->clearRegisterKills(SrcReg, TRI);
+    if (SrcReg2)
+      MBBI->clearRegisterKills(SrcReg2, TRI);
+  }
+
+  FusedComparisons += 1;
+  return true;
+}
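`fuseCompareAndBranch` folds the compare into the branch that consumes it, so `cgr %r1,%r2` followed by `jlh .Ltarget` becomes a single compare-and-branch instruction (printed by LLVM with an extended mnemonic such as `cgrjlh %r1,%r2,.Ltarget`). A condensed model of the legality guards, with invented names; the real code also rereads the mask and target operands:

```cpp
// cgr %r1,%r2
// jlh .Ltarget        =>   cgrjlh %r1,%r2,.Ltarget
bool canFuse(bool hasFusedOpcode, unsigned numCCUsers, bool userIsBRC,
             bool srcRegsClobberedBeforeBranch) {
  return hasFusedOpcode &&              // TII->getCompareAndBranch() != 0
         numCCUsers == 1 &&             // exactly one consumer of CC
         userIsBRC &&                   // and it is a plain conditional branch
         !srcRegsClobberedBeforeBranch; // operands still live at the branch
}
```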
+
+// Process all comparison instructions in MBB.  Return true if something
+// changed.
+bool SystemZElimCompare::processBlock(MachineBasicBlock *MBB) {
+  bool Changed = false;
+
+  // Walk backwards through the block looking for comparisons, recording
+  // all CC users as we go.  The subroutines can delete Compare and
+  // instructions before it.
+  bool CompleteCCUsers = !isCCLiveOut(MBB);
+  SmallVector<MachineInstr *, 4> CCUsers;
+  MachineBasicBlock::iterator MBBI = MBB->end();
+  while (MBBI != MBB->begin()) {
+    MachineInstr *MI = --MBBI;
+    if (CompleteCCUsers &&
+        MI->isCompare() &&
+        (optimizeCompareZero(MI, CCUsers) ||
+         fuseCompareAndBranch(MI, CCUsers))) {
+      ++MBBI;
+      MI->removeFromParent();
+      Changed = true;
+      CCUsers.clear();
+      CompleteCCUsers = true;
+      continue;
+    }
+
+    Reference CCRefs(getRegReferences(MI, SystemZ::CC));
+    if (CCRefs.Def) {
+      CCUsers.clear();
+      CompleteCCUsers = !CCRefs.IndirectDef;
+    }
+    if (CompleteCCUsers && CCRefs.Use)
+      CCUsers.push_back(MI);
+  }
+  return Changed;
+}
+
+bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) {
+  TII = static_cast<const SystemZInstrInfo *>(F.getTarget().getInstrInfo());
+  TRI = &TII->getRegisterInfo();
+
+  bool Changed = false;
+  for (MachineFunction::iterator MFI = F.begin(), MFE = F.end();
+       MFI != MFE; ++MFI)
+    Changed |= processBlock(MFI);
+
+  return Changed;
+}
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
index c0d72c3..a58da90 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -14,19 +14,15 @@
 #include "SystemZTargetMachine.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/IR/Function.h"
 
 using namespace llvm;
 
-SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm,
-                                           const SystemZSubtarget &sti)
-  : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8,
-                        -SystemZMC::CallFrameSize),
-    TM(tm),
-    STI(sti) {
+namespace {
   // The ABI-defined register save slots, relative to the incoming stack
   // pointer.
-  static const unsigned SpillOffsetTable[][2] = {
+  static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
     { SystemZ::R2D, 0x10 },
     { SystemZ::R3D, 0x18 },
     { SystemZ::R4D, 0x20 },
@@ -46,11 +42,23 @@ SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm,
     { SystemZ::F4D, 0x90 },
     { SystemZ::F6D, 0x98 }
   };
+}
 
+SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm,
+                                           const SystemZSubtarget &sti)
+  : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8,
+                        -SystemZMC::CallFrameSize, 8),
+    TM(tm), STI(sti) {
   // Create a mapping from register number to save slot offset.
   RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
   for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I)
-    RegSpillOffsets[SpillOffsetTable[I][0]] = SpillOffsetTable[I][1];
+    RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset;
+}
+
+const TargetFrameLowering::SpillSlot *
+SystemZFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+  NumEntries = array_lengthof(SpillOffsetTable);
+  return SpillOffsetTable;
 }
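The table above encodes the s390x ELF ABI register save area: call-saved GPRs %r2 through %r15 get consecutive 8-byte slots at offsets 0x10 to 0x78 from the incoming stack pointer, and FPRs %f0/%f2/%f4/%f6 sit at 0x80 to 0x98. A quick sanity model of the GPR part (the helper name is invented):

```cpp
#include <cassert>
#include <cstdint>

// Save-slot offset for call-saved GPR %r<N> (N in 2..15), per the
// SpillOffsetTable above: consecutive 8-byte slots starting at 0x10.
uint64_t gprSaveSlot(unsigned N) {
  assert(N >= 2 && N <= 15);
  return 0x10 + (N - 2) * 8;
}

int main() {
  assert(gprSaveSlot(2) == 0x10);   // { SystemZ::R2D, 0x10 }
  assert(gprSaveSlot(3) == 0x18);   // { SystemZ::R3D, 0x18 }
  assert(gprSaveSlot(15) == 0x78);  // %r15, the stack pointer
}
```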
 
 void SystemZFrameLowering::
@@ -127,14 +135,12 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
   DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
 
   // Scan the call-saved GPRs and find the bounds of the register spill area.
-  unsigned SavedGPRFrameSize = 0;
   unsigned LowGPR = 0;
   unsigned HighGPR = SystemZ::R15D;
   unsigned StartOffset = -1U;
   for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
     unsigned Reg = CSI[I].getReg();
     if (SystemZ::GR64BitRegClass.contains(Reg)) {
-      SavedGPRFrameSize += 8;
       unsigned Offset = RegSpillOffsets[Reg];
       assert(Offset && "Unexpected GPR save");
       if (StartOffset > Offset) {
@@ -144,9 +150,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
     }
   }
 
-  // Save information about the range and location of the call-saved
-  // registers, for use by the epilogue inserter.
-  ZFI->setSavedGPRFrameSize(SavedGPRFrameSize);
+  // Save the range of call-saved registers, for use by the epilogue inserter.
   ZFI->setLowSavedGPR(LowGPR);
   ZFI->setHighSavedGPR(HighGPR);
 
@@ -260,6 +264,22 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
   return true;
 }
 
+void SystemZFrameLowering::
+processFunctionBeforeFrameFinalized(MachineFunction &MF,
+                                    RegScavenger *RS) const {
+  MachineFrameInfo *MFFrame = MF.getFrameInfo();
+  uint64_t MaxReach = (MFFrame->estimateStackSize(MF) +
+                       SystemZMC::CallFrameSize * 2);
+  if (!isUInt<12>(MaxReach)) {
+    // We may need register scavenging slots if some parts of the frame
+    // are outside the reach of an unsigned 12-bit displacement.
+    // Create 2 for the case where both addresses in an MVC are
+    // out of range.
+    RS->addScavengingFrameIndex(MFFrame->CreateStackObject(8, 8, false));
+    RS->addScavengingFrameIndex(MFFrame->CreateStackObject(8, 8, false));
+  }
+}
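`processFunctionBeforeFrameFinalized` reserves two emergency slots whenever some frame address might not fit a 12-bit unsigned displacement; two because an MVC has two memory operands, each of which may need a scavenged base register. A standalone sketch of the decision (CallFrameSize is 160 in this ABI):

```cpp
#include <cstdint>

// True if Val fits in an unsigned 12-bit field (mirrors llvm::isUInt<12>).
static bool isUInt12(uint64_t Val) { return Val < (1 << 12); }

// Decide how many 8-byte scavenging slots the frame needs.
unsigned scavengingSlotsNeeded(uint64_t EstimatedStackSize) {
  const uint64_t CallFrameSize = 160;  // ABI-defined, SystemZMC::CallFrameSize
  uint64_t MaxReach = EstimatedStackSize + CallFrameSize * 2;
  return isUInt12(MaxReach) ? 0 : 2;   // 2: both MVC addresses may be far
}
```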
 
 // Emit instructions before MBBI (in MBB) to add NumBytes to Reg.
 static void emitIncrement(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI,
@@ -297,7 +317,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
   SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
   MachineBasicBlock::iterator MBBI = MBB.begin();
   MachineModuleInfo &MMI = MF.getMMI();
-  const MCRegisterInfo &MRI = MMI.getContext().getRegisterInfo();
+  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
   const std::vector<CalleeSavedInfo> &CSI = MFFrame->getCalleeSavedInfo();
   bool HasFP = hasFP(MF);
   DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -322,7 +342,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
       if (SystemZ::GR64BitRegClass.contains(Reg)) {
         int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg];
         MMI.addFrameInst(MCCFIInstruction::createOffset(
-            GPRSaveLabel, MRI.getDwarfRegNum(Reg, true), Offset));
+            GPRSaveLabel, MRI->getDwarfRegNum(Reg, true), Offset));
       }
     }
   }
@@ -351,7 +371,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
     MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol();
     BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::PROLOG_LABEL))
       .addSym(SetFPLabel);
-    unsigned HardFP = MRI.getDwarfRegNum(SystemZ::R11D, true);
+    unsigned HardFP = MRI->getDwarfRegNum(SystemZ::R11D, true);
     MMI.addFrameInst(
         MCCFIInstruction::createDefCfaRegister(SetFPLabel, HardFP));
 
@@ -379,7 +399,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
       // Add CFI for this save.
       if (!FPRSaveLabel)
         FPRSaveLabel = MMI.getContext().CreateTempSymbol();
-      unsigned Reg = MRI.getDwarfRegNum(I->getReg(), true);
+      unsigned Reg = MRI->getDwarfRegNum(I->getReg(), true);
       int64_t Offset = getFrameIndexOffset(MF, I->getFrameIdx());
       MMI.addFrameInst(MCCFIInstruction::createOffset(
           FPRSaveLabel, Reg, SPOffsetFromCFA + Offset));
@@ -449,11 +469,6 @@ int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
   // offset is therefore negative.
   int64_t Offset = (MFFrame->getObjectOffset(FI) +
                     MFFrame->getOffsetAdjustment());
-  if (FI >= 0)
-    // Non-fixed objects are allocated below the incoming stack pointer.
-    // Account for the space at the top of the frame that we choose not
-    // to allocate.
-    Offset += getUnallocatedTopBytes(MF);
 
   // Make the offset relative to the incoming stack pointer.
   Offset -= getOffsetOfLocalArea();
@@ -465,23 +480,12 @@ int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
 }
 
 uint64_t SystemZFrameLowering::
-getUnallocatedTopBytes(const MachineFunction &MF) const {
-  return MF.getInfo<SystemZMachineFunctionInfo>()->getSavedGPRFrameSize();
-}
-
-uint64_t SystemZFrameLowering::
 getAllocatedStackSize(const MachineFunction &MF) const {
   const MachineFrameInfo *MFFrame = MF.getFrameInfo();
 
   // Start with the size of the local variables and spill slots.
   uint64_t StackSize = MFFrame->getStackSize();
 
-  // Remove any bytes that we choose not to allocate.
-  StackSize -= getUnallocatedTopBytes(MF);
-
-  // Include space for an emergency spill slot, if one might be needed.
-  StackSize += getEmergencySpillSlotSize(MF);
-
   // We need to allocate the ABI-defined 160-byte base area whenever
   // we allocate stack space for our own use and whenever we call another
   // function.
@@ -491,19 +495,6 @@ getAllocatedStackSize(const MachineFunction &MF) const {
   return StackSize;
 }
 
-unsigned SystemZFrameLowering::
-getEmergencySpillSlotSize(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFFrame = MF.getFrameInfo();
-  uint64_t MaxReach = MFFrame->getStackSize() + SystemZMC::CallFrameSize * 2;
-  return isUInt<12>(MaxReach) ? 0 : 8;
-}
-
-unsigned SystemZFrameLowering::
-getEmergencySpillSlotOffset(const MachineFunction &MF) const {
-  assert(getEmergencySpillSlotSize(MF) && "No emergency spill slot");
-  return SystemZMC::CallFrameSize;
-}
-
 bool
 SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
   // The ABI requires us to allocate 160 bytes of stack space for the callee,
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h
index 5ca049c..9b0a1d5 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -29,7 +29,10 @@ public:
   SystemZFrameLowering(const SystemZTargetMachine &tm,
                        const SystemZSubtarget &sti);
 
-  // Override FrameLowering.
+  // Override TargetFrameLowering.
+  virtual bool isFPCloseToIncomingSP() const LLVM_OVERRIDE { return false; }
+  virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const
+    LLVM_OVERRIDE;
   virtual void
   processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                        RegScavenger *RS) const LLVM_OVERRIDE;
@@ -45,6 +48,8 @@ public:
                       const std::vector<CalleeSavedInfo> &CSI,
                       const TargetRegisterInfo *TRI) const LLVM_OVERRIDE;
+  virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+                                                   RegScavenger *RS) const;
   virtual void emitPrologue(MachineFunction &MF) const LLVM_OVERRIDE;
   virtual void emitEpilogue(MachineFunction &MF,
                             MachineBasicBlock &MBB) const LLVM_OVERRIDE;
@@ -59,29 +64,9 @@ public:
                                  MachineBasicBlock::iterator MI) const
     LLVM_OVERRIDE;
 
-  // The target-independent code automatically allocates save slots for
-  // call-saved GPRs.  However, we don't need those slots for SystemZ,
-  // because the ABI sets aside GPR save slots in the caller-allocated part
-  // of the frame.  Since the target-independent code puts this unneeded
-  // area at the top of the callee-allocated part of frame, we choose not
-  // to allocate it and adjust the offsets accordingly.  Return the
-  // size of this unallocated area.
-  // FIXME: seems a bit hackish.
-  uint64_t getUnallocatedTopBytes(const MachineFunction &MF) const;
-
   // Return the number of bytes in the callee-allocated part of the frame.
   uint64_t getAllocatedStackSize(const MachineFunction &MF) const;
 
-  // Return the number of frame bytes that should be reserved for
-  // an emergency spill slot, for use by the register scaveneger.
-  // Return 0 if register scaveging won't be needed.
-  unsigned getEmergencySpillSlotSize(const MachineFunction &MF) const;
-
-  // Return the offset from the frame pointer of the emergency spill slot,
-  // which always fits within a 12-bit unsigned displacement field.
-  // Only valid if getEmergencySpillSlotSize(MF) returns nonzero.
-  unsigned getEmergencySpillSlotOffset(const MachineFunction &MF) const;
-
   // Return the byte offset from the incoming stack pointer of Reg's
   // ABI-defined save slot.  Return 0 if no slot is defined for Reg.
   unsigned getRegSpillOffset(unsigned Reg) const {
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index f10ba23..d9794b1 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "SystemZTargetMachine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -91,6 +92,37 @@ struct SystemZAddressingMode {
   }
 };
 
+// Return a mask with Count low bits set.
+static uint64_t allOnes(unsigned int Count) {
+  return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
+}
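`allOnes(64)` must return all ones, but a single `1 << 64` is undefined behavior in C++, so the helper shifts by `Count - 1` and then by one more. A quick standalone check of the edge cases (not LLVM code):

```cpp
#include <cassert>
#include <cstdint>

static uint64_t allOnes(unsigned Count) {
  // Two shifts keep every shift amount in 0..63, avoiding undefined
  // behavior when Count == 64 (where "1 << 64" would be UB).
  return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
}

int main() {
  assert(allOnes(0) == 0);
  assert(allOnes(1) == 1);
  assert(allOnes(12) == 0xfff);
  assert(allOnes(64) == ~uint64_t(0));
}
```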
+
+// Represents operands 2 to 5 of the ROTATE AND ... SELECTED BITS operation
+// given by Opcode.  The operands are: Input (R2), Start (I3), End (I4) and
+// Rotate (I5).  The combined operand value is effectively:
+//
+//   (or (rotl Input, Rotate), ~Mask)
+//
+// for RNSBG and:
+//
+//   (and (rotl Input, Rotate), Mask)
+//
+// otherwise.  The value has BitSize bits.
+struct RxSBGOperands {
+  RxSBGOperands(unsigned Op, SDValue N)
+    : Opcode(Op), BitSize(N.getValueType().getSizeInBits()),
+      Mask(allOnes(BitSize)), Input(N), Start(64 - BitSize), End(63),
+      Rotate(0) {}
+
+  unsigned Opcode;
+  unsigned BitSize;
+  uint64_t Mask;
+  SDValue Input;
+  unsigned Start;
+  unsigned End;
+  unsigned Rotate;
+};
+
 class SystemZDAGToDAGISel : public SelectionDAGISel {
   const SystemZTargetLowering &Lowering;
   const SystemZSubtarget &Subtarget;
@@ -100,6 +132,14 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
     return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
   }
 
+  const SystemZTargetMachine &getTargetMachine() const {
+    return static_cast<const SystemZTargetMachine &>(TM);
+  }
+
+  const SystemZInstrInfo *getInstrInfo() const {
+    return getTargetMachine().getInstrInfo();
+  }
+
   // Try to fold more of the base or index of AM into AM, where IsBase
   // selects between the base and index.
   bool expandAddress(SystemZAddressingMode &AM, bool IsBase);
@@ -199,6 +239,33 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
                         Addr, Base, Disp, Index);
   }
 
+  // Check whether (or Op (and X InsertMask)) is effectively an insertion
+  // of X into bits InsertMask of some Y != Op.  Return true if so and
+  // set Op to that Y.
+  bool detectOrAndInsertion(SDValue &Op, uint64_t InsertMask);
+
+  // Try to update RxSBG so that only the bits of RxSBG.Input in Mask are used.
+  // Return true on success.
+  bool refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask);
+
+  // Try to fold some of RxSBG.Input into other fields of RxSBG.
+  // Return true on success.
+  bool expandRxSBG(RxSBGOperands &RxSBG);
+
+  // Return an undefined i64 value.
+  SDValue getUNDEF64(SDLoc DL);
+
+  // Convert N to VT, if it isn't already.
+  SDValue convertTo(SDLoc DL, EVT VT, SDValue N);
+
+  // Try to implement AND or shift node N using RISBG with the zero flag set.
+  // Return the selected node on success, otherwise return null.
+  SDNode *tryRISBGZero(SDNode *N);
+
+  // Try to use RISBG or Opcode to implement OR or XOR node N.
+  // Return the selected node on success, otherwise return null.
+  SDNode *tryRxSBG(SDNode *N, unsigned Opcode);
+
   // If Op0 is null, then Node is a constant that can be loaded using:
   //
   //   (Opcode UpperVal LowerVal)
@@ -209,6 +276,8 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
   SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
                               uint64_t UpperVal, uint64_t LowerVal);
 
+  bool storeLoadCanUseMVC(SDNode *N) const;
+
 public:
   SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
     : SelectionDAGISel(TM, OptLevel),
@@ -518,6 +587,304 @@ bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form,
   return true;
 }
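RISBG-family semantics in miniature: rotate the input left, then select the bit range Start..End, where bits are numbered 0 (MSB) through 63 (LSB) and the range wraps when Start > End. A reference model under those assumptions (standalone; `rxsbgSelectMask` is an invented name):

```cpp
#include <cassert>
#include <cstdint>

static uint64_t rotl64(uint64_t X, unsigned N) {
  return N == 0 ? X : (X << N) | (X >> (64 - N));
}

// Selection mask for big-endian bit numbers Start..End (0 = MSB, 63 = LSB);
// the range wraps around when Start > End.
static uint64_t rxsbgSelectMask(unsigned Start, unsigned End) {
  uint64_t FromStart = ~uint64_t(0) >> Start;   // bits Start..63
  uint64_t ToEnd = ~uint64_t(0) << (63 - End);  // bits 0..End
  return Start <= End ? FromStart & ToEnd : FromStart | ToEnd;
}

int main() {
  // Start=48, End=63, Rotate=0 keeps the low 16 bits.
  assert(rxsbgSelectMask(48, 63) == 0xffff);
  // "risbg %r1,%r2,60,63,4" would move the top nibble into the low nibble:
  uint64_t In = 0xf000000000000000;
  assert((rotl64(In, 4) & rxsbgSelectMask(60, 63)) == 0xf);
}
```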
+
+bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
+                                               uint64_t InsertMask) {
+  // We're only interested in cases where the insertion is into some operand
+  // of Op, rather than into Op itself.  The only useful case is an AND.
+  if (Op.getOpcode() != ISD::AND)
+    return false;
+
+  // We need a constant mask.
+  ConstantSDNode *MaskNode =
+    dyn_cast<ConstantSDNode>(Op.getOperand(1).getNode());
+  if (!MaskNode)
+    return false;
+
+  // It's not an insertion of Op.getOperand(0) if the two masks overlap.
+  uint64_t AndMask = MaskNode->getZExtValue();
+  if (InsertMask & AndMask)
+    return false;
+
+  // It's only an insertion if all bits are covered or are known to be zero.
+  // The inner check covers all cases but is more expensive.
+  uint64_t Used = allOnes(Op.getValueType().getSizeInBits());
+  if (Used != (AndMask | InsertMask)) {
+    APInt KnownZero, KnownOne;
+    CurDAG->ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne);
+    if (Used != (AndMask | InsertMask | KnownZero.getZExtValue()))
+      return false;
+  }
+
+  Op = Op.getOperand(0);
+  return true;
+}
+
+bool SystemZDAGToDAGISel::refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) {
+  const SystemZInstrInfo *TII = getInstrInfo();
+  if (RxSBG.Rotate != 0)
+    Mask = (Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate));
+  Mask &= RxSBG.Mask;
+  if (TII->isRxSBGMask(Mask, RxSBG.BitSize, RxSBG.Start, RxSBG.End)) {
+    RxSBG.Mask = Mask;
+    return true;
+  }
+  return false;
+}
+
+// RxSBG.Input is a shift of Count bits in the direction given by IsLeft.
+// Return true if the result depends on the signs or zeros that are
+// shifted in.
+static bool shiftedInBitsMatter(RxSBGOperands &RxSBG, uint64_t Count,
+                                bool IsLeft) {
+  // Work out which bits of the shift result are zeros or sign copies.
+  uint64_t ShiftedIn = allOnes(Count);
+  if (!IsLeft)
+    ShiftedIn <<= RxSBG.BitSize - Count;
+
+  // Rotate that mask in the same way as RxSBG.Input is rotated.
+  if (RxSBG.Rotate != 0)
+    ShiftedIn = ((ShiftedIn << RxSBG.Rotate) |
+                 (ShiftedIn >> (64 - RxSBG.Rotate)));
+
+  // Fail if any of the zero or sign bits are used.
+  return (ShiftedIn & RxSBG.Mask) != 0;
+}
+
+bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) {
+  SDValue N = RxSBG.Input;
+  unsigned Opcode = N.getOpcode();
+  switch (Opcode) {
+  case ISD::AND: {
+    if (RxSBG.Opcode == SystemZ::RNSBG)
+      return false;
+
+    ConstantSDNode *MaskNode =
+      dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
+    if (!MaskNode)
+      return false;
+
+    SDValue Input = N.getOperand(0);
+    uint64_t Mask = MaskNode->getZExtValue();
+    if (!refineRxSBGMask(RxSBG, Mask)) {
+      // If some bits of Input are already known zeros, those bits will have
+      // been removed from the mask.  See if adding them back in makes the
+      // mask suitable.
+      APInt KnownZero, KnownOne;
+      CurDAG->ComputeMaskedBits(Input, KnownZero, KnownOne);
+      Mask |= KnownZero.getZExtValue();
+      if (!refineRxSBGMask(RxSBG, Mask))
+        return false;
+    }
+    RxSBG.Input = Input;
+    return true;
+  }
+
+  case ISD::OR: {
+    if (RxSBG.Opcode != SystemZ::RNSBG)
+      return false;
+
+    ConstantSDNode *MaskNode =
+      dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
+    if (!MaskNode)
+      return false;
+
+    SDValue Input = N.getOperand(0);
+    uint64_t Mask = ~MaskNode->getZExtValue();
+    if (!refineRxSBGMask(RxSBG, Mask)) {
+      // If some bits of Input are already known ones, those bits will have
+      // been removed from the mask.  See if adding them back in makes the
+      // mask suitable.
+      APInt KnownZero, KnownOne;
+      CurDAG->ComputeMaskedBits(Input, KnownZero, KnownOne);
+      Mask &= ~KnownOne.getZExtValue();
+      if (!refineRxSBGMask(RxSBG, Mask))
+        return false;
+    }
+    RxSBG.Input = Input;
+    return true;
+  }
+    if (RxSBG.BitSize != 64)
+      return false;
+    ConstantSDNode *CountNode
+      = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
+    if (!CountNode)
+      return false;
+
+    RxSBG.Rotate = (RxSBG.Rotate + CountNode->getZExtValue()) & 63;
+    RxSBG.Input = N.getOperand(0);
+    return true;
+  }
+
+  case ISD::SHL: {
+    ConstantSDNode *CountNode =
+      dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
+    if (!CountNode)
+      return false;
+
+    uint64_t Count = CountNode->getZExtValue();
+    if (Count < 1 || Count >= RxSBG.BitSize)
+      return false;
+
+    if (RxSBG.Opcode == SystemZ::RNSBG) {
+      // Treat (shl X, count) as (rotl X, size-count) as long as the bottom
+      // count bits from RxSBG.Input are ignored.
+      if (shiftedInBitsMatter(RxSBG, Count, true))
+        return false;
+    } else {
+      // Treat (shl X, count) as (and (rotl X, count), ~0<<count).
+      if (!refineRxSBGMask(RxSBG, allOnes(RxSBG.BitSize - Count) << Count))
+        return false;
+    }
+
+    RxSBG.Rotate = (RxSBG.Rotate + Count) & 63;
+    RxSBG.Input = N.getOperand(0);
+    return true;
+  }
+
+  case ISD::SRL:
+  case ISD::SRA: {
+    ConstantSDNode *CountNode =
+      dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
+    if (!CountNode)
+      return false;
+
+    uint64_t Count = CountNode->getZExtValue();
+    if (Count < 1 || Count >= RxSBG.BitSize)
+      return false;
+
+    if (RxSBG.Opcode == SystemZ::RNSBG || Opcode == ISD::SRA) {
+      // Treat (srl|sra X, count) as (rotl X, size-count) as long as the top
+      // count bits from RxSBG.Input are ignored.
+      if (shiftedInBitsMatter(RxSBG, Count, false))
+        return false;
+    } else {
+      // Treat (srl X, count) as (and (rotl X, size-count), ~0>>count),
+      // which is similar to SLL above.
+      if (!refineRxSBGMask(RxSBG, allOnes(RxSBG.BitSize - Count)))
+        return false;
+    }
+
+    RxSBG.Rotate = (RxSBG.Rotate - Count) & 63;
+    RxSBG.Input = N.getOperand(0);
+    return true;
+  }
+  default:
+    return false;
+  }
+}
+
+SDValue SystemZDAGToDAGISel::getUNDEF64(SDLoc DL) {
+  SDNode *N = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64);
+  return SDValue(N, 0);
+}
+
+SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) {
+  if (N.getValueType() == MVT::i32 && VT == MVT::i64) {
+    SDValue Index = CurDAG->getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
+    SDNode *Insert = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG,
+                                            DL, VT, getUNDEF64(DL), N, Index);
+    return SDValue(Insert, 0);
+  }
+  if (N.getValueType() == MVT::i64 && VT == MVT::i32) {
+    SDValue Index = CurDAG->getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
+    SDNode *Extract = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+                                             DL, VT, N, Index);
+    return SDValue(Extract, 0);
+  }
+  assert(N.getValueType() == VT && "Unexpected value types");
+  return N;
+}
+
+SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0));
+  unsigned Count = 0;
+  while (expandRxSBG(RISBG))
+    Count += 1;
+  if (Count == 0)
+    return 0;
+  if (Count == 1) {
+    // Prefer to use normal shift instructions over RISBG, since they can handle
+    // all cases and are sometimes shorter.
+    if (N->getOpcode() != ISD::AND)
+      return 0;
+
+    // Prefer register extensions like LLC over RISBG.  Also prefer to start
+    // out with normal ANDs if one instruction would be enough.  We can convert
+    // these ANDs into an RISBG later if a three-address instruction is useful.
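The non-RNSBG SHL case above rests on a simple identity: for 64-bit values, a left shift equals a rotate left whose wrapped-around low Count bits are then masked off, which is why the shift amount is folded into Rotate and the mask refined with allOnes(BitSize - Count) << Count. A quick standalone check (not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint64_t X : {0x0123456789abcdefull, ~0ull, 1ull})
        for (unsigned Count = 1; Count < 64; ++Count) {
          uint64_t Rot = (X << Count) | (X >> (64 - Count));  // rotl(X, Count)
          assert((X << Count) == (Rot & (~0ull << Count)));
        }
    }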
+ if (VT == MVT::i32 || + RISBG.Mask == 0xff || + RISBG.Mask == 0xffff || + SystemZ::isImmLF(~RISBG.Mask) || + SystemZ::isImmHF(~RISBG.Mask)) { + // Force the new mask into the DAG, since it may include known-one bits. + ConstantSDNode *MaskN = cast<ConstantSDNode>(N->getOperand(1).getNode()); + if (MaskN->getZExtValue() != RISBG.Mask) { + SDValue NewMask = CurDAG->getConstant(RISBG.Mask, VT); + N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewMask); + return SelectCode(N); + } + return 0; + } + } + + SDValue Ops[5] = { + getUNDEF64(SDLoc(N)), + convertTo(SDLoc(N), MVT::i64, RISBG.Input), + CurDAG->getTargetConstant(RISBG.Start, MVT::i32), + CurDAG->getTargetConstant(RISBG.End | 128, MVT::i32), + CurDAG->getTargetConstant(RISBG.Rotate, MVT::i32) + }; + N = CurDAG->getMachineNode(SystemZ::RISBG, SDLoc(N), MVT::i64, Ops); + return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode(); +} + +SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { + // Try treating each operand of N as the second operand of the RxSBG + // and see which goes deepest. + RxSBGOperands RxSBG[] = { + RxSBGOperands(Opcode, N->getOperand(0)), + RxSBGOperands(Opcode, N->getOperand(1)) + }; + unsigned Count[] = { 0, 0 }; + for (unsigned I = 0; I < 2; ++I) + while (expandRxSBG(RxSBG[I])) + Count[I] += 1; + + // Do nothing if neither operand is suitable. + if (Count[0] == 0 && Count[1] == 0) + return 0; + + // Pick the deepest second operand. + unsigned I = Count[0] > Count[1] ? 0 : 1; + SDValue Op0 = N->getOperand(I ^ 1); + + // Prefer IC for character insertions from memory. + if (Opcode == SystemZ::ROSBG && (RxSBG[I].Mask & 0xff) == 0) + if (LoadSDNode *Load = dyn_cast<LoadSDNode>(Op0.getNode())) + if (Load->getMemoryVT() == MVT::i8) + return 0; + + // See whether we can avoid an AND in the first operand by converting + // ROSBG to RISBG. + if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) + Opcode = SystemZ::RISBG; + + EVT VT = N->getValueType(0); + SDValue Ops[5] = { + convertTo(SDLoc(N), MVT::i64, Op0), + convertTo(SDLoc(N), MVT::i64, RxSBG[I].Input), + CurDAG->getTargetConstant(RxSBG[I].Start, MVT::i32), + CurDAG->getTargetConstant(RxSBG[I].End, MVT::i32), + CurDAG->getTargetConstant(RxSBG[I].Rotate, MVT::i32) + }; + N = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, Ops); + return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode(); +} + SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, uint64_t UpperVal, uint64_t LowerVal) { @@ -533,6 +900,49 @@ SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, return Or.getNode(); } +// N is a (store (load ...), ...) pattern. Return true if it can use MVC. +bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const { + StoreSDNode *Store = cast<StoreSDNode>(N); + LoadSDNode *Load = cast<LoadSDNode>(Store->getValue().getNode()); + + // MVC is logically a bytewise copy, so can't be used for volatile accesses. + if (Load->isVolatile() || Store->isVolatile()) + return false; + + // Prefer not to use MVC if either address can use ... RELATIVE LONG + // instructions. + assert(Load->getMemoryVT() == Store->getMemoryVT() && + "Should already have checked that the types match"); + uint64_t Size = Load->getMemoryVT().getStoreSize(); + if (Size > 1 && Size <= 8) { + // Prefer LHRL, LRL and LGRL. + if (Load->getBasePtr().getOpcode() == SystemZISD::PCREL_WRAPPER) + return false; + // Prefer STHRL, STRL and STGRL. 
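In tryRISBGZero above, the End operand is encoded as RISBG.End | 128: the 0x80 bit of RISBG's I4 field is the zero (Z) flag, which zeroes the unselected bits instead of taking them from the first operand. A scalar model of the instruction (a sketch, not the backend's code; bit numbering is big-endian as in the Principles of Operation, and the mask wraps when Start > End):

    #include <cassert>
    #include <cstdint>

    static uint64_t rotl64(uint64_t V, unsigned N) {
      N &= 63;
      return N ? (V << N) | (V >> (64 - N)) : V;
    }

    static uint64_t risbg(uint64_t Op1, uint64_t Op2, unsigned Start,
                          unsigned EndAndFlags, unsigned Rotate) {
      bool ZeroRest = (EndAndFlags & 128) != 0;  // the I4 zero flag
      unsigned End = EndAndFlags & 63;
      uint64_t Rot = rotl64(Op2, Rotate);
      uint64_t FromStart = ~uint64_t(0) >> Start;     // bits Start..63
      uint64_t UpToEnd = ~uint64_t(0) << (63 - End);  // bits 0..End
      uint64_t Mask = Start <= End ? (FromStart & UpToEnd)
                                   : (FromStart | UpToEnd);
      return (Rot & Mask) | ((ZeroRest ? 0 : Op1) & ~Mask);
    }

    int main() {
      // Extract the low byte of the second operand into a zeroed result:
      assert(risbg(~uint64_t(0), 0x1234, 56, 63 | 128, 0) == 0x34);
      // Without the zero flag, unselected bits keep the first operand's value:
      assert(risbg(0xff00, 0x1234, 56, 63, 0) == 0xff34);
    }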
+ if (Store->getBasePtr().getOpcode() == SystemZISD::PCREL_WRAPPER) + return false; + } + + // There's no chance of overlap if the load is invariant. + if (Load->isInvariant()) + return true; + + // If both operands are aligned, they must be equal or not overlap. + if (Load->getAlignment() >= Size && Store->getAlignment() >= Size) + return true; + + // Otherwise we need to check whether there's an alias. + const Value *V1 = Load->getSrcValue(); + const Value *V2 = Store->getSrcValue(); + if (!V1 || !V2) + return false; + + int64_t End1 = Load->getSrcValueOffset() + Size; + int64_t End2 = Store->getSrcValueOffset() + Size; + return !AA->alias(AliasAnalysis::Location(V1, End1, Load->getTBAAInfo()), + AliasAnalysis::Location(V2, End2, Store->getTBAAInfo())); +} + SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Dump information about the Node being selected DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); @@ -544,12 +954,21 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { } unsigned Opcode = Node->getOpcode(); + SDNode *ResNode = 0; switch (Opcode) { case ISD::OR: + if (Node->getOperand(1).getOpcode() != ISD::Constant) + ResNode = tryRxSBG(Node, SystemZ::ROSBG); + goto or_xor; + case ISD::XOR: + if (Node->getOperand(1).getOpcode() != ISD::Constant) + ResNode = tryRxSBG(Node, SystemZ::RXSBG); + // Fall through. + or_xor: // If this is a 64-bit operation in which both 32-bit halves are nonzero, // split the operation into two. - if (Node->getValueType(0) == MVT::i64) + if (!ResNode && Node->getValueType(0) == MVT::i64) if (ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) { uint64_t Val = Op1->getZExtValue(); if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) @@ -558,6 +977,17 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { } break; + case ISD::AND: + if (Node->getOperand(1).getOpcode() != ISD::Constant) + ResNode = tryRxSBG(Node, SystemZ::RNSBG); + // Fall through. + case ISD::ROTL: + case ISD::SHL: + case ISD::SRL: + if (!ResNode) + ResNode = tryRISBGZero(Node); + break; + case ISD::Constant: // If this is a 64-bit constant that is out of the range of LLILF, // LLIHF and LGFI, split it into two 32-bit pieces. @@ -582,10 +1012,32 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { } } break; + + case SystemZISD::SELECT_CCMASK: { + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + // Prefer to put any load first, so that it can be matched as a + // conditional load. + if (Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) { + SDValue CCValid = Node->getOperand(2); + SDValue CCMask = Node->getOperand(3); + uint64_t ConstCCValid = + cast<ConstantSDNode>(CCValid.getNode())->getZExtValue(); + uint64_t ConstCCMask = + cast<ConstantSDNode>(CCMask.getNode())->getZExtValue(); + // Invert the condition. 
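The SELECT_CCMASK rewrite above swaps the true and false values and compensates by inverting the condition, and the inversion is just CCValid ^ CCMask: within the conditions that can actually occur (CCValid), XOR with CCValid selects exactly the complementary set. A standalone check using BRC-style one-hot masks (bit 8 = CC0 down to bit 1 = CC3):

    #include <cassert>

    int main() {
      unsigned CCValid = 14;                 // integer compare: CC0..CC2 possible
      unsigned CCMask = 4;                   // "less than" (CC1)
      unsigned Inverted = CCValid ^ CCMask;  // "not less than": CC0 or CC2
      for (unsigned CC = 0; CC < 4; ++CC) {
        unsigned Bit = 8u >> CC;
        if (CCValid & Bit)
          assert(((CCMask & Bit) != 0) != ((Inverted & Bit) != 0));
      }
    }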
+ CCMask = CurDAG->getConstant(ConstCCValid ^ ConstCCMask, + CCMask.getValueType()); + SDValue Op4 = Node->getOperand(4); + Node = CurDAG->UpdateNodeOperands(Node, Op1, Op0, CCValid, CCMask, Op4); + } + break; + } } // Select the default instruction - SDNode *ResNode = SelectCode(Node); + if (!ResNode) + ResNode = SelectCode(Node); DEBUG(errs() << "=> "; if (ResNode == NULL || ResNode == Node) diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 1dc187f..6acdcd4 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -200,11 +200,6 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); - // Expand these using getExceptionSelectorRegister() and - // getExceptionPointerRegister(). - setOperationAction(ISD::EXCEPTIONADDR, PtrVT, Expand); - setOperationAction(ISD::EHSELECTION, PtrVT, Expand); - // Handle floating-point types. for (unsigned I = MVT::FIRST_FP_VALUETYPE; I <= MVT::LAST_FP_VALUETYPE; @@ -246,6 +241,38 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VACOPY, MVT::Other, Custom); setOperationAction(ISD::VAEND, MVT::Other, Expand); + + // We want to use MVC in preference to even a single load/store pair. + MaxStoresPerMemcpy = 0; + MaxStoresPerMemcpyOptSize = 0; + + // The main memset sequence is a byte store followed by an MVC. + // Two STC or MV..I stores win over that, but the kind of fused stores + // generated by target-independent code don't when the byte value is + // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better + // than "STC;MVC". Handle the choice in target-specific code instead. + MaxStoresPerMemset = 0; + MaxStoresPerMemsetOptSize = 0; +} + +bool +SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { + VT = VT.getScalarType(); + + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + case MVT::f64: + return true; + case MVT::f128: + return false; + default: + break; + } + + return false; } bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { @@ -263,6 +290,21 @@ bool SystemZTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, return true; } +bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM, + Type *Ty) const { + // Punt on globals for now, although they can be used in limited + // RELATIVE LONG cases. + if (AM.BaseGV) + return false; + + // Require a 20-bit signed offset. + if (!isInt<20>(AM.BaseOffs)) + return false; + + // Indexing is OK but no scale factor can be applied. + return AM.Scale == 0 || AM.Scale == 1; +} + //===----------------------------------------------------------------------===// // Inline asm support //===----------------------------------------------------------------------===// @@ -359,8 +401,24 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info, return weight; } +// Parse a "{tNNN}" register constraint for which the register type "t" +// has already been verified. MC is the class associated with "t" and +// Map maps 0-based register numbers to LLVM register numbers. 
+static std::pair<unsigned, const TargetRegisterClass *> +parseRegisterNumber(const std::string &Constraint, + const TargetRegisterClass *RC, const unsigned *Map) { + assert(*(Constraint.end()-1) == '}' && "Missing '}'"); + if (isdigit(Constraint[2])) { + std::string Suffix(Constraint.data() + 2, Constraint.size() - 2); + unsigned Index = atoi(Suffix.c_str()); + if (Index < 16 && Map[Index]) + return std::make_pair(Map[Index], RC); + } + return std::make_pair(0u, static_cast<TargetRegisterClass*>(0)); +} + std::pair<unsigned, const TargetRegisterClass *> SystemZTargetLowering:: -getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { +getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { @@ -388,6 +446,32 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { return std::make_pair(0U, &SystemZ::FP32BitRegClass); } } + if (Constraint[0] == '{') { + // We need to override the default register parsing for GPRs and FPRs + // because the interpretation depends on VT. The internal names of + // the registers are also different from the external names + // (F0D and F0S instead of F0, etc.). + if (Constraint[1] == 'r') { + if (VT == MVT::i32) + return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass, + SystemZMC::GR32Regs); + if (VT == MVT::i128) + return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass, + SystemZMC::GR128Regs); + return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass, + SystemZMC::GR64Regs); + } + if (Constraint[1] == 'f') { + if (VT == MVT::f32) + return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass, + SystemZMC::FP32Regs); + if (VT == MVT::f128) + return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass, + SystemZMC::FP128Regs); + return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass, + SystemZMC::FP64Regs); + } + } return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } @@ -610,9 +694,9 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &DL = CLI.DL; - SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; - SmallVector<SDValue, 32> &OutVals = CLI.OutVals; - SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; + SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; + SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; + SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; @@ -827,6 +911,29 @@ static unsigned CCMaskForCondCode(ISD::CondCode CC) { } // If a comparison described by IsUnsigned, CCMask, CmpOp0 and CmpOp1 +// can be converted to a comparison against zero, adjust the operands +// as necessary. 
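The rewrite defined just below leans on ordinary signed-comparison identities: a compare against -1 or 1 becomes a compare against 0 once the "equal" bit of the condition mask is toggled (GT becomes GE, LE becomes LT, and so on). A quick standalone check (not part of the patch):

    #include <cassert>

    int main() {
      for (long long X : {-3LL, -1LL, 0LL, 1LL, 5LL}) {
        assert((X > -1) == (X >= 0));   // CMP_GT against -1 -> CMP_GE against 0
        assert((X <= -1) == (X < 0));   // CMP_LE against -1 -> CMP_LT against 0
        assert((X < 1) == (X <= 0));    // CMP_LT against 1  -> CMP_LE against 0
        assert((X >= 1) == (X > 0));    // CMP_GE against 1  -> CMP_GT against 0
      }
    }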
+static void adjustZeroCmp(SelectionDAG &DAG, bool &IsUnsigned, + SDValue &CmpOp0, SDValue &CmpOp1, + unsigned &CCMask) { + if (IsUnsigned) + return; + + ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(CmpOp1.getNode()); + if (!ConstOp1) + return; + + int64_t Value = ConstOp1->getSExtValue(); + if ((Value == -1 && CCMask == SystemZ::CCMASK_CMP_GT) || + (Value == -1 && CCMask == SystemZ::CCMASK_CMP_LE) || + (Value == 1 && CCMask == SystemZ::CCMASK_CMP_LT) || + (Value == 1 && CCMask == SystemZ::CCMASK_CMP_GE)) { + CCMask ^= SystemZ::CCMASK_CMP_EQ; + CmpOp1 = DAG.getConstant(0, CmpOp1.getValueType()); + } +} + +// If a comparison described by IsUnsigned, CCMask, CmpOp0 and CmpOp1 // is suitable for CLI(Y), CHHSI or CLHHSI, adjust the operands as necessary. static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned, SDValue &CmpOp0, SDValue &CmpOp1, @@ -870,7 +977,7 @@ static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned, if (Value == 0 && CCMask == SystemZ::CCMASK_CMP_LT) // Test whether the high bit of the byte is set. Value = 127, CCMask = SystemZ::CCMASK_CMP_GT, IsUnsigned = true; - else if (SignedValue == -1 && CCMask == SystemZ::CCMASK_CMP_GT) + else if (Value == 0 && CCMask == SystemZ::CCMASK_CMP_GE) // Test whether the high bit of the byte is clear. Value = 128, CCMask = SystemZ::CCMASK_CMP_LT, IsUnsigned = true; else @@ -946,15 +1053,22 @@ static bool preferUnsignedComparison(SelectionDAG &DAG, SDValue CmpOp0, return false; } -// Return a target node that compares CmpOp0 and CmpOp1. Set CCMask to the -// 4-bit condition-code mask for CC. +// Return a target node that compares CmpOp0 with CmpOp1 and stores a +// 2-bit result in CC. Set CCValid to the CCMASK_* of all possible +// 2-bit results and CCMask to the subset of those results that are +// associated with Cond. 
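For orientation before the definition below: the masks follow the BRC convention in which bit 8 stands for CC0 and bit 1 for CC3; an integer compare can only produce CC0-CC2, while a floating-point compare can additionally produce CC3 (unordered). A sketch with stand-in constants (the real SystemZ::CCMASK_* values live in SystemZ.h; treat these as assumptions for illustration):

    #include <cassert>

    enum {
      CCMASK_CMP_EQ = 8,  // CC0
      CCMASK_CMP_LT = 4,  // CC1
      CCMASK_CMP_GT = 2,  // CC2
      CCMASK_CMP_UO = 1,  // CC3, unordered (floating point only)
      CCMASK_ICMP = CCMASK_CMP_EQ | CCMASK_CMP_LT | CCMASK_CMP_GT,
      CCMASK_FCMP = CCMASK_ICMP | CCMASK_CMP_UO
    };

    int main() {
      // "Greater or equal" after an integer compare stays inside CCValid:
      unsigned CCMask = CCMASK_CMP_EQ | CCMASK_CMP_GT;
      assert((CCMask & ~unsigned(CCMASK_ICMP)) == 0);
      // An FP "less or greater" likewise stays inside the FP CCValid:
      assert(((CCMASK_CMP_LT | CCMASK_CMP_GT) & ~unsigned(CCMASK_FCMP)) == 0);
    }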
static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, - ISD::CondCode CC, unsigned &CCMask) { + ISD::CondCode Cond, unsigned &CCValid, + unsigned &CCMask) { bool IsUnsigned = false; - CCMask = CCMaskForCondCode(CC); - if (!CmpOp0.getValueType().isFloatingPoint()) { + CCMask = CCMaskForCondCode(Cond); + if (CmpOp0.getValueType().isFloatingPoint()) + CCValid = SystemZ::CCMASK_FCMP; + else { IsUnsigned = CCMask & SystemZ::CCMASK_CMP_UO; - CCMask &= ~SystemZ::CCMASK_CMP_UO; + CCValid = SystemZ::CCMASK_ICMP; + CCMask &= CCValid; + adjustZeroCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask); adjustSubwordCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask); if (preferUnsignedComparison(DAG, CmpOp0, CmpOp1, CCMask)) IsUnsigned = true; @@ -996,10 +1110,11 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(4); SDLoc DL(Op); - unsigned CCMask; - SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCMask); + unsigned CCValid, CCMask; + SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCValid, CCMask); return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), - Chain, DAG.getConstant(CCMask, MVT::i32), Dest, Flags); + Chain, DAG.getConstant(CCValid, MVT::i32), + DAG.getConstant(CCMask, MVT::i32), Dest, Flags); } SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, @@ -1011,12 +1126,13 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); SDLoc DL(Op); - unsigned CCMask; - SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCMask); + unsigned CCValid, CCMask; + SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCValid, CCMask); - SmallVector<SDValue, 4> Ops; + SmallVector<SDValue, 5> Ops; Ops.push_back(TrueOp); Ops.push_back(FalseOp); + Ops.push_back(DAG.getConstant(CCValid, MVT::i32)); Ops.push_back(DAG.getConstant(CCMask, MVT::i32)); Ops.push_back(Flags); @@ -1269,18 +1385,23 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, SDValue Op1 = Op.getOperand(1); EVT VT = Op.getValueType(); SDLoc DL(Op); + unsigned Opcode; // We use DSGF for 32-bit division. if (is32Bit(VT)) { Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0); - Op1 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op1); - } + Opcode = SystemZISD::SDIVREM32; + } else if (DAG.ComputeNumSignBits(Op1) > 32) { + Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); + Opcode = SystemZISD::SDIVREM32; + } else + Opcode = SystemZISD::SDIVREM64; // DSG(F) takes a 64-bit dividend, so the even register in the GR128 // input is "don't care". The instruction returns the remainder in // the even register and the quotient in the odd register. 
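The new ComputeNumSignBits(Op1) > 32 case is safe because a 64-bit value with more than 32 sign bits is unchanged by truncation to i32 followed by sign extension, so the 32-bit divisor that DSGF sees is the same number. A standalone check (not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      // All of these have more than 32 sign bits when held in an int64_t:
      for (int64_t Divisor : {-5LL, -1LL, 3LL, 2147483647LL, -2147483648LL})
        assert(int64_t(int32_t(Divisor)) == Divisor);
    }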
SDValue Ops[2]; - lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::SDIVREM64, + lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode, Op0, Op1, Ops[1], Ops[0]); return DAG.getMergeValues(Ops, 2, DL); } @@ -1579,6 +1700,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(SDIVREM64); OPCODE(UDIVREM32); OPCODE(UDIVREM64); + OPCODE(MVC); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); @@ -1620,34 +1742,6 @@ static MachineBasicBlock *splitBlockAfter(MachineInstr *MI, return NewMBB; } -bool SystemZTargetLowering:: -convertPrevCompareToBranch(MachineBasicBlock *MBB, - MachineBasicBlock::iterator MBBI, - unsigned CCMask, MachineBasicBlock *Target) const { - MachineBasicBlock::iterator Compare = MBBI; - MachineBasicBlock::iterator Begin = MBB->begin(); - do - { - if (Compare == Begin) - return false; - --Compare; - } - while (Compare->isDebugValue()); - - const SystemZInstrInfo *TII = TM.getInstrInfo(); - unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(), - Compare); - if (!FusedOpcode) - return false; - - DebugLoc DL = Compare->getDebugLoc(); - BuildMI(*MBB, MBBI, DL, TII->get(FusedOpcode)) - .addOperand(Compare->getOperand(0)).addOperand(Compare->getOperand(1)) - .addImm(CCMask).addMBB(Target); - Compare->removeFromParent(); - return true; -} - // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. MachineBasicBlock * SystemZTargetLowering::emitSelect(MachineInstr *MI, @@ -1657,7 +1751,8 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI, unsigned DestReg = MI->getOperand(0).getReg(); unsigned TrueReg = MI->getOperand(1).getReg(); unsigned FalseReg = MI->getOperand(2).getReg(); - unsigned CCMask = MI->getOperand(3).getImm(); + unsigned CCValid = MI->getOperand(3).getImm(); + unsigned CCMask = MI->getOperand(4).getImm(); DebugLoc DL = MI->getDebugLoc(); MachineBasicBlock *StartMBB = MBB; @@ -1667,15 +1762,9 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI, // StartMBB: // BRC CCMask, JoinMBB // # fallthrough to FalseMBB - // - // The original DAG glues comparisons to their uses, both to ensure - // that no CC-clobbering instructions are inserted between them, and - // to ensure that comparison results are not reused. This means that - // this Select is the sole user of any preceding comparison instruction - // and that we can try to use a fused compare and branch instead. MBB = StartMBB; - if (!convertPrevCompareToBranch(MBB, MI, CCMask, JoinMBB)) - BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(CCMask).addMBB(JoinMBB); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); MBB->addSuccessor(JoinMBB); MBB->addSuccessor(FalseMBB); @@ -1696,6 +1785,69 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI, return JoinMBB; } +// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI. +// StoreOpcode is the store to use and Invert says whether the store should +// happen when the condition is false rather than true. If a STORE ON +// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0. 
+MachineBasicBlock * +SystemZTargetLowering::emitCondStore(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned StoreOpcode, unsigned STOCOpcode, + bool Invert) const { + const SystemZInstrInfo *TII = TM.getInstrInfo(); + + unsigned SrcReg = MI->getOperand(0).getReg(); + MachineOperand Base = MI->getOperand(1); + int64_t Disp = MI->getOperand(2).getImm(); + unsigned IndexReg = MI->getOperand(3).getReg(); + unsigned CCValid = MI->getOperand(4).getImm(); + unsigned CCMask = MI->getOperand(5).getImm(); + DebugLoc DL = MI->getDebugLoc(); + + StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp); + + // Use STOCOpcode if possible. We could use different store patterns in + // order to avoid matching the index register, but the performance trade-offs + // might be more complicated in that case. + if (STOCOpcode && !IndexReg && TM.getSubtargetImpl()->hasLoadStoreOnCond()) { + if (Invert) + CCMask ^= CCValid; + BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) + .addReg(SrcReg).addOperand(Base).addImm(Disp) + .addImm(CCValid).addImm(CCMask); + MI->eraseFromParent(); + return MBB; + } + + // Get the condition needed to branch around the store. + if (!Invert) + CCMask ^= CCValid; + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); + + // StartMBB: + // BRC CCMask, JoinMBB + // # fallthrough to FalseMBB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); + MBB->addSuccessor(JoinMBB); + MBB->addSuccessor(FalseMBB); + + // FalseMBB: + // store %SrcReg, %Disp(%Index,%Base) + // # fallthrough to JoinMBB + MBB = FalseMBB; + BuildMI(MBB, DL, TII->get(StoreOpcode)) + .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg); + MBB->addSuccessor(JoinMBB); + + MI->eraseFromParent(); + return JoinMBB; +} + // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_* // or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that // performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}. @@ -1712,7 +1864,6 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, const SystemZInstrInfo *TII = TM.getInstrInfo(); MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned MaskNE = CCMaskForCondCode(ISD::SETNE); bool IsSubWord = (BitSize < 32); // Extract the operands. Base can be a register or a frame index. @@ -1812,7 +1963,8 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); BuildMI(MBB, DL, TII->get(CSOpcode), Dest) .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp); - BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(MaskNE).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); @@ -1835,7 +1987,6 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, const SystemZInstrInfo *TII = TM.getInstrInfo(); MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned MaskNE = CCMaskForCondCode(ISD::SETNE); bool IsSubWord = (BitSize < 32); // Extract the operands. Base can be a register or a frame index. 
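The expanders above now end their loops with an explicit BRC on CCMASK_CS / CCMASK_CS_NE: COMPARE AND SWAP sets CC0 when the comparison succeeded (value stored) and CC1 when it failed, and the branch loops back to LoopMBB in the failure case. The same retry shape in portable C++ (a model of the control flow, not the generated code):

    #include <atomic>
    #include <cassert>

    static unsigned atomicOr(std::atomic<unsigned> &Mem, unsigned Bits) {
      unsigned Old = Mem.load();
      while (!Mem.compare_exchange_weak(Old, Old | Bits)) {
        // On failure Old is refreshed with the current memory value; retry,
        // just as the BRC above branches back while CS reports "not equal".
      }
      return Old;
    }

    int main() {
      std::atomic<unsigned> Mem(0x0f);
      assert(atomicOr(Mem, 0xf0) == 0x0f);  // returns the old value
      assert(Mem.load() == 0xff);
    }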
@@ -1897,17 +2048,10 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, if (IsSubWord) BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) .addReg(OldVal).addReg(BitShift).addImm(0); - unsigned FusedOpcode = TII->getCompareAndBranch(CompareOpcode); - if (FusedOpcode) - BuildMI(MBB, DL, TII->get(FusedOpcode)) - .addReg(RotatedOldVal).addReg(Src2) - .addImm(KeepOldMask).addMBB(UpdateMBB); - else { - BuildMI(MBB, DL, TII->get(CompareOpcode)) - .addReg(RotatedOldVal).addReg(Src2); - BuildMI(MBB, DL, TII->get(SystemZ::BRC)) - .addImm(KeepOldMask).addMBB(UpdateMBB); - } + BuildMI(MBB, DL, TII->get(CompareOpcode)) + .addReg(RotatedOldVal).addReg(Src2); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB); MBB->addSuccessor(UpdateMBB); MBB->addSuccessor(UseAltMBB); @@ -1937,7 +2081,8 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); BuildMI(MBB, DL, TII->get(CSOpcode), Dest) .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp); - BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(MaskNE).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); @@ -1953,7 +2098,6 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, const SystemZInstrInfo *TII = TM.getInstrInfo(); MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned MaskNE = CCMaskForCondCode(ISD::SETNE); // Extract the operands. Base can be a register or a frame index. unsigned Dest = MI->getOperand(0).getReg(); @@ -2009,7 +2153,8 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, // ^^ Replace the upper 32-BitSize bits of the // comparison value with those that we loaded, // so that we can use a full word comparison. 
- // CRJNE %Dest, %RetryCmpVal, DoneMBB + // CR %Dest, %RetryCmpVal + // JNE DoneMBB // # Fall through to SetMBB MBB = LoopMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) @@ -2025,9 +2170,11 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, .addReg(OldVal).addReg(BitShift).addImm(BitSize); BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal) .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0); - BuildMI(MBB, DL, TII->get(SystemZ::CRJ)) - .addReg(Dest).addReg(RetryCmpVal) - .addImm(MaskNE).addMBB(DoneMBB); + BuildMI(MBB, DL, TII->get(SystemZ::CR)) + .addReg(Dest).addReg(RetryCmpVal); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP) + .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB); MBB->addSuccessor(DoneMBB); MBB->addSuccessor(SetMBB); @@ -2047,7 +2194,8 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize); BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal) .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp); - BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(MaskNE).addMBB(LoopMBB); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); @@ -2090,6 +2238,26 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI, return MBB; } +MachineBasicBlock * +SystemZTargetLowering::emitMVCWrapper(MachineInstr *MI, + MachineBasicBlock *MBB) const { + const SystemZInstrInfo *TII = TM.getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + + MachineOperand DestBase = MI->getOperand(0); + uint64_t DestDisp = MI->getOperand(1).getImm(); + MachineOperand SrcBase = MI->getOperand(2); + uint64_t SrcDisp = MI->getOperand(3).getImm(); + uint64_t Length = MI->getOperand(4).getImm(); + + BuildMI(*MBB, MI, DL, TII->get(SystemZ::MVC)) + .addOperand(DestBase).addImm(DestDisp).addImm(Length) + .addOperand(SrcBase).addImm(SrcDisp); + + MI->eraseFromParent(); + return MBB; +} + MachineBasicBlock *SystemZTargetLowering:: EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { switch (MI->getOpcode()) { @@ -2100,6 +2268,43 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { case SystemZ::SelectF128: return emitSelect(MI, MBB); + case SystemZ::CondStore8_32: + return emitCondStore(MI, MBB, SystemZ::STC32, 0, false); + case SystemZ::CondStore8_32Inv: + return emitCondStore(MI, MBB, SystemZ::STC32, 0, true); + case SystemZ::CondStore16_32: + return emitCondStore(MI, MBB, SystemZ::STH32, 0, false); + case SystemZ::CondStore16_32Inv: + return emitCondStore(MI, MBB, SystemZ::STH32, 0, true); + case SystemZ::CondStore32_32: + return emitCondStore(MI, MBB, SystemZ::ST32, SystemZ::STOC32, false); + case SystemZ::CondStore32_32Inv: + return emitCondStore(MI, MBB, SystemZ::ST32, SystemZ::STOC32, true); + case SystemZ::CondStore8: + return emitCondStore(MI, MBB, SystemZ::STC, 0, false); + case SystemZ::CondStore8Inv: + return emitCondStore(MI, MBB, SystemZ::STC, 0, true); + case SystemZ::CondStore16: + return emitCondStore(MI, MBB, SystemZ::STH, 0, false); + case SystemZ::CondStore16Inv: + return emitCondStore(MI, MBB, SystemZ::STH, 0, true); + case SystemZ::CondStore32: + return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false); + case SystemZ::CondStore32Inv: + return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true); + case SystemZ::CondStore64: + return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, 
false); + case SystemZ::CondStore64Inv: + return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true); + case SystemZ::CondStoreF32: + return emitCondStore(MI, MBB, SystemZ::STE, 0, false); + case SystemZ::CondStoreF32Inv: + return emitCondStore(MI, MBB, SystemZ::STE, 0, true); + case SystemZ::CondStoreF64: + return emitCondStore(MI, MBB, SystemZ::STD, 0, false); + case SystemZ::CondStoreF64Inv: + return emitCondStore(MI, MBB, SystemZ::STD, 0, true); + case SystemZ::AEXT128_64: return emitExt128(MI, MBB, false, SystemZ::subreg_low); case SystemZ::ZEXT128_32: @@ -2276,16 +2481,8 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { case SystemZ::ATOMIC_CMP_SWAPW: return emitAtomicCmpSwapW(MI, MBB); - case SystemZ::BRC: - // The original DAG glues comparisons to their uses, both to ensure - // that no CC-clobbering instructions are inserted between them, and - // to ensure that comparison results are not reused. This means that - // a BRC is the sole user of a preceding comparison and that we can - // try to use a fused compare and branch instead. - if (convertPrevCompareToBranch(MBB, MI, MI->getOperand(0).getImm(), - MI->getOperand(1).getMBB())) - MI->eraseFromParent(); - return MBB; + case SystemZ::MVCWrapper: + return emitMVCWrapper(MI, MBB); default: llvm_unreachable("Unexpected instr type to insert"); } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index f17e9e4..c0dbe49 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -68,10 +68,18 @@ namespace SystemZISD { // first input operands are GR128s. The trailing numbers are the // widths of the second operand in bits. UMUL_LOHI64, + SDIVREM32, SDIVREM64, UDIVREM32, UDIVREM64, + // Use MVC to copy bytes from one memory location to another. + // The first operand is the target address, the second operand is the + // source address, and the third operand is the constant length. + // This isn't a memory opcode because we'd need to attach two + // MachineMemOperands rather than one. + MVC, + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or // ATOMIC_LOAD_<op>. // @@ -118,18 +126,19 @@ public: virtual MVT getScalarShiftAmountTy(EVT LHSTy) const LLVM_OVERRIDE { return MVT::i32; } - virtual EVT getSetCCResultType(LLVMContext &, EVT) const { + virtual EVT getSetCCResultType(LLVMContext &, EVT) const LLVM_OVERRIDE { return MVT::i32; } - virtual bool isFMAFasterThanMulAndAdd(EVT) const LLVM_OVERRIDE { - return true; - } - virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; - virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const; + virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const LLVM_OVERRIDE; + virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const LLVM_OVERRIDE; + virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const + LLVM_OVERRIDE; + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const + LLVM_OVERRIDE; virtual const char *getTargetNodeName(unsigned Opcode) const LLVM_OVERRIDE; virtual std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const LLVM_OVERRIDE; + MVT VT) const LLVM_OVERRIDE; virtual TargetLowering::ConstraintType getConstraintType(const std::string &Constraint) const LLVM_OVERRIDE; virtual TargetLowering::ConstraintWeight @@ -203,6 +212,10 @@ private: // Implement EmitInstrWithCustomInserter for individual operation types. 
MachineBasicBlock *emitSelect(MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *emitCondStore(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned StoreOpcode, unsigned STOCOpcode, + bool Invert) const; MachineBasicBlock *emitExt128(MachineInstr *MI, MachineBasicBlock *MBB, bool ClearEven, unsigned SubReg) const; @@ -217,6 +230,8 @@ private: unsigned BitSize) const; MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *emitMVCWrapper(MachineInstr *MI, + MachineBasicBlock *BB) const; }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 86ef14c..b903b51 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Control-flow instructions +// Select instructions //===----------------------------------------------------------------------===// // C's ?: operator for floating-point operands. @@ -16,32 +16,48 @@ def SelectF32 : SelectWrapper<FP32>; def SelectF64 : SelectWrapper<FP64>; def SelectF128 : SelectWrapper<FP128>; +defm CondStoreF32 : CondStores<FP32, nonvolatile_store, + nonvolatile_load, bdxaddr20only>; +defm CondStoreF64 : CondStores<FP64, nonvolatile_store, + nonvolatile_load, bdxaddr20only>; + //===----------------------------------------------------------------------===// // Move instructions //===----------------------------------------------------------------------===// // Load zero. let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { - def LZER : InherentRRE<"lzer", 0xB374, FP32, (fpimm0)>; - def LZDR : InherentRRE<"lzdr", 0xB375, FP64, (fpimm0)>; - def LZXR : InherentRRE<"lzxr", 0xB376, FP128, (fpimm0)>; + def LZER : InherentRRE<"lze", 0xB374, FP32, (fpimm0)>; + def LZDR : InherentRRE<"lzd", 0xB375, FP64, (fpimm0)>; + def LZXR : InherentRRE<"lzx", 0xB376, FP128, (fpimm0)>; } // Moves between two floating-point registers. let neverHasSideEffects = 1 in { - def LER : UnaryRR <"ler", 0x38, null_frag, FP32, FP32>; - def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>; - def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>; + def LER : UnaryRR <"le", 0x38, null_frag, FP32, FP32>; + def LDR : UnaryRR <"ld", 0x28, null_frag, FP64, FP64>; + def LXR : UnaryRRE<"lx", 0xB365, null_frag, FP128, FP128>; +} + +// Moves between two floating-point registers that also set the condition +// codes. +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + defm LTEBR : LoadAndTestRRE<"lteb", 0xB302, FP32>; + defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>; + defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>; } +def : CompareZeroFP<LTEBRCompare, FP32>; +def : CompareZeroFP<LTDBRCompare, FP64>; +def : CompareZeroFP<LTXBRCompare, FP128>; // Moves between 64-bit integer and floating-point registers. -def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>; -def LDGR : UnaryRRE<"ldgr", 0xB3C1, bitconvert, FP64, GR64>; +def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>; +def LDGR : UnaryRRE<"ldg", 0xB3C1, bitconvert, FP64, GR64>; // fcopysign with an FP32 result. 
let isCodeGenOnly = 1 in { - def CPSDRss : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP32, FP32>; - def CPSDRsd : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP32, FP64>; + def CPSDRss : BinaryRRF<"cpsd", 0xB372, fcopysign, FP32, FP32>; + def CPSDRsd : BinaryRRF<"cpsd", 0xB372, fcopysign, FP32, FP64>; } // The sign of an FP128 is in the high register. @@ -50,8 +66,8 @@ def : Pat<(fcopysign FP32:$src1, FP128:$src2), // fcopysign with an FP64 result. let isCodeGenOnly = 1 in - def CPSDRds : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP64, FP32>; -def CPSDRdd : BinaryRRF<"cpsdr", 0xB372, fcopysign, FP64, FP64>; + def CPSDRds : BinaryRRF<"cpsd", 0xB372, fcopysign, FP64, FP32>; +def CPSDRdd : BinaryRRF<"cpsd", 0xB372, fcopysign, FP64, FP64>; // The sign of an FP128 is in the high register. def : Pat<(fcopysign FP64:$src1, FP128:$src2), @@ -70,13 +86,17 @@ def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_high), def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_high), (EXTRACT_SUBREG FP128:$src2, subreg_high))>; +defm LoadStoreF32 : MVCLoadStore<load, store, f32, MVCWrapper, 4>; +defm LoadStoreF64 : MVCLoadStore<load, store, f64, MVCWrapper, 8>; +defm LoadStoreF128 : MVCLoadStore<load, store, f128, MVCWrapper, 16>; + //===----------------------------------------------------------------------===// // Load instructions //===----------------------------------------------------------------------===// let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { - defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32>; - defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64>; + defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>; + defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>; // These instructions are split after register allocation, so we don't // want a custom inserter. @@ -91,8 +111,8 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { //===----------------------------------------------------------------------===// let SimpleBDXStore = 1 in { - defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32>; - defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64>; + defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32, 4>; + defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64, 8>; // These instructions are split after register allocation, so we don't // want a custom inserter. @@ -109,9 +129,9 @@ let SimpleBDXStore = 1 in { // Convert floating-point values to narrower representations, rounding // according to the current mode. The destination of LEXBR and LDXBR // is a 128-bit value, but only the first register of the pair is used. -def LEDBR : UnaryRRE<"ledbr", 0xB344, fround, FP32, FP64>; -def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>; -def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>; +def LEDBR : UnaryRRE<"ledb", 0xB344, fround, FP32, FP64>; +def LEXBR : UnaryRRE<"lexb", 0xB346, null_frag, FP128, FP128>; +def LDXBR : UnaryRRE<"ldxb", 0xB345, null_frag, FP128, FP128>; def : Pat<(f32 (fround FP128:$src)), (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_32bit)>; @@ -119,36 +139,34 @@ def : Pat<(f64 (fround FP128:$src)), (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_high)>; // Extend register floating-point values to wider representations. 
-def LDEBR : UnaryRRE<"ldebr", 0xB304, fextend, FP64, FP32>; -def LXEBR : UnaryRRE<"lxebr", 0xB306, fextend, FP128, FP32>; -def LXDBR : UnaryRRE<"lxdbr", 0xB305, fextend, FP128, FP64>; +def LDEBR : UnaryRRE<"ldeb", 0xB304, fextend, FP64, FP32>; +def LXEBR : UnaryRRE<"lxeb", 0xB306, fextend, FP128, FP32>; +def LXDBR : UnaryRRE<"lxdb", 0xB305, fextend, FP128, FP64>; // Extend memory floating-point values to wider representations. -def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64>; -def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128>; -def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128>; +def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>; +def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128, 4>; +def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128, 8>; // Convert a signed integer register value to a floating-point one. -let Defs = [CC] in { - def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>; - def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>; - def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>; +def CEFBR : UnaryRRE<"cefb", 0xB394, sint_to_fp, FP32, GR32>; +def CDFBR : UnaryRRE<"cdfb", 0xB395, sint_to_fp, FP64, GR32>; +def CXFBR : UnaryRRE<"cxfb", 0xB396, sint_to_fp, FP128, GR32>; - def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32, GR64>; - def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64, GR64>; - def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>; -} +def CEGBR : UnaryRRE<"cegb", 0xB3A4, sint_to_fp, FP32, GR64>; +def CDGBR : UnaryRRE<"cdgb", 0xB3A5, sint_to_fp, FP64, GR64>; +def CXGBR : UnaryRRE<"cxgb", 0xB3A6, sint_to_fp, FP128, GR64>; // Convert a floating-point register value to a signed integer value, // with the second operand (modifier M3) specifying the rounding mode. let Defs = [CC] in { - def CFEBR : UnaryRRF<"cfebr", 0xB398, GR32, FP32>; - def CFDBR : UnaryRRF<"cfdbr", 0xB399, GR32, FP64>; - def CFXBR : UnaryRRF<"cfxbr", 0xB39A, GR32, FP128>; + def CFEBR : UnaryRRF<"cfeb", 0xB398, GR32, FP32>; + def CFDBR : UnaryRRF<"cfdb", 0xB399, GR32, FP64>; + def CFXBR : UnaryRRF<"cfxb", 0xB39A, GR32, FP128>; - def CGEBR : UnaryRRF<"cgebr", 0xB3A8, GR64, FP32>; - def CGDBR : UnaryRRF<"cgdbr", 0xB3A9, GR64, FP64>; - def CGXBR : UnaryRRF<"cgxbr", 0xB3AA, GR64, FP128>; + def CGEBR : UnaryRRF<"cgeb", 0xB3A8, GR64, FP32>; + def CGDBR : UnaryRRF<"cgdb", 0xB3A9, GR64, FP64>; + def CGXBR : UnaryRRF<"cgxb", 0xB3AA, GR64, FP128>; } // fp_to_sint always rounds towards zero, which is modifier value 5. @@ -165,33 +183,33 @@ def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>; //===----------------------------------------------------------------------===// // Negation (Load Complement). -let Defs = [CC] in { - def LCEBR : UnaryRRE<"lcebr", 0xB303, fneg, FP32, FP32>; - def LCDBR : UnaryRRE<"lcdbr", 0xB313, fneg, FP64, FP64>; - def LCXBR : UnaryRRE<"lcxbr", 0xB343, fneg, FP128, FP128>; +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + def LCEBR : UnaryRRE<"lceb", 0xB303, fneg, FP32, FP32>; + def LCDBR : UnaryRRE<"lcdb", 0xB313, fneg, FP64, FP64>; + def LCXBR : UnaryRRE<"lcxb", 0xB343, fneg, FP128, FP128>; } // Absolute value (Load Positive). 
-let Defs = [CC] in {
-  def LPEBR : UnaryRRE<"lpebr", 0xB300, fabs, FP32, FP32>;
-  def LPDBR : UnaryRRE<"lpdbr", 0xB310, fabs, FP64, FP64>;
-  def LPXBR : UnaryRRE<"lpxbr", 0xB340, fabs, FP128, FP128>;
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+  def LPEBR : UnaryRRE<"lpeb", 0xB300, fabs, FP32, FP32>;
+  def LPDBR : UnaryRRE<"lpdb", 0xB310, fabs, FP64, FP64>;
+  def LPXBR : UnaryRRE<"lpxb", 0xB340, fabs, FP128, FP128>;
 }
 
 // Negative absolute value (Load Negative).
-let Defs = [CC] in {
-  def LNEBR : UnaryRRE<"lnebr", 0xB301, fnabs, FP32, FP32>;
-  def LNDBR : UnaryRRE<"lndbr", 0xB311, fnabs, FP64, FP64>;
-  def LNXBR : UnaryRRE<"lnxbr", 0xB341, fnabs, FP128, FP128>;
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+  def LNEBR : UnaryRRE<"lneb", 0xB301, fnabs, FP32, FP32>;
+  def LNDBR : UnaryRRE<"lndb", 0xB311, fnabs, FP64, FP64>;
+  def LNXBR : UnaryRRE<"lnxb", 0xB341, fnabs, FP128, FP128>;
 }
 
 // Square root.
-def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32, FP32>;
-def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64, FP64>;
-def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>;
+def SQEBR : UnaryRRE<"sqeb", 0xB314, fsqrt, FP32, FP32>;
+def SQDBR : UnaryRRE<"sqdb", 0xB315, fsqrt, FP64, FP64>;
+def SQXBR : UnaryRRE<"sqxb", 0xB316, fsqrt, FP128, FP128>;
 
-def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32>;
-def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64>;
+def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32, 4>;
+def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64, 8>;
 
 // Round to an integer, with the second operand (modifier M3) specifying
 // the rounding mode.
@@ -199,11 +217,9 @@ def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64>;
 // These forms always check for inexact conditions.  z196 added versions
 // that allow this to be suppressed (as for fnearbyint), but we don't yet
 // support -march=z196.
-let Defs = [CC] in {
-  def FIEBR : UnaryRRF<"fiebr", 0xB357, FP32, FP32>;
-  def FIDBR : UnaryRRF<"fidbr", 0xB35F, FP64, FP64>;
-  def FIXBR : UnaryRRF<"fixbr", 0xB347, FP128, FP128>;
-}
+def FIEBR : UnaryRRF<"fieb", 0xB357, FP32, FP32>;
+def FIDBR : UnaryRRF<"fidb", 0xB35F, FP64, FP64>;
+def FIXBR : UnaryRRF<"fixb", 0xB347, FP128, FP128>;
 
 // frint rounds according to the current mode (modifier 0) and detects
 // inexact conditions.
@@ -216,94 +232,94 @@ def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>;
 //===----------------------------------------------------------------------===//
 // Addition.
-let Defs = [CC] in {
+let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
   let isCommutable = 1 in {
-    def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32, FP32>;
-    def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64, FP64>;
-    def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>;
+    def AEBR : BinaryRRE<"aeb", 0xB30A, fadd, FP32, FP32>;
+    def ADBR : BinaryRRE<"adb", 0xB31A, fadd, FP64, FP64>;
+    def AXBR : BinaryRRE<"axb", 0xB34A, fadd, FP128, FP128>;
   }
-  def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load>;
-  def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load>;
+  def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load, 4>;
+  def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load, 8>;
 }
 
 // Subtraction.
-let Defs = [CC] in { - def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32, FP32>; - def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64, FP64>; - def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>; +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { + def SEBR : BinaryRRE<"seb", 0xB30B, fsub, FP32, FP32>; + def SDBR : BinaryRRE<"sdb", 0xB31B, fsub, FP64, FP64>; + def SXBR : BinaryRRE<"sxb", 0xB34B, fsub, FP128, FP128>; - def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load>; - def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load>; + def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load, 4>; + def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load, 8>; } // Multiplication. let isCommutable = 1 in { - def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32, FP32>; - def MDBR : BinaryRRE<"mdbr", 0xB31C, fmul, FP64, FP64>; - def MXBR : BinaryRRE<"mxbr", 0xB34C, fmul, FP128, FP128>; + def MEEBR : BinaryRRE<"meeb", 0xB317, fmul, FP32, FP32>; + def MDBR : BinaryRRE<"mdb", 0xB31C, fmul, FP64, FP64>; + def MXBR : BinaryRRE<"mxb", 0xB34C, fmul, FP128, FP128>; } -def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load>; -def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load>; +def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load, 4>; +def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>; // f64 multiplication of two FP32 registers. -def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>; +def MDEBR : BinaryRRE<"mdeb", 0xB30C, null_frag, FP64, FP32>; def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (fextend FP32:$src2))), (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_32bit), FP32:$src2)>; // f64 multiplication of an FP32 register and an f32 memory. -def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load>; +def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>; def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (extloadf32 bdxaddr12only:$addr))), (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_32bit), bdxaddr12only:$addr)>; // f128 multiplication of two FP64 registers. -def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>; +def MXDBR : BinaryRRE<"mxdb", 0xB307, null_frag, FP128, FP64>; def : Pat<(fmul (f128 (fextend FP64:$src1)), (f128 (fextend FP64:$src2))), (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_high), FP64:$src2)>; // f128 multiplication of an FP64 register and an f64 memory. -def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load>; +def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>; def : Pat<(fmul (f128 (fextend FP64:$src1)), (f128 (extloadf64 bdxaddr12only:$addr))), (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_high), bdxaddr12only:$addr)>; // Fused multiply-add. -def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32>; -def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64>; +def MAEBR : TernaryRRD<"maeb", 0xB30E, z_fma, FP32>; +def MADBR : TernaryRRD<"madb", 0xB31E, z_fma, FP64>; -def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, load>; -def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, load>; +def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, load, 4>; +def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, load, 8>; // Fused multiply-subtract. 
-def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32>; -def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64>; +def MSEBR : TernaryRRD<"mseb", 0xB30F, z_fms, FP32>; +def MSDBR : TernaryRRD<"msdb", 0xB31F, z_fms, FP64>; -def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, load>; -def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, load>; +def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, load, 4>; +def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, load, 8>; // Division. -def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>; -def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64, FP64>; -def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>; +def DEBR : BinaryRRE<"deb", 0xB30D, fdiv, FP32, FP32>; +def DDBR : BinaryRRE<"ddb", 0xB31D, fdiv, FP64, FP64>; +def DXBR : BinaryRRE<"dxb", 0xB34D, fdiv, FP128, FP128>; -def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load>; -def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load>; +def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>; +def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>; //===----------------------------------------------------------------------===// // Comparisons //===----------------------------------------------------------------------===// -let Defs = [CC] in { - def CEBR : CompareRRE<"cebr", 0xB309, z_cmp, FP32, FP32>; - def CDBR : CompareRRE<"cdbr", 0xB319, z_cmp, FP64, FP64>; - def CXBR : CompareRRE<"cxbr", 0xB349, z_cmp, FP128, FP128>; +let Defs = [CC], CCValues = 0xF in { + def CEBR : CompareRRE<"ceb", 0xB309, z_cmp, FP32, FP32>; + def CDBR : CompareRRE<"cdb", 0xB319, z_cmp, FP64, FP64>; + def CXBR : CompareRRE<"cxb", 0xB349, z_cmp, FP128, FP128>; - def CEB : CompareRXE<"ceb", 0xED09, z_cmp, FP32, load>; - def CDB : CompareRXE<"cdb", 0xED19, z_cmp, FP64, load>; + def CEB : CompareRXE<"ceb", 0xED09, z_cmp, FP32, load, 4>; + def CDB : CompareRXE<"cdb", 0xED19, z_cmp, FP64, load, 8>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index ad050fd..954df11 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -21,12 +21,24 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr, let Pattern = pattern; let AsmString = asmstr; - // Used to identify a group of related instructions, such as ST and STY. - string Function = ""; - - // "12" for an instruction that has a ...Y equivalent, "20" for that - // ...Y equivalent. - string PairType = "none"; + // Some instructions come in pairs, one having a 12-bit displacement + // and the other having a 20-bit displacement. Both instructions in + // the pair have the same DispKey and their DispSizes are "12" and "20" + // respectively. + string DispKey = ""; + string DispSize = "none"; + + // Many register-based <INSN>R instructions have a memory-based <INSN> + // counterpart. OpKey uniquely identifies <INSN>, while OpType is + // "reg" for <INSN>R and "mem" for <INSN>. + string OpKey = ""; + string OpType = "none"; + + // Many distinct-operands instructions have older 2-operand equivalents. + // NumOpsKey uniquely identifies one of these 2-operand and 3-operand pairs, + // with NumOpsValue being "2" or "3" as appropriate. + string NumOpsKey = ""; + string NumOpsValue = "none"; // True if this instruction is a simple D(X,B) load of a register // (with no sign or zero extension). @@ -46,11 +58,40 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr, // operations. 
bit Is128Bit = 0; - let TSFlags{0} = SimpleBDXLoad; - let TSFlags{1} = SimpleBDXStore; - let TSFlags{2} = Has20BitOffset; - let TSFlags{3} = HasIndex; - let TSFlags{4} = Is128Bit; + // The access size of all memory operands in bytes, or 0 if not known. + bits<5> AccessBytes = 0; + + // If the instruction sets CC to a useful value, this gives the mask + // of all possible CC results. The mask has the same form as + // SystemZ::CCMASK_*. + bits<4> CCValues = 0; + + // The subset of CCValues that have the same meaning as they would after + // a comparison of the first operand against zero. + bits<4> CompareZeroCCMask = 0; + + // True if the instruction is conditional and if the CC mask operand + // comes first (as for BRC, etc.). + bit CCMaskFirst = 0; + + // Similar, but true if the CC mask operand comes last (as for LOC, etc.). + bit CCMaskLast = 0; + + // True if the instruction is the "logical" rather than "arithmetic" form, + // in cases where a distinction exists. + bit IsLogical = 0; + + let TSFlags{0} = SimpleBDXLoad; + let TSFlags{1} = SimpleBDXStore; + let TSFlags{2} = Has20BitOffset; + let TSFlags{3} = HasIndex; + let TSFlags{4} = Is128Bit; + let TSFlags{9-5} = AccessBytes; + let TSFlags{13-10} = CCValues; + let TSFlags{17-14} = CompareZeroCCMask; + let TSFlags{18} = CCMaskFirst; + let TSFlags{19} = CCMaskLast; + let TSFlags{20} = IsLogical; } //===----------------------------------------------------------------------===// @@ -61,8 +102,8 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr, // displacement. def getDisp12Opcode : InstrMapping { let FilterClass = "InstSystemZ"; - let RowFields = ["Function"]; - let ColFields = ["PairType"]; + let RowFields = ["DispKey"]; + let ColFields = ["DispSize"]; let KeyCol = ["20"]; let ValueCols = [["12"]]; } @@ -70,12 +111,30 @@ def getDisp12Opcode : InstrMapping { // Return the version of an instruction that has a signed 20-bit displacement. def getDisp20Opcode : InstrMapping { let FilterClass = "InstSystemZ"; - let RowFields = ["Function"]; - let ColFields = ["PairType"]; + let RowFields = ["DispKey"]; + let ColFields = ["DispSize"]; let KeyCol = ["12"]; let ValueCols = [["20"]]; } +// Return the memory form of a register instruction. +def getMemOpcode : InstrMapping { + let FilterClass = "InstSystemZ"; + let RowFields = ["OpKey"]; + let ColFields = ["OpType"]; + let KeyCol = ["reg"]; + let ValueCols = [["mem"]]; +} + +// Return the 3-operand form of a 2-operand instruction. 
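The consumer side of the TSFlags layout above, for orientation: each field is recovered with a shift and mask at the bit positions this patch assigns. The helper names here are hypothetical stand-ins for the accessors the backend actually provides:

    #include <cassert>
    #include <cstdint>

    static unsigned getAccessBytes(uint64_t TSFlags)       { return (TSFlags >> 5) & 0x1f; }
    static unsigned getCCValues(uint64_t TSFlags)          { return (TSFlags >> 10) & 0xf; }
    static unsigned getCompareZeroCCMask(uint64_t TSFlags) { return (TSFlags >> 14) & 0xf; }

    int main() {
      // An 8-byte access that sets all four CC values, comparable against zero:
      uint64_t TSFlags =
          (uint64_t(8) << 5) | (uint64_t(0xf) << 10) | (uint64_t(0xf) << 14);
      assert(getAccessBytes(TSFlags) == 8);
      assert(getCCValues(TSFlags) == 0xf);
      assert(getCompareZeroCCMask(TSFlags) == 0xf);
    }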
+def getThreeOperandOpcode : InstrMapping { + let FilterClass = "InstSystemZ"; + let RowFields = ["NumOpsKey"]; + let ColFields = ["NumOpsValue"]; + let KeyCol = ["2"]; + let ValueCols = [["3"]]; +} + //===----------------------------------------------------------------------===// // Instruction formats //===----------------------------------------------------------------------===// @@ -147,6 +206,23 @@ class InstRIEc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> let Inst{7-0} = op{7-0}; } +class InstRIEd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<4> R3; + bits<16> I2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R3; + let Inst{31-16} = I2; + let Inst{15-8} = 0; + let Inst{7-0} = op{7-0}; +} + class InstRIEf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; @@ -383,17 +459,38 @@ class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> let Has20BitOffset = 1; } +class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<24> BDL1; + bits<16> BD2; + + let Inst{47-40} = op; + let Inst{39-16} = BDL1; + let Inst{15-0} = BD2; +} + //===----------------------------------------------------------------------===// // Instruction definitions with semantics //===----------------------------------------------------------------------===// // -// These classes have the form <Category><Format>, where <Format> is one +// These classes have the form [Cond]<Category><Format>, where <Format> is one // of the formats defined above and where <Category> describes the inputs -// and outputs. <Category> can be one of: +// and outputs. "Cond" is used if the instruction is conditional, +// in which case the 4-bit condition-code mask is added as a final operand. +// <Category> can be one of: // // Inherent: // One register output operand and no input operands. // +// BranchUnary: +// One register output operand, one register input operand and +// one branch displacement. The instruction stores a modified +// form of the source register in the destination register and +// branches on the result. +// // Store: // One register or immediate input operand and one address input operand. // The instruction stores the first operand to the address.
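TableGen materializes each InstrMapping above as a lookup function in SystemZGenInstrInfo.inc; the SystemZInstrInfo.cpp hunks below pull these in under GET_INSTRMAP_INFO and treat a negative result as "no mapping". A minimal sketch of that query pattern follows; the wrapper names are hypothetical, only the two generated functions are real:

#define GET_INSTRMAP_INFO
#include "SystemZGenInstrInfo.inc"

// Hypothetical wrappers, not part of the patch: each generated mapping
// function returns the mapped opcode, or -1 if the row has no entry.
static unsigned memoryFormOf(unsigned Opcode) {
  // OpKey/OpType mapping: <INSN>R (reg) -> <INSN> (mem).
  int MemOpcode = SystemZ::getMemOpcode(Opcode);
  return MemOpcode >= 0 ? unsigned(MemOpcode) : 0;
}

static unsigned threeOperandFormOf(unsigned Opcode) {
  // NumOpsKey/NumOpsValue mapping: 2-operand -> 3-operand, e.g. SLL -> SLLK.
  int ThreeOp = SystemZ::getThreeOperandOpcode(Opcode);
  return ThreeOp >= 0 ? unsigned(ThreeOp) : 0;
}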
@@ -455,11 +552,20 @@ class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls, dag src> : InstRRE<opcode, (outs cls:$R1), (ins), - mnemonic#"\t$R1", + mnemonic#"r\t$R1", [(set cls:$R1, src)]> { let R2 = 0; } +class BranchUnaryRI<string mnemonic, bits<12> opcode, RegisterOperand cls> + : InstRI<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget16:$I2), + mnemonic##"\t$R1, $I2", []> { + let isBranch = 1; + let isTerminator = 1; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> : InstRSY<opcode, (outs cls:$R1, cls:$R3), (ins bdaddr20only:$BD2), mnemonic#"\t$R1, $R3, $BD2", []> { @@ -479,28 +585,38 @@ class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, } class StoreRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode = bdxaddr12only> + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr12only> : InstRX<opcode, (outs), (ins cls:$R1, mode:$XBD2), mnemonic#"\t$R1, $XBD2", [(operator cls:$R1, mode:$XBD2)]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let mayStore = 1; + let AccessBytes = bytes; } class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode = bdxaddr20only> + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr20only> : InstRXY<opcode, (outs), (ins cls:$R1, mode:$XBD2), mnemonic#"\t$R1, $XBD2", [(operator cls:$R1, mode:$XBD2)]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let mayStore = 1; + let AccessBytes = bytes; } multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, - SDPatternOperator operator, RegisterOperand cls> { - let Function = mnemonic ## #cls in { - let PairType = "12" in - def "" : StoreRX<mnemonic, rxOpcode, operator, cls, bdxaddr12pair>; - let PairType = "20" in - def Y : StoreRXY<mnemonic#"y", rxyOpcode, operator, cls, bdxaddr20pair>; + SDPatternOperator operator, RegisterOperand cls, + bits<5> bytes> { + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in + def "" : StoreRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>; + let DispSize = "20" in + def Y : StoreRXY<mnemonic#"y", rxyOpcode, operator, cls, bytes, + bdxaddr20pair>; } } @@ -536,30 +652,106 @@ class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, SDPatternOperator operator, Immediate imm> { - let Function = mnemonic in { - let PairType = "12" in + let DispKey = mnemonic in { + let DispSize = "12" in def "" : StoreSI<mnemonic, siOpcode, operator, imm, bdaddr12pair>; - let PairType = "20" in + let DispSize = "20" in def Y : StoreSIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>; } } +class CondStoreRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$R3), + mnemonic#"$R3\t$R1, $BD2", []>, + Requires<[FeatureLoadStoreOnCond]> { + let mayStore = 1; + let AccessBytes = bytes; + let CCMaskLast = 1; +} + +// Like CondStoreRSY, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. 
+class AsmCondStoreRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, uimm8zx4:$R3), + mnemonic#"\t$R1, $BD2, $R3", []>, + Requires<[FeatureLoadStoreOnCond]> { + let mayStore = 1; + let AccessBytes = bytes; +} + +// Like CondStoreRSY, but with a fixed CC mask. +class FixedCondStoreRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<4> ccmask, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2), + mnemonic#"\t$R1, $BD2", []>, + Requires<[FeatureLoadStoreOnCond]> { + let mayStore = 1; + let AccessBytes = bytes; + let R3 = ccmask; +} + class UnaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRR<opcode, (outs cls1:$R1), (ins cls2:$R2), - mnemonic#"\t$R1, $R2", - [(set cls1:$R1, (operator cls2:$R2))]>; + mnemonic#"r\t$R1, $R2", + [(set cls1:$R1, (operator cls2:$R2))]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; +} class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRRE<opcode, (outs cls1:$R1), (ins cls2:$R2), - mnemonic#"\t$R1, $R2", - [(set cls1:$R1, (operator cls2:$R2))]>; + mnemonic#"r\t$R1, $R2", + [(set cls1:$R1, (operator cls2:$R2))]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; +} class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2> : InstRRF<opcode, (outs cls1:$R1), (ins uimm8zx4:$R3, cls2:$R2), - mnemonic#"\t$R1, $R3, $R2", []>; + mnemonic#"r\t$R1, $R3, $R2", []> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; +} + +// These instructions are generated by if conversion. The old value of R1 +// is added as an implicit use. +class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$R1), (ins cls2:$R2, cond4:$valid, cond4:$R3), + mnemonic#"r$R3\t$R1, $R2", []>, + Requires<[FeatureLoadStoreOnCond]> { + let CCMaskLast = 1; +} + +// Like CondUnaryRRF, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2, uimm8zx4:$R3), + mnemonic#"r\t$R1, $R2, $R3", []>, + Requires<[FeatureLoadStoreOnCond]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +// Like CondUnaryRRF, but with a fixed CC mask. 
+class FixedCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2, bits<4> ccmask> + : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + mnemonic#"\t$R1, $R2", []>, + Requires<[FeatureLoadStoreOnCond]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let R3 = ccmask; +} class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> @@ -585,45 +777,105 @@ class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, let AddedComplexity = 7; } +class CondUnaryRSY<string mnemonic, bits<16> opcode, + SDPatternOperator operator, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs cls:$R1), + (ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3), + mnemonic#"$R3\t$R1, $BD2", + [(set cls:$R1, + (z_select_ccmask (load bdaddr20only:$BD2), cls:$R1src, + cond4:$valid, cond4:$R3))]>, + Requires<[FeatureLoadStoreOnCond]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let AccessBytes = bytes; + let CCMaskLast = 1; +} + +// Like CondUnaryRSY, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondUnaryRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2, uimm8zx4:$R3), + mnemonic#"\t$R1, $BD2, $R3", []>, + Requires<[FeatureLoadStoreOnCond]> { + let mayLoad = 1; + let AccessBytes = bytes; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +// Like CondUnaryRSY, but with a fixed CC mask. +class FixedCondUnaryRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls, bits<4> ccmask, bits<5> bytes, + AddressingMode mode = bdaddr20only> + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2), + mnemonic#"\t$R1, $BD2", []>, + Requires<[FeatureLoadStoreOnCond]> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let R3 = ccmask; + let mayLoad = 1; + let AccessBytes = bytes; +} + class UnaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode = bdxaddr12only> + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr12only> : InstRX<opcode, (outs cls:$R1), (ins mode:$XBD2), mnemonic#"\t$R1, $XBD2", [(set cls:$R1, (operator mode:$XBD2))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let mayLoad = 1; + let AccessBytes = bytes; } class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls> + RegisterOperand cls, bits<5> bytes> : InstRXE<opcode, (outs cls:$R1), (ins bdxaddr12only:$XBD2), mnemonic#"\t$R1, $XBD2", [(set cls:$R1, (operator bdxaddr12only:$XBD2))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let mayLoad = 1; + let AccessBytes = bytes; } class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode = bdxaddr20only> + RegisterOperand cls, bits<5> bytes, + AddressingMode mode = bdxaddr20only> : InstRXY<opcode, (outs cls:$R1), (ins mode:$XBD2), mnemonic#"\t$R1, $XBD2", [(set cls:$R1, (operator mode:$XBD2))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let mayLoad = 1; + let AccessBytes = bytes; } multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, - SDPatternOperator operator, RegisterOperand cls> { - 
let Function = mnemonic ## #cls in { - let PairType = "12" in - def "" : UnaryRX<mnemonic, rxOpcode, operator, cls, bdxaddr12pair>; - let PairType = "20" in - def Y : UnaryRXY<mnemonic#"y", rxyOpcode, operator, cls, bdxaddr20pair>; + SDPatternOperator operator, RegisterOperand cls, + bits<5> bytes> { + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in + def "" : UnaryRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>; + let DispSize = "20" in + def Y : UnaryRXY<mnemonic#"y", rxyOpcode, operator, cls, bytes, + bdxaddr20pair>; } } class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRR<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), - mnemonic#"\t$R1, $R2", + mnemonic#"r\t$R1, $R2", [(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } @@ -631,8 +883,10 @@ class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRRE<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), - mnemonic#"\t$R1, $R2", + mnemonic#"r\t$R1, $R2", [(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } @@ -640,8 +894,41 @@ class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, class BinaryRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R3, cls2:$R2), - mnemonic#"\t$R1, $R3, $R2", - [(set cls1:$R1, (operator cls1:$R3, cls2:$R2))]>; + mnemonic#"r\t$R1, $R3, $R2", + [(set cls1:$R1, (operator cls1:$R3, cls2:$R2))]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; +} + +class BinaryRRFK<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R2, cls2:$R3), + mnemonic#"rk\t$R1, $R2, $R3", + [(set cls1:$R1, (operator cls1:$R2, cls2:$R3))]>; + +multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls1, + RegisterOperand cls2> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRRFK<mnemonic, opcode2, null_frag, cls1, cls2>, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : BinaryRR<mnemonic, opcode1, operator, cls1, cls2>; + } +} + +multiclass BinaryRREAndK<string mnemonic, bits<16> opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls1, + RegisterOperand cls2> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRRFK<mnemonic, opcode2, null_frag, cls1, cls2>, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : BinaryRRE<mnemonic, opcode1, operator, cls1, cls2>; + } +} class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> @@ -652,6 +939,24 @@ class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, let DisableEncoding = "$R1src"; } +class BinaryRIE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> + : InstRIEd<opcode, (outs cls:$R1), (ins 
cls:$R3, imm:$I2), + mnemonic#"\t$R1, $R3, $I2", + [(set cls:$R1, (operator cls:$R3, imm:$I2))]>; + +multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls, + Immediate imm> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRIE<mnemonic##"k", opcode2, null_frag, cls, imm>, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : BinaryRI<mnemonic, opcode1, operator, cls, imm>; + } +} + class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> : InstRIL<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), @@ -662,46 +967,56 @@ class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, } class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr12only> : InstRX<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2), mnemonic#"\t$R1, $XBD2", [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; let mayLoad = 1; + let AccessBytes = bytes; } class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load> + RegisterOperand cls, SDPatternOperator load, bits<5> bytes> : InstRXE<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr12only:$XBD2), mnemonic#"\t$R1, $XBD2", [(set cls:$R1, (operator cls:$R1src, (load bdxaddr12only:$XBD2)))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; let mayLoad = 1; + let AccessBytes = bytes; } class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr20only> : InstRXY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2), mnemonic#"\t$R1, $XBD2", [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; let mayLoad = 1; + let AccessBytes = bytes; } multiclass BinaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, SDPatternOperator operator, RegisterOperand cls, - SDPatternOperator load> { - let Function = mnemonic ## #cls in { - let PairType = "12" in - def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bdxaddr12pair>; - let PairType = "20" in - def Y : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load, + SDPatternOperator load, bits<5> bytes> { + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in + def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes, + bdxaddr12pair>; + let DispSize = "20" in + def Y : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load, bytes, bdxaddr20pair>; } } @@ -727,59 +1042,83 @@ class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, multiclass BinarySIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, SDPatternOperator operator, Operand imm> { - let Function = mnemonic ## #cls in { - let PairType = "12" in + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in def "" : BinarySI<mnemonic, siOpcode, operator, imm, bdaddr12pair>; - let PairType = "20" in + let 
DispSize = "20" in def Y : BinarySIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>; } } class ShiftRS<string mnemonic, bits<8> opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode> - : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2), + RegisterOperand cls> + : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, shift12only:$BD2), mnemonic#"\t$R1, $BD2", - [(set cls:$R1, (operator cls:$R1src, mode:$BD2))]> { + [(set cls:$R1, (operator cls:$R1src, shift12only:$BD2))]> { let R3 = 0; let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } class ShiftRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode> - : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, mode:$BD2), + RegisterOperand cls> + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, shift20only:$BD2), mnemonic#"\t$R1, $R3, $BD2", - [(set cls:$R1, (operator cls:$R3, mode:$BD2))]>; + [(set cls:$R1, (operator cls:$R3, shift20only:$BD2))]>; + +multiclass ShiftRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : ShiftRSY<mnemonic##"k", opcode2, null_frag, cls>, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : ShiftRS<mnemonic, opcode1, operator, cls>; + } +} class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRR<opcode, (outs), (ins cls1:$R1, cls2:$R2), - mnemonic#"\t$R1, $R2", - [(operator cls1:$R1, cls2:$R2)]>; + mnemonic#"r\t$R1, $R2", + [(operator cls1:$R1, cls2:$R2)]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; + let isCompare = 1; +} class CompareRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRRE<opcode, (outs), (ins cls1:$R1, cls2:$R2), - mnemonic#"\t$R1, $R2", - [(operator cls1:$R1, cls2:$R2)]>; + mnemonic#"r\t$R1, $R2", + [(operator cls1:$R1, cls2:$R2)]> { + let OpKey = mnemonic ## cls1; + let OpType = "reg"; + let isCompare = 1; +} class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> : InstRI<opcode, (outs), (ins cls:$R1, imm:$I2), mnemonic#"\t$R1, $I2", - [(operator cls:$R1, imm:$I2)]>; + [(operator cls:$R1, imm:$I2)]> { + let isCompare = 1; +} class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> : InstRIL<opcode, (outs), (ins cls:$R1, imm:$I2), mnemonic#"\t$R1, $I2", - [(operator cls:$R1, imm:$I2)]>; + [(operator cls:$R1, imm:$I2)]> { + let isCompare = 1; +} class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load> : InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2), mnemonic#"\t$R1, $I2", [(operator cls:$R1, (load pcrel32:$I2))]> { + let isCompare = 1; let mayLoad = 1; // We want PC-relative addresses to be tried ahead of BD and BDX addresses. 
// However, BDXs have two extra operands and are therefore 6 units more @@ -788,41 +1127,53 @@ class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, } class CompareRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr12only> : InstRX<opcode, (outs), (ins cls:$R1, mode:$XBD2), mnemonic#"\t$R1, $XBD2", [(operator cls:$R1, (load mode:$XBD2))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let isCompare = 1; let mayLoad = 1; + let AccessBytes = bytes; } class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load> + RegisterOperand cls, SDPatternOperator load, bits<5> bytes> : InstRXE<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2), mnemonic#"\t$R1, $XBD2", [(operator cls:$R1, (load bdxaddr12only:$XBD2))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let isCompare = 1; let mayLoad = 1; + let AccessBytes = bytes; } class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr20only> : InstRXY<opcode, (outs), (ins cls:$R1, mode:$XBD2), mnemonic#"\t$R1, $XBD2", [(operator cls:$R1, (load mode:$XBD2))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; + let isCompare = 1; let mayLoad = 1; + let AccessBytes = bytes; } multiclass CompareRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, SDPatternOperator operator, RegisterOperand cls, - SDPatternOperator load> { - let Function = mnemonic ## #cls in { - let PairType = "12" in + SDPatternOperator load, bits<5> bytes> { + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in def "" : CompareRX<mnemonic, rxOpcode, operator, cls, - load, bdxaddr12pair>; - let PairType = "20" in + load, bytes, bdxaddr12pair>; + let DispSize = "20" in def Y : CompareRXY<mnemonic#"y", rxyOpcode, operator, cls, - load, bdxaddr20pair>; + load, bytes, bdxaddr20pair>; } } @@ -832,6 +1183,7 @@ class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2), mnemonic#"\t$BD1, $I2", [(operator (load mode:$BD1), imm:$I2)]> { + let isCompare = 1; let mayLoad = 1; } @@ -840,6 +1192,7 @@ class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2), mnemonic#"\t$BD1, $I2", [(operator (load bdaddr12only:$BD1), imm:$I2)]> { + let isCompare = 1; let mayLoad = 1; } @@ -849,16 +1202,17 @@ class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2), mnemonic#"\t$BD1, $I2", [(operator (load mode:$BD1), imm:$I2)]> { + let isCompare = 1; let mayLoad = 1; } multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, SDPatternOperator operator, SDPatternOperator load, Immediate imm> { - let Function = mnemonic in { - let PairType = "12" in + let DispKey = mnemonic in { + let DispSize = "12" in def "" : CompareSI<mnemonic, siOpcode, operator, load, imm, bdaddr12pair>; - let PairType = "20" in + let DispSize = "20" in def Y : CompareSIY<mnemonic#"y", siyOpcode, operator, load, imm, bdaddr20pair>; } @@ -867,22 +1221,27 @@ multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, class TernaryRRD<string mnemonic, 
bits<16> opcode, SDPatternOperator operator, RegisterOperand cls> : InstRRD<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, cls:$R2), - mnemonic#"\t$R1, $R3, $R2", + mnemonic#"r\t$R1, $R3, $R2", [(set cls:$R1, (operator cls:$R1src, cls:$R3, cls:$R2))]> { + let OpKey = mnemonic ## cls; + let OpType = "reg"; let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load> + RegisterOperand cls, SDPatternOperator load, bits<5> bytes> : InstRXF<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, bdxaddr12only:$XBD2), mnemonic#"\t$R1, $R3, $XBD2", [(set cls:$R1, (operator cls:$R1src, cls:$R3, (load bdxaddr12only:$XBD2)))]> { + let OpKey = mnemonic ## cls; + let OpType = "mem"; let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; let mayLoad = 1; + let AccessBytes = bytes; } class CmpSwapRS<string mnemonic, bits<8> opcode, SDPatternOperator operator, @@ -909,10 +1268,10 @@ class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, SDPatternOperator operator, RegisterOperand cls> { - let Function = mnemonic ## #cls in { - let PairType = "12" in + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in def "" : CmpSwapRS<mnemonic, rsOpcode, operator, cls, bdaddr12pair>; - let PairType = "20" in + let DispSize = "20" in def Y : CmpSwapRSY<mnemonic#"y", rsyOpcode, operator, cls, bdaddr20pair>; } } @@ -920,13 +1279,21 @@ multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2> : InstRIEf<opcode, (outs cls1:$R1), - (ins cls1:$R1src, cls2:$R2, - uimm8zx6:$I3, uimm8zx6:$I4, uimm8zx6:$I5), + (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5), mnemonic#"\t$R1, $R2, $I3, $I4, $I5", []> { let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } +// A floating-point load-and-test operation. Create both a normal unary +// operation and one that acts as a comparison against zero. +multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode, + RegisterOperand cls> { + def "" : UnaryRRE<mnemonic, opcode, null_frag, cls, cls>; + let isCodeGenOnly = 1 in + def Compare : CompareRRE<mnemonic, opcode, null_frag, cls, cls>; +} + //===----------------------------------------------------------------------===// // Pseudo instructions //===----------------------------------------------------------------------===// @@ -946,8 +1313,10 @@ class Pseudo<dag outs, dag ins, list<dag> pattern> // Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is // the value of the PSW's 2-bit condition code field. class SelectWrapper<RegisterOperand cls> - : Pseudo<(outs cls:$dst), (ins cls:$src1, cls:$src2, i8imm:$cc), - [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2, imm:$cc))]> { + : Pseudo<(outs cls:$dst), + (ins cls:$src1, cls:$src2, uimm8zx4:$valid, uimm8zx4:$cc), + [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2, + uimm8zx4:$valid, uimm8zx4:$cc))]> { let usesCustomInserter = 1; // Although the instructions used by these nodes do not in themselves // change CC, the insertion requires new blocks, and CC cannot be live @@ -956,6 +1325,23 @@ class SelectWrapper<RegisterOperand cls> let Uses = [CC]; } +// Stores $new to $addr if $cc is true ("" case) or false (Inv case).
+multiclass CondStores<RegisterOperand cls, SDPatternOperator store, + SDPatternOperator load, AddressingMode mode> { + let Defs = [CC], Uses = [CC], usesCustomInserter = 1 in { + def "" : Pseudo<(outs), + (ins cls:$new, mode:$addr, uimm8zx4:$valid, uimm8zx4:$cc), + [(store (z_select_ccmask cls:$new, (load mode:$addr), + uimm8zx4:$valid, uimm8zx4:$cc), + mode:$addr)]>; + def Inv : Pseudo<(outs), + (ins cls:$new, mode:$addr, uimm8zx4:$valid, uimm8zx4:$cc), + [(store (z_select_ccmask (load mode:$addr), cls:$new, + uimm8zx4:$valid, uimm8zx4:$cc), + mode:$addr)]>; + } +} + // OPERATOR is ATOMIC_SWAP or an ATOMIC_LOAD_* operation. PAT and OPERAND // describe the second (non-memory) operand. class AtomicLoadBinary<SDPatternOperator operator, RegisterOperand cls, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 0d30432..9ee60aa 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -12,8 +12,10 @@ //===----------------------------------------------------------------------===// #include "SystemZInstrInfo.h" +#include "SystemZTargetMachine.h" #include "SystemZInstrBuilder.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #define GET_INSTRINFO_CTOR #define GET_INSTRMAP_INFO @@ -21,9 +23,14 @@ using namespace llvm; +// Return a mask with Count low bits set. +static uint64_t allOnes(unsigned int Count) { + return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1; +} + SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm) : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP), - RI(tm) { + RI(tm), TM(tm) { } // MI is a 128-bit load or store. Split it into two 64-bit loads or stores, @@ -80,7 +87,8 @@ void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const { // Return 0 otherwise. // // Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores. -static int isSimpleMove(const MachineInstr *MI, int &FrameIndex, int Flag) { +static int isSimpleMove(const MachineInstr *MI, int &FrameIndex, + unsigned Flag) { const MCInstrDesc &MCID = MI->getDesc(); if ((MCID.TSFlags & Flag) && MI->getOperand(1).isFI() && @@ -102,6 +110,31 @@ unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXStore); } +bool SystemZInstrInfo::isStackSlotCopy(const MachineInstr *MI, + int &DestFrameIndex, + int &SrcFrameIndex) const { + // Check for MVC 0(Length,FI1),0(FI2) + const MachineFrameInfo *MFI = MI->getParent()->getParent()->getFrameInfo(); + if (MI->getOpcode() != SystemZ::MVC || + !MI->getOperand(0).isFI() || + MI->getOperand(1).getImm() != 0 || + !MI->getOperand(3).isFI() || + MI->getOperand(4).getImm() != 0) + return false; + + // Check that Length covers the full slots. 
+ int64_t Length = MI->getOperand(2).getImm(); + unsigned FI1 = MI->getOperand(0).getIndex(); + unsigned FI2 = MI->getOperand(3).getIndex(); + if (MFI->getObjectSize(FI1) != Length || + MFI->getObjectSize(FI2) != Length) + return false; + + DestFrameIndex = FI1; + SrcFrameIndex = FI2; + return true; +} + bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, @@ -168,13 +201,13 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // FIXME: add X86-style branch swap FBB = TBB; TBB = Branch.Target->getMBB(); + Cond.push_back(MachineOperand::CreateImm(Branch.CCValid)); Cond.push_back(MachineOperand::CreateImm(Branch.CCMask)); continue; } // Handle subsequent conditional branches. - assert(Cond.size() == 1); - assert(TBB); + assert(Cond.size() == 2 && TBB && "Should have seen a conditional branch"); // Only handle the case where all conditional branches branch to the same // destination. @@ -182,11 +215,13 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return true; // If the conditions are the same, we can leave them alone. - unsigned OldCond = Cond[0].getImm(); - if (OldCond == Branch.CCMask) + unsigned OldCCValid = Cond[0].getImm(); + unsigned OldCCMask = Cond[1].getImm(); + if (OldCCValid == Branch.CCValid && OldCCMask == Branch.CCMask) continue; // FIXME: Try combining conditions like X86 does. Should be easy on Z! + return false; } return false; @@ -214,6 +249,13 @@ unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return Count; } +bool SystemZInstrInfo:: +ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + assert(Cond.size() == 2 && "Invalid condition"); + Cond[1].setImm(Cond[1].getImm() ^ Cond[0].getImm()); + return false; +} + unsigned SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, @@ -225,7 +267,7 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - assert((Cond.size() == 1 || Cond.size() == 0) && + assert((Cond.size() == 2 || Cond.size() == 0) && "SystemZ branch conditions have two components!"); if (Cond.empty()) { @@ -237,8 +279,10 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, // Conditional branch. unsigned Count = 0; - unsigned CC = Cond[0].getImm(); - BuildMI(&MBB, DL, get(SystemZ::BRC)).addImm(CC).addMBB(TBB); + unsigned CCValid = Cond[0].getImm(); + unsigned CCMask = Cond[1].getImm(); + BuildMI(&MBB, DL, get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask).addMBB(TBB); ++Count; if (FBB) { @@ -249,6 +293,62 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, return Count; } +// If Opcode is a move that has a conditional variant, return that variant, +// otherwise return 0. +static unsigned getConditionalMove(unsigned Opcode) { + switch (Opcode) { + case SystemZ::LR: return SystemZ::LOCR; + case SystemZ::LGR: return SystemZ::LOCGR; + default: return 0; + } +} + +bool SystemZInstrInfo::isPredicable(MachineInstr *MI) const { + unsigned Opcode = MI->getOpcode(); + if (TM.getSubtargetImpl()->hasLoadStoreOnCond() && + getConditionalMove(Opcode)) + return true; + return false; +} + +bool SystemZInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, unsigned ExtraPredCycles, + const BranchProbability &Probability) const { + // For now only convert single instructions.
+ return NumCycles == 1; +} + +bool SystemZInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumCyclesT, unsigned ExtraPredCyclesT, + MachineBasicBlock &FMBB, + unsigned NumCyclesF, unsigned ExtraPredCyclesF, + const BranchProbability &Probability) const { + // For now avoid converting mutually-exclusive cases. + return false; +} + +bool SystemZInstrInfo:: +PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const { + assert(Pred.size() == 2 && "Invalid condition"); + unsigned CCValid = Pred[0].getImm(); + unsigned CCMask = Pred[1].getImm(); + assert(CCMask > 0 && CCMask < 15 && "Invalid predicate"); + unsigned Opcode = MI->getOpcode(); + if (TM.getSubtargetImpl()->hasLoadStoreOnCond()) { + if (unsigned CondOpcode = getConditionalMove(Opcode)) { + MI->setDesc(get(CondOpcode)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addImm(CCValid).addImm(CCMask) + .addReg(SystemZ::CC, RegState::Implicit); + return true; + } + } + return false; +} + void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, @@ -315,6 +415,223 @@ SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, FrameIdx); } +// Return true if MI is a simple load or store with a 12-bit displacement +// and no index. Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores. +static bool isSimpleBD12Move(const MachineInstr *MI, unsigned Flag) { + const MCInstrDesc &MCID = MI->getDesc(); + return ((MCID.TSFlags & Flag) && + isUInt<12>(MI->getOperand(2).getImm()) && + MI->getOperand(3).getReg() == 0); +} + +namespace { + struct LogicOp { + LogicOp() : RegSize(0), ImmLSB(0), ImmSize(0) {} + LogicOp(unsigned regSize, unsigned immLSB, unsigned immSize) + : RegSize(regSize), ImmLSB(immLSB), ImmSize(immSize) {} + + operator bool() const { return RegSize; } + + unsigned RegSize, ImmLSB, ImmSize; + }; +} + +static LogicOp interpretAndImmediate(unsigned Opcode) { + switch (Opcode) { + case SystemZ::NILL32: return LogicOp(32, 0, 16); + case SystemZ::NILH32: return LogicOp(32, 16, 16); + case SystemZ::NILL: return LogicOp(64, 0, 16); + case SystemZ::NILH: return LogicOp(64, 16, 16); + case SystemZ::NIHL: return LogicOp(64, 32, 16); + case SystemZ::NIHH: return LogicOp(64, 48, 16); + case SystemZ::NILF32: return LogicOp(32, 0, 32); + case SystemZ::NILF: return LogicOp(64, 0, 32); + case SystemZ::NIHF: return LogicOp(64, 32, 32); + default: return LogicOp(); + } +} + +// Used to return from convertToThreeAddress after replacing two-address +// instruction OldMI with three-address instruction NewMI. +static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI, + MachineInstr *NewMI, + LiveVariables *LV) { + if (LV) { + unsigned NumOps = OldMI->getNumOperands(); + for (unsigned I = 1; I < NumOps; ++I) { + MachineOperand &Op = OldMI->getOperand(I); + if (Op.isReg() && Op.isKill()) + LV->replaceKillInstruction(Op.getReg(), OldMI, NewMI); + } + } + return NewMI; +} + +MachineInstr * +SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const { + MachineInstr *MI = MBBI; + MachineBasicBlock *MBB = MI->getParent(); + + unsigned Opcode = MI->getOpcode(); + unsigned NumOps = MI->getNumOperands(); + + // Try to convert something like SLL into SLLK, if supported. + // We prefer to keep the two-operand form where possible both + // because it tends to be shorter and because some instructions + // have memory forms that can be used during spilling.
+ if (TM.getSubtargetImpl()->hasDistinctOps()) { + int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode); + if (ThreeOperandOpcode >= 0) { + MachineOperand &Dest = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + MachineInstrBuilder MIB = + BuildMI(*MBB, MBBI, MI->getDebugLoc(), get(ThreeOperandOpcode)) + .addOperand(Dest); + // Keep the kill state, but drop the tied flag. + MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg()); + // Keep the remaining operands as-is. + for (unsigned I = 2; I < NumOps; ++I) + MIB.addOperand(MI->getOperand(I)); + return finishConvertToThreeAddress(MI, MIB, LV); + } + } + + // Try to convert an AND into an RISBG-type instruction. + if (LogicOp And = interpretAndImmediate(Opcode)) { + unsigned NewOpcode; + if (And.RegSize == 64) + NewOpcode = SystemZ::RISBG; + else if (TM.getSubtargetImpl()->hasHighWord()) + NewOpcode = SystemZ::RISBLG32; + else + // We can't use RISBG for 32-bit operations because it clobbers the + // high word of the destination too. + NewOpcode = 0; + if (NewOpcode) { + uint64_t Imm = MI->getOperand(2).getImm() << And.ImmLSB; + // AND IMMEDIATE leaves the other bits of the register unchanged. + Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB); + unsigned Start, End; + if (isRxSBGMask(Imm, And.RegSize, Start, End)) { + if (NewOpcode == SystemZ::RISBLG32) { + Start &= 31; + End &= 31; + } + MachineOperand &Dest = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + MachineInstrBuilder MIB = + BuildMI(*MBB, MI, MI->getDebugLoc(), get(NewOpcode)) + .addOperand(Dest).addReg(0) + .addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg()) + .addImm(Start).addImm(End + 128).addImm(0); + return finishConvertToThreeAddress(MI, MIB, LV); + } + } + } + return 0; +} + +MachineInstr * +SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned Size = MFI->getObjectSize(FrameIndex); + + // Early exit for cases we don't care about + if (Ops.size() != 1) + return 0; + + unsigned OpNum = Ops[0]; + assert(Size == MF.getRegInfo() + .getRegClass(MI->getOperand(OpNum).getReg())->getSize() && + "Invalid size combination"); + + unsigned Opcode = MI->getOpcode(); + if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) { + bool Op0IsGPR = (Opcode == SystemZ::LGDR); + bool Op1IsGPR = (Opcode == SystemZ::LDGR); + // If we're spilling the destination of an LDGR or LGDR, store the + // source register instead. + if (OpNum == 0) { + unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD; + return BuildMI(MF, MI->getDebugLoc(), get(StoreOpcode)) + .addOperand(MI->getOperand(1)).addFrameIndex(FrameIndex) + .addImm(0).addReg(0); + } + // If we're spilling the source of an LDGR or LGDR, load the + // destination register instead. + if (OpNum == 1) { + unsigned LoadOpcode = Op0IsGPR ? SystemZ::LG : SystemZ::LD; + unsigned Dest = MI->getOperand(0).getReg(); + return BuildMI(MF, MI->getDebugLoc(), get(LoadOpcode), Dest) + .addFrameIndex(FrameIndex).addImm(0).addReg(0); + } + } + + // Look for cases where the source of a simple store or the destination + // of a simple load is being spilled. Try to use MVC instead. + // + // Although MVC is in practice a fast choice in these cases, it is still + // logically a bytewise copy. This means that we cannot use it if the + // load or store is volatile.
It also means that the transformation is + // not valid in cases where the two memories partially overlap; however, + // that is not a problem here, because we know that one of the memories + // is a full frame index. + if (OpNum == 0 && MI->hasOneMemOperand()) { + MachineMemOperand *MMO = *MI->memoperands_begin(); + if (MMO->getSize() == Size && !MMO->isVolatile()) { + // Handle conversion of loads. + if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad)) { + return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC)) + .addFrameIndex(FrameIndex).addImm(0).addImm(Size) + .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm()) + .addMemOperand(MMO); + } + // Handle conversion of stores. + if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore)) { + return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC)) + .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm()) + .addImm(Size).addFrameIndex(FrameIndex).addImm(0) + .addMemOperand(MMO); + } + } + } + + // If the spilled operand is the final one, try to change <INSN>R + // into <INSN>. + int MemOpcode = SystemZ::getMemOpcode(Opcode); + if (MemOpcode >= 0) { + unsigned NumOps = MI->getNumExplicitOperands(); + if (OpNum == NumOps - 1) { + const MCInstrDesc &MemDesc = get(MemOpcode); + uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags); + assert(AccessBytes != 0 && "Size of access should be known"); + assert(AccessBytes <= Size && "Access outside the frame index"); + uint64_t Offset = Size - AccessBytes; + MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(MemOpcode)); + for (unsigned I = 0; I < OpNum; ++I) + MIB.addOperand(MI->getOperand(I)); + MIB.addFrameIndex(FrameIndex).addImm(Offset); + if (MemDesc.TSFlags & SystemZII::HasIndex) + MIB.addReg(0); + return MIB; + } + } + + return 0; +} + +MachineInstr * +SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const { + return 0; +} + bool SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { switch (MI->getOpcode()) { @@ -343,13 +660,6 @@ SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { } } -bool SystemZInstrInfo:: -ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { - assert(Cond.size() == 1 && "Invalid branch condition!"); - Cond[0].setImm(Cond[0].getImm() ^ SystemZ::CCMASK_ANY); - return false; -} - uint64_t SystemZInstrInfo::getInstSizeInBytes(const MachineInstr *MI) const { if (MI->getOpcode() == TargetOpcode::INLINEASM) { const MachineFunction *MF = MI->getParent()->getParent(); @@ -366,22 +676,31 @@ SystemZInstrInfo::getBranchInfo(const MachineInstr *MI) const { case SystemZ::J: case SystemZ::JG: return SystemZII::Branch(SystemZII::BranchNormal, SystemZ::CCMASK_ANY, - &MI->getOperand(0)); + SystemZ::CCMASK_ANY, &MI->getOperand(0)); case SystemZ::BRC: case SystemZ::BRCL: return SystemZII::Branch(SystemZII::BranchNormal, - MI->getOperand(0).getImm(), &MI->getOperand(1)); + MI->getOperand(0).getImm(), + MI->getOperand(1).getImm(), &MI->getOperand(2)); + + case SystemZ::BRCT: + return SystemZII::Branch(SystemZII::BranchCT, SystemZ::CCMASK_ICMP, + SystemZ::CCMASK_CMP_NE, &MI->getOperand(2)); + + case SystemZ::BRCTG: + return SystemZII::Branch(SystemZII::BranchCTG, SystemZ::CCMASK_ICMP, + SystemZ::CCMASK_CMP_NE, &MI->getOperand(2)); case SystemZ::CIJ: case SystemZ::CRJ: - return SystemZII::Branch(SystemZII::BranchC, MI->getOperand(2).getImm(), - &MI->getOperand(3)); + return 
SystemZII::Branch(SystemZII::BranchC, SystemZ::CCMASK_ICMP, + MI->getOperand(2).getImm(), &MI->getOperand(3)); case SystemZ::CGIJ: case SystemZ::CGRJ: - return SystemZII::Branch(SystemZII::BranchCG, MI->getOperand(2).getImm(), - &MI->getOperand(3)); + return SystemZII::Branch(SystemZII::BranchCG, SystemZ::CCMASK_ICMP, + MI->getOperand(2).getImm(), &MI->getOperand(3)); default: llvm_unreachable("Unrecognized branch opcode"); @@ -442,6 +761,64 @@ unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode, return 0; } +unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const { + switch (Opcode) { + case SystemZ::L: return SystemZ::LT; + case SystemZ::LY: return SystemZ::LT; + case SystemZ::LG: return SystemZ::LTG; + case SystemZ::LGF: return SystemZ::LTGF; + case SystemZ::LR: return SystemZ::LTR; + case SystemZ::LGFR: return SystemZ::LTGFR; + case SystemZ::LGR: return SystemZ::LTGR; + case SystemZ::LER: return SystemZ::LTEBR; + case SystemZ::LDR: return SystemZ::LTDBR; + case SystemZ::LXR: return SystemZ::LTXBR; + default: return 0; + } +} + +// Return true if Mask matches the regexp 0*1+0*, given that zero masks +// have already been filtered out. Store the first set bit in LSB and +// the number of set bits in Length if so. +static bool isStringOfOnes(uint64_t Mask, unsigned &LSB, unsigned &Length) { + unsigned First = findFirstSet(Mask); + uint64_t Top = (Mask >> First) + 1; + if ((Top & -Top) == Top) { + LSB = First; + Length = findFirstSet(Top); + return true; + } + return false; +} + +bool SystemZInstrInfo::isRxSBGMask(uint64_t Mask, unsigned BitSize, + unsigned &Start, unsigned &End) const { + // Reject trivial all-zero masks. + if (Mask == 0) + return false; + + // Handle the 1+0+ or 0+1+0* cases. Start then specifies the index of + // the msb and End specifies the index of the lsb. + unsigned LSB, Length; + if (isStringOfOnes(Mask, LSB, Length)) { + Start = 63 - (LSB + Length - 1); + End = 63 - LSB; + return true; + } + + // Handle the wrap-around 1+0+1+ cases. Start then specifies the msb + // of the low 1s and End specifies the lsb of the high 1s. + if (isStringOfOnes(Mask ^ allOnes(BitSize), LSB, Length)) { + assert(LSB > 0 && "Bottom bit must be set"); + assert(LSB + Length < BitSize && "Top bit must be set"); + Start = 63 - (LSB - 1); + End = 63 - (LSB + Length); + return true; + } + + return false; +} + unsigned SystemZInstrInfo::getCompareAndBranch(unsigned Opcode, const MachineInstr *MI) const { switch (Opcode) { diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index d6980f7..276fd3b 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -28,12 +28,31 @@ class SystemZTargetMachine; namespace SystemZII { enum { // See comments in SystemZInstrFormats.td. 
- SimpleBDXLoad = (1 << 0), - SimpleBDXStore = (1 << 1), - Has20BitOffset = (1 << 2), - HasIndex = (1 << 3), - Is128Bit = (1 << 4) + SimpleBDXLoad = (1 << 0), + SimpleBDXStore = (1 << 1), + Has20BitOffset = (1 << 2), + HasIndex = (1 << 3), + Is128Bit = (1 << 4), + AccessSizeMask = (31 << 5), + AccessSizeShift = 5, + CCValuesMask = (15 << 10), + CCValuesShift = 10, + CompareZeroCCMaskMask = (15 << 14), + CompareZeroCCMaskShift = 14, + CCMaskFirst = (1 << 18), + CCMaskLast = (1 << 19), + IsLogical = (1 << 20) }; + static inline unsigned getAccessSize(unsigned int Flags) { + return (Flags & AccessSizeMask) >> AccessSizeShift; + } + static inline unsigned getCCValues(unsigned int Flags) { + return (Flags & CCValuesMask) >> CCValuesShift; + } + static inline unsigned getCompareZeroCCMask(unsigned int Flags) { + return (Flags & CompareZeroCCMaskMask) >> CompareZeroCCMaskShift; + } + // SystemZ MachineOperand target flags. enum { // Masks out the bits for the access model. @@ -53,26 +72,39 @@ namespace SystemZII { // An instruction that performs a 64-bit signed comparison and branches // on the result. - BranchCG + BranchCG, + + // An instruction that decrements a 32-bit register and branches if + // the result is nonzero. + BranchCT, + + // An instruction that decrements a 64-bit register and branches if + // the result is nonzero. + BranchCTG }; // Information about a branch instruction. struct Branch { // The type of the branch. BranchType Type; + // CCMASK_<N> is set if CC might be equal to N. + unsigned CCValid; + // CCMASK_<N> is set if the branch should be taken when CC == N. unsigned CCMask; // The target of the branch. const MachineOperand *Target; - Branch(BranchType type, unsigned ccMask, const MachineOperand *target) - : Type(type), CCMask(ccMask), Target(target) {} + Branch(BranchType type, unsigned ccValid, unsigned ccMask, + const MachineOperand *target) + : Type(type), CCValid(ccValid), CCMask(ccMask), Target(target) {} }; } class SystemZInstrInfo : public SystemZGenInstrInfo { const SystemZRegisterInfo RI; + SystemZTargetMachine &TM; void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const; void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const; @@ -85,6 +117,8 @@ public: int &FrameIndex) const LLVM_OVERRIDE; virtual unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const LLVM_OVERRIDE; + virtual bool isStackSlotCopy(const MachineInstr *MI, int &DestFrameIndex, + int &SrcFrameIndex) const LLVM_OVERRIDE; virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, @@ -95,6 +129,23 @@ public: MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const LLVM_OVERRIDE; + virtual bool isPredicable(MachineInstr *MI) const LLVM_OVERRIDE; + virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + unsigned ExtraPredCycles, + const BranchProbability &Probability) const + LLVM_OVERRIDE; + virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumCyclesT, + unsigned ExtraPredCyclesT, + MachineBasicBlock &FMBB, + unsigned NumCyclesF, + unsigned ExtraPredCyclesF, + const BranchProbability &Probability) const + LLVM_OVERRIDE; + virtual bool + PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const + LLVM_OVERRIDE; virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -111,6 +162,18 @@ public: unsigned DestReg, int FrameIdx, const
TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const LLVM_OVERRIDE; + virtual MachineInstr * + convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const; + virtual MachineInstr * + foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const; + virtual MachineInstr * + foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const; virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const LLVM_OVERRIDE; virtual bool @@ -141,6 +204,16 @@ public: // exists. unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const; + // If Opcode is a load instruction that has a LOAD AND TEST form, + // return the opcode for the testing form, otherwise return 0. + unsigned getLoadAndTest(unsigned Opcode) const; + + // Return true if ROTATE AND ... SELECTED BITS can be used to select the bits + // of the R2 operand specified by Mask, given that only the low BitSize bits of + // Mask are significant. Set Start and End to the I3 and I4 operands if so. + bool isRxSBGMask(uint64_t Mask, unsigned BitSize, + unsigned &Start, unsigned &End) const; + // If Opcode is a COMPARE opcode for which an associated COMPARE AND // BRANCH exists, return the opcode for the latter, otherwise return 0. // MI, if nonnull, is the compare instruction. diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index c9ec6bc..b318d67 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -58,22 +58,19 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in { // in their raw BRC/BRCL form, with the 4-bit condition-code mask being // the first operand. It seems friendlier to use mnemonic forms like // JE and JLH when writing out the assembly though. -// -// Using a custom inserter for BRC gives us a chance to convert the BRC -// and a preceding compare into a single compare-and-branch instruction. -// The inserter makes no change in cases where a separate branch really -// is needed. -multiclass CondBranches<Operand ccmask, string short, string long> { - let isBranch = 1, isTerminator = 1, Uses = [CC] in { - def "" : InstRI<0xA74, (outs), (ins ccmask:$R1, brtarget16:$I2), short, []>; - def L : InstRIL<0xC04, (outs), (ins ccmask:$R1, brtarget32:$I2), long, []>; +let isBranch = 1, isTerminator = 1, Uses = [CC] in { + let isCodeGenOnly = 1, CCMaskFirst = 1 in { + def BRC : InstRI<0xA74, (outs), (ins cond4:$valid, cond4:$R1, + brtarget16:$I2), "j$R1\t$I2", + [(z_br_ccmask cond4:$valid, cond4:$R1, bb:$I2)]>; + def BRCL : InstRIL<0xC04, (outs), (ins cond4:$valid, cond4:$R1, + brtarget32:$I2), "jg$R1\t$I2", []>; } + def AsmBRC : InstRI<0xA74, (outs), (ins uimm8zx4:$R1, brtarget16:$I2), + "brc\t$R1, $I2", []>; + def AsmBRCL : InstRIL<0xC04, (outs), (ins uimm8zx4:$R1, brtarget32:$I2), + "brcl\t$R1, $I2", []>; } -let isCodeGenOnly = 1, usesCustomInserter = 1 in - defm BRC : CondBranches<cond4, "j$R1\t$I2", "jg$R1\t$I2">; -defm AsmBRC : CondBranches<uimm8zx4, "brc\t$R1, $I2", "brcl\t$R1, $I2">; - -def : Pat<(z_br_ccmask cond4:$cond, bb:$dst), (BRC cond4:$cond, bb:$dst)>; // Fused compare-and-branch instructions.
As for normal branches, // we handle these instructions internally in their raw CRJ-like form, @@ -107,26 +104,32 @@ defm AsmC : CompareBranches<uimm8zx4, "", "$M3, ">; // (integer or floating-point) multiclass CondExtendedMnemonic<bits<4> ccmask, string name> { let R1 = ccmask in { - def "" : InstRI<0xA74, (outs), (ins brtarget16:$I2), - "j"##name##"\t$I2", []>; - def L : InstRIL<0xC04, (outs), (ins brtarget32:$I2), + def J : InstRI<0xA74, (outs), (ins brtarget16:$I2), + "j"##name##"\t$I2", []>; + def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2), "jg"##name##"\t$I2", []>; } + def LOCR : FixedCondUnaryRRF<"locr"##name, 0xB9F2, GR32, GR32, ccmask>; + def LOCGR : FixedCondUnaryRRF<"locgr"##name, 0xB9E2, GR64, GR64, ccmask>; + def LOC : FixedCondUnaryRSY<"loc"##name, 0xEBF2, GR32, ccmask, 4>; + def LOCG : FixedCondUnaryRSY<"locg"##name, 0xEBE2, GR64, ccmask, 8>; + def STOC : FixedCondStoreRSY<"stoc"##name, 0xEBF3, GR32, ccmask, 4>; + def STOCG : FixedCondStoreRSY<"stocg"##name, 0xEBE3, GR64, ccmask, 8>; } -defm AsmJO : CondExtendedMnemonic<1, "o">; -defm AsmJH : CondExtendedMnemonic<2, "h">; -defm AsmJNLE : CondExtendedMnemonic<3, "nle">; -defm AsmJL : CondExtendedMnemonic<4, "l">; -defm AsmJNHE : CondExtendedMnemonic<5, "nhe">; -defm AsmJLH : CondExtendedMnemonic<6, "lh">; -defm AsmJNE : CondExtendedMnemonic<7, "ne">; -defm AsmJE : CondExtendedMnemonic<8, "e">; -defm AsmJNLH : CondExtendedMnemonic<9, "nlh">; -defm AsmJHE : CondExtendedMnemonic<10, "he">; -defm AsmJNL : CondExtendedMnemonic<11, "nl">; -defm AsmJLE : CondExtendedMnemonic<12, "le">; -defm AsmJNH : CondExtendedMnemonic<13, "nh">; -defm AsmJNO : CondExtendedMnemonic<14, "no">; +defm AsmO : CondExtendedMnemonic<1, "o">; +defm AsmH : CondExtendedMnemonic<2, "h">; +defm AsmNLE : CondExtendedMnemonic<3, "nle">; +defm AsmL : CondExtendedMnemonic<4, "l">; +defm AsmNHE : CondExtendedMnemonic<5, "nhe">; +defm AsmLH : CondExtendedMnemonic<6, "lh">; +defm AsmNE : CondExtendedMnemonic<7, "ne">; +defm AsmE : CondExtendedMnemonic<8, "e">; +defm AsmNLH : CondExtendedMnemonic<9, "nlh">; +defm AsmHE : CondExtendedMnemonic<10, "he">; +defm AsmNL : CondExtendedMnemonic<11, "nl">; +defm AsmLE : CondExtendedMnemonic<12, "le">; +defm AsmNH : CondExtendedMnemonic<13, "nh">; +defm AsmNO : CondExtendedMnemonic<14, "no">; // Define AsmParser mnemonics for each integer condition-code mask. // This is like the list above, except that condition 3 is not possible @@ -163,16 +166,43 @@ defm AsmJE : IntCondExtendedMnemonic<8, "e", "nlh">; defm AsmJHE : IntCondExtendedMnemonic<10, "he", "nl">; defm AsmJLE : IntCondExtendedMnemonic<12, "le", "nh">; +// Decrement a register and branch if it is nonzero. These don't clobber CC, +// but we might need to split long branches into sequences that do. 
+let Defs = [CC] in { + def BRCT : BranchUnaryRI<"brct", 0xA76, GR32>; + def BRCTG : BranchUnaryRI<"brctg", 0xA77, GR64>; +} + +//===----------------------------------------------------------------------===// +// Select instructions +//===----------------------------------------------------------------------===// + def Select32 : SelectWrapper<GR32>; def Select64 : SelectWrapper<GR64>; +defm CondStore8_32 : CondStores<GR32, nonvolatile_truncstorei8, + nonvolatile_anyextloadi8, bdxaddr20only>; +defm CondStore16_32 : CondStores<GR32, nonvolatile_truncstorei16, + nonvolatile_anyextloadi16, bdxaddr20only>; +defm CondStore32_32 : CondStores<GR32, nonvolatile_store, + nonvolatile_load, bdxaddr20only>; + +defm CondStore8 : CondStores<GR64, nonvolatile_truncstorei8, + nonvolatile_anyextloadi8, bdxaddr20only>; +defm CondStore16 : CondStores<GR64, nonvolatile_truncstorei16, + nonvolatile_anyextloadi16, bdxaddr20only>; +defm CondStore32 : CondStores<GR64, nonvolatile_truncstorei32, + nonvolatile_anyextloadi32, bdxaddr20only>; +defm CondStore64 : CondStores<GR64, nonvolatile_store, + nonvolatile_load, bdxaddr20only>; + //===----------------------------------------------------------------------===// // Call instructions //===----------------------------------------------------------------------===// // The definitions here are for the call-clobbered registers. let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D, - F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D], + F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D, CC], R1 = 14, isCodeGenOnly = 1 in { def BRAS : InstRI<0xA75, (outs), (ins pcrel16call:$I2, variable_ops), "bras\t%r14, $I2", []>; @@ -197,12 +227,27 @@ def AsmBASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2), // Register moves. let neverHasSideEffects = 1 in { - def LR : UnaryRR <"lr", 0x18, null_frag, GR32, GR32>; - def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>; + def LR : UnaryRR <"l", 0x18, null_frag, GR32, GR32>; + def LGR : UnaryRRE<"lg", 0xB904, null_frag, GR64, GR64>; +} +let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { + def LTR : UnaryRR <"lt", 0x12, null_frag, GR32, GR32>; + def LTGR : UnaryRRE<"ltg", 0xB902, null_frag, GR64, GR64>; +} + +// Move on condition. +let isCodeGenOnly = 1, Uses = [CC] in { + def LOCR : CondUnaryRRF<"loc", 0xB9F2, GR32, GR32>; + def LOCGR : CondUnaryRRF<"locg", 0xB9E2, GR64, GR64>; +} +let Uses = [CC] in { + def AsmLOCR : AsmCondUnaryRRF<"loc", 0xB9F2, GR32, GR32>; + def AsmLOCGR : AsmCondUnaryRRF<"locg", 0xB9E2, GR64, GR64>; } // Immediate moves. -let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { +let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1, + isReMaterializable = 1 in { // 16-bit sign-extended immediates. def LHI : UnaryRI<"lhi", 0xA78, bitconvert, GR32, imm32sx16>; def LGHI : UnaryRI<"lghi", 0xA79, bitconvert, GR64, imm64sx16>; @@ -221,11 +266,8 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { // Register loads. let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { - defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32>; - def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>; - - def LG : UnaryRXY<"lg", 0xE304, load, GR64>; - def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>; + defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32, 4>; + def LG : UnaryRXY<"lg", 0xE304, load, GR64, 8>; // These instructions are split after register allocation, so we don't // want a custom inserter. 
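For readers new to the CCValues and CompareZeroCCMask annotations used above: they appear to encode sets of z/Architecture condition-code values as 4-bit masks, consistent with the extended-mnemonic table earlier in this file, where "e" uses mask 8 and "h" uses mask 2. A hedged sketch of that convention (an assumption inferred from this patch, not text from it):

    // Condition code cc (0..3) maps to mask bit 8 >> cc, so a 4-bit mask
    // denotes a set of possible CC values ("e" = 8 selects CC 0, "h" = 2
    // selects CC 2, and so on).
    constexpr unsigned ccMask(unsigned cc) { return 8u >> cc; }

    // Under this reading, CCValues = 0xE says an instruction produces
    // only CC 0, 1 or 2; LOAD AND TEST, for example, never sets CC 3.
    constexpr bool mayProduce(unsigned ccValues, unsigned cc) {
      return (ccValues & ccMask(cc)) != 0;
    }

    static_assert(ccMask(0) == 8 && ccMask(3) == 1, "mask layout");
    static_assert(mayProduce(0xE, 2) && !mayProduce(0xE, 3),
                  "0xE excludes CC 3");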
@@ -234,16 +276,31 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { [(set GR128:$dst, (load bdxaddr20only128:$src))]>; } } +let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { + def LT : UnaryRXY<"lt", 0xE312, load, GR32, 4>; + def LTG : UnaryRXY<"ltg", 0xE302, load, GR64, 8>; +} + +let canFoldAsLoad = 1 in { + def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>; + def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>; +} + +// Load on condition. +let isCodeGenOnly = 1, Uses = [CC] in { + def LOC : CondUnaryRSY<"loc", 0xEBF2, nonvolatile_load, GR32, 4>; + def LOCG : CondUnaryRSY<"locg", 0xEBE2, nonvolatile_load, GR64, 8>; +} +let Uses = [CC] in { + def AsmLOC : AsmCondUnaryRSY<"loc", 0xEBF2, GR32, 4>; + def AsmLOCG : AsmCondUnaryRSY<"locg", 0xEBE2, GR64, 8>; +} // Register stores. let SimpleBDXStore = 1 in { - let isCodeGenOnly = 1 in { - defm ST32 : StoreRXPair<"st", 0x50, 0xE350, store, GR32>; - def STRL32 : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>; - } - - def STG : StoreRXY<"stg", 0xE324, store, GR64>; - def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>; + let isCodeGenOnly = 1 in + defm ST32 : StoreRXPair<"st", 0x50, 0xE350, store, GR32, 4>; + def STG : StoreRXY<"stg", 0xE324, store, GR64, 8>; // These instructions are split after register allocation, so we don't // want a custom inserter. @@ -252,6 +309,20 @@ let SimpleBDXStore = 1 in { [(store GR128:$src, bdxaddr20only128:$dst)]>; } } +let isCodeGenOnly = 1 in + def STRL32 : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>; +def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>; + +// Store on condition. +let isCodeGenOnly = 1, Uses = [CC] in { + def STOC32 : CondStoreRSY<"stoc", 0xEBF3, GR32, 4>; + def STOC : CondStoreRSY<"stoc", 0xEBF3, GR64, 4>; + def STOCG : CondStoreRSY<"stocg", 0xEBE3, GR64, 8>; +} +let Uses = [CC] in { + def AsmSTOC : AsmCondStoreRSY<"stoc", 0xEBF3, GR32, 4>; + def AsmSTOCG : AsmCondStoreRSY<"stocg", 0xEBE3, GR64, 8>; +} // 8-bit immediate stores to 8-bit fields. defm MVI : StoreSIPair<"mvi", 0x92, 0xEB52, truncstorei8, imm32zx8trunc>; @@ -261,22 +332,50 @@ def MVHHI : StoreSIL<"mvhhi", 0xE544, truncstorei16, imm32sx16trunc>; def MVHI : StoreSIL<"mvhi", 0xE54C, store, imm32sx16>; def MVGHI : StoreSIL<"mvghi", 0xE548, store, imm64sx16>; +// Memory-to-memory moves. +let mayLoad = 1, mayStore = 1 in + def MVC : InstSS<0xD2, (outs), (ins bdladdr12onlylen8:$BDL1, + bdaddr12only:$BD2), + "mvc\t$BDL1, $BD2", []>; + +let mayLoad = 1, mayStore = 1, usesCustomInserter = 1 in + def MVCWrapper : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, + imm32len8:$length), + [(z_mvc bdaddr12only:$dest, bdaddr12only:$src, + imm32len8:$length)]>; + +defm LoadStore8_32 : MVCLoadStore<anyextloadi8, truncstorei8, i32, + MVCWrapper, 1>; +defm LoadStore16_32 : MVCLoadStore<anyextloadi16, truncstorei16, i32, + MVCWrapper, 2>; +defm LoadStore32_32 : MVCLoadStore<load, store, i32, MVCWrapper, 4>; + +defm LoadStore8 : MVCLoadStore<anyextloadi8, truncstorei8, i64, + MVCWrapper, 1>; +defm LoadStore16 : MVCLoadStore<anyextloadi16, truncstorei16, i64, + MVCWrapper, 2>; +defm LoadStore32 : MVCLoadStore<anyextloadi32, truncstorei32, i64, + MVCWrapper, 4>; +defm LoadStore64 : MVCLoadStore<load, store, i64, MVCWrapper, 8>; + //===----------------------------------------------------------------------===// // Sign extensions //===----------------------------------------------------------------------===// // 32-bit extensions from registers. 
let neverHasSideEffects = 1 in { - def LBR : UnaryRRE<"lbr", 0xB926, sext8, GR32, GR32>; - def LHR : UnaryRRE<"lhr", 0xB927, sext16, GR32, GR32>; + def LBR : UnaryRRE<"lb", 0xB926, sext8, GR32, GR32>; + def LHR : UnaryRRE<"lh", 0xB927, sext16, GR32, GR32>; } // 64-bit extensions from registers. let neverHasSideEffects = 1 in { - def LGBR : UnaryRRE<"lgbr", 0xB906, sext8, GR64, GR64>; - def LGHR : UnaryRRE<"lghr", 0xB907, sext16, GR64, GR64>; - def LGFR : UnaryRRE<"lgfr", 0xB914, sext32, GR64, GR32>; + def LGBR : UnaryRRE<"lgb", 0xB906, sext8, GR64, GR64>; + def LGHR : UnaryRRE<"lgh", 0xB907, sext16, GR64, GR64>; + def LGFR : UnaryRRE<"lgf", 0xB914, sext32, GR64, GR32>; } +let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in + def LTGFR : UnaryRRE<"ltgf", 0xB912, null_frag, GR64, GR64>; // Match 32-to-64-bit sign extensions in which the source is already // in a 64-bit register. @@ -284,16 +383,18 @@ def : Pat<(sext_inreg GR64:$src, i32), (LGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>; // 32-bit extensions from memory. -def LB : UnaryRXY<"lb", 0xE376, sextloadi8, GR32>; -defm LH : UnaryRXPair<"lh", 0x48, 0xE378, sextloadi16, GR32>; +def LB : UnaryRXY<"lb", 0xE376, sextloadi8, GR32, 1>; +defm LH : UnaryRXPair<"lh", 0x48, 0xE378, sextloadi16, GR32, 2>; def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_sextloadi16, GR32>; // 64-bit extensions from memory. -def LGB : UnaryRXY<"lgb", 0xE377, sextloadi8, GR64>; -def LGH : UnaryRXY<"lgh", 0xE315, sextloadi16, GR64>; -def LGF : UnaryRXY<"lgf", 0xE314, sextloadi32, GR64>; +def LGB : UnaryRXY<"lgb", 0xE377, sextloadi8, GR64, 1>; +def LGH : UnaryRXY<"lgh", 0xE315, sextloadi16, GR64, 2>; +def LGF : UnaryRXY<"lgf", 0xE314, sextloadi32, GR64, 4>; def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_sextloadi16, GR64>; def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_sextloadi32, GR64>; +let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in + def LTGF : UnaryRXY<"ltgf", 0xE332, sextloadi32, GR64, 4>; // If the sign of a load-extend operation doesn't matter, use the signed ones. // There's not really much to choose between the sign and zero extensions, @@ -306,21 +407,29 @@ def : Pat<(i64 (extloadi8 bdxaddr20only:$src)), (LGB bdxaddr20only:$src)>; def : Pat<(i64 (extloadi16 bdxaddr20only:$src)), (LGH bdxaddr20only:$src)>; def : Pat<(i64 (extloadi32 bdxaddr20only:$src)), (LGF bdxaddr20only:$src)>; +// We want PC-relative addresses to be tried ahead of BD and BDX addresses. +// However, BDXs have two extra operands and are therefore 6 units more +// complex. +let AddedComplexity = 7 in { + def : Pat<(i32 (extloadi16 pcrel32:$src)), (LHRL pcrel32:$src)>; + def : Pat<(i64 (extloadi16 pcrel32:$src)), (LGHRL pcrel32:$src)>; +} + //===----------------------------------------------------------------------===// // Zero extensions //===----------------------------------------------------------------------===// // 32-bit extensions from registers. let neverHasSideEffects = 1 in { - def LLCR : UnaryRRE<"llcr", 0xB994, zext8, GR32, GR32>; - def LLHR : UnaryRRE<"llhr", 0xB995, zext16, GR32, GR32>; + def LLCR : UnaryRRE<"llc", 0xB994, zext8, GR32, GR32>; + def LLHR : UnaryRRE<"llh", 0xB995, zext16, GR32, GR32>; } // 64-bit extensions from registers. 
let neverHasSideEffects = 1 in { - def LLGCR : UnaryRRE<"llgcr", 0xB984, zext8, GR64, GR64>; - def LLGHR : UnaryRRE<"llghr", 0xB985, zext16, GR64, GR64>; - def LLGFR : UnaryRRE<"llgfr", 0xB916, zext32, GR64, GR32>; + def LLGCR : UnaryRRE<"llgc", 0xB984, zext8, GR64, GR64>; + def LLGHR : UnaryRRE<"llgh", 0xB985, zext16, GR64, GR64>; + def LLGFR : UnaryRRE<"llgf", 0xB916, zext32, GR64, GR32>; } // Match 32-to-64-bit zero extensions in which the source is already @@ -329,14 +438,14 @@ def : Pat<(and GR64:$src, 0xffffffff), (LLGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>; // 32-bit extensions from memory. -def LLC : UnaryRXY<"llc", 0xE394, zextloadi8, GR32>; -def LLH : UnaryRXY<"llh", 0xE395, zextloadi16, GR32>; +def LLC : UnaryRXY<"llc", 0xE394, zextloadi8, GR32, 1>; +def LLH : UnaryRXY<"llh", 0xE395, zextloadi16, GR32, 2>; def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_zextloadi16, GR32>; // 64-bit extensions from memory. -def LLGC : UnaryRXY<"llgc", 0xE390, zextloadi8, GR64>; -def LLGH : UnaryRXY<"llgh", 0xE391, zextloadi16, GR64>; -def LLGF : UnaryRXY<"llgf", 0xE316, zextloadi32, GR64>; +def LLGC : UnaryRXY<"llgc", 0xE390, zextloadi8, GR64, 1>; +def LLGH : UnaryRXY<"llgh", 0xE391, zextloadi16, GR64, 2>; +def LLGF : UnaryRXY<"llgf", 0xE316, zextloadi32, GR64, 4>; def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_zextloadi16, GR64>; def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_zextloadi32, GR64>; @@ -350,16 +459,16 @@ def : Pat<(i32 (trunc GR64:$src)), // Truncations of 32-bit registers to memory. let isCodeGenOnly = 1 in { - defm STC32 : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32>; - defm STH32 : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32>; + defm STC32 : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32, 1>; + defm STH32 : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32, 2>; def STHRL32 : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>; } // Truncations of 64-bit registers to memory. -defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR64>; -defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR64>; +defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR64, 1>; +defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR64, 2>; def STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR64>; -defm ST : StoreRXPair<"st", 0x50, 0xE350, truncstorei32, GR64>; +defm ST : StoreRXPair<"st", 0x50, 0xE350, truncstorei32, GR64, 4>; def STRL : StoreRILPC<"strl", 0xC4F, aligned_truncstorei32, GR64>; //===----------------------------------------------------------------------===// @@ -378,30 +487,32 @@ def STMG : StoreMultipleRSY<"stmg", 0xEB24, GR64>; // Byte-swapping register moves. let neverHasSideEffects = 1 in { - def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>; - def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>; + def LRVR : UnaryRRE<"lrv", 0xB91F, bswap, GR32, GR32>; + def LRVGR : UnaryRRE<"lrvg", 0xB90F, bswap, GR64, GR64>; } // Byte-swapping loads. Unlike normal loads, these instructions are // allowed to access storage more than once. -def LRV : UnaryRXY<"lrv", 0xE31E, loadu<bswap, nonvolatile_load>, GR32>; -def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap, nonvolatile_load>, GR64>; +def LRV : UnaryRXY<"lrv", 0xE31E, loadu<bswap, nonvolatile_load>, GR32, 4>; +def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap, nonvolatile_load>, GR64, 8>; // Likewise byte-swapping stores. 
-def STRV : StoreRXY<"strv", 0xE33E, storeu<bswap, nonvolatile_store>, GR32>; -def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap, nonvolatile_store>, GR64>; +def STRV : StoreRXY<"strv", 0xE33E, storeu<bswap, nonvolatile_store>, GR32, 4>; +def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap, nonvolatile_store>, + GR64, 8>; //===----------------------------------------------------------------------===// // Load address instructions //===----------------------------------------------------------------------===// // Load BDX-style addresses. -let neverHasSideEffects = 1, Function = "la" in { - let PairType = "12" in +let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isReMaterializable = 1, + DispKey = "la" in { + let DispSize = "12" in def LA : InstRX<0x41, (outs GR64:$R1), (ins laaddr12pair:$XBD2), "la\t$R1, $XBD2", [(set GR64:$R1, laaddr12pair:$XBD2)]>; - let PairType = "20" in + let DispSize = "20" in def LAY : InstRXY<0xE371, (outs GR64:$R1), (ins laaddr20pair:$XBD2), "lay\t$R1, $XBD2", [(set GR64:$R1, laaddr20pair:$XBD2)]>; @@ -409,7 +520,8 @@ let neverHasSideEffects = 1, Function = "la" in { // Load a PC-relative address. There's no version of this instruction // with a 16-bit offset, so there's no relaxation. -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1, + isReMaterializable = 1 in { def LARL : InstRIL<0xC00, (outs GR64:$R1), (ins pcrel32:$I2), "larl\t$R1, $I2", [(set GR64:$R1, pcrel32:$I2)]>; @@ -420,9 +532,12 @@ let neverHasSideEffects = 1 in { //===----------------------------------------------------------------------===// let Defs = [CC] in { - def LCR : UnaryRR <"lcr", 0x13, ineg, GR32, GR32>; - def LCGR : UnaryRRE<"lcgr", 0xB903, ineg, GR64, GR64>; - def LCGFR : UnaryRRE<"lcgfr", 0xB913, null_frag, GR64, GR32>; + let CCValues = 0xF, CompareZeroCCMask = 0x8 in { + def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>; + def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>; + } + let CCValues = 0xE, CompareZeroCCMask = 0xE in + def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>; } defm : SXU<ineg, LCGFR>; @@ -431,8 +546,8 @@ defm : SXU<ineg, LCGFR>; //===----------------------------------------------------------------------===// let isCodeGenOnly = 1 in - defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, zextloadi8>; -defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, zextloadi8>; + defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, zextloadi8, 1>; +defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, zextloadi8, 1>; defm : InsertMem<"inserti8", IC32, GR32, zextloadi8, bdxaddr12pair>; defm : InsertMem<"inserti8", IC32Y, GR32, zextloadi8, bdxaddr20pair>; @@ -456,7 +571,8 @@ def IIHH : BinaryRI<"iihh", 0xA50, inserthh, GR64, imm64hh16>; // full-width move. (We use IILF rather than something like LLILF // for 32-bit moves because IILF leaves the upper 32 bits of the // GR64 unchanged.) -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, isAsCheapAsAMove = 1, isMoveImm = 1, + isReMaterializable = 1 in { def IILF32 : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>; } def IILF : BinaryRIL<"iilf", 0xC09, insertlf, GR64, imm64lf32>; @@ -473,27 +589,27 @@ def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm), //===----------------------------------------------------------------------===// // Plain addition. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { // Addition of a register. 
 let isCommutable = 1 in {
-  def AR : BinaryRR <"ar", 0x1A, add, GR32, GR32>;
-  def AGR : BinaryRRE<"agr", 0xB908, add, GR64, GR64>;
+  defm AR : BinaryRRAndK<"a", 0x1A, 0xB9F8, add, GR32, GR32>;
+  defm AGR : BinaryRREAndK<"ag", 0xB908, 0xB9E8, add, GR64, GR64>;
 }
-  def AGFR : BinaryRRE<"agfr", 0xB918, null_frag, GR64, GR32>;
+  def AGFR : BinaryRRE<"agf", 0xB918, null_frag, GR64, GR32>;

   // Addition of signed 16-bit immediates.
-  def AHI : BinaryRI<"ahi", 0xA7A, add, GR32, imm32sx16>;
-  def AGHI : BinaryRI<"aghi", 0xA7B, add, GR64, imm64sx16>;
+  defm AHI : BinaryRIAndK<"ahi", 0xA7A, 0xECD8, add, GR32, imm32sx16>;
+  defm AGHI : BinaryRIAndK<"aghi", 0xA7B, 0xECD9, add, GR64, imm64sx16>;

   // Addition of signed 32-bit immediates.
   def AFI : BinaryRIL<"afi", 0xC29, add, GR32, simm32>;
   def AGFI : BinaryRIL<"agfi", 0xC28, add, GR64, imm64sx32>;

   // Addition of memory.
-  defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, sextloadi16>;
-  defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load>;
-  def AGF : BinaryRXY<"agf", 0xE318, add, GR64, sextloadi32>;
-  def AG : BinaryRXY<"ag", 0xE308, add, GR64, load>;
+  defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, sextloadi16, 2>;
+  defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load, 4>;
+  def AGF : BinaryRXY<"agf", 0xE318, add, GR64, sextloadi32, 4>;
+  def AG : BinaryRXY<"ag", 0xE308, add, GR64, load, 8>;

   // Addition to memory.
   def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>;
@@ -505,31 +621,37 @@ defm : SXB<add, GR64, AGFR>;
 let Defs = [CC] in {
   // Addition of a register.
   let isCommutable = 1 in {
-    def ALR : BinaryRR <"alr", 0x1E, addc, GR32, GR32>;
-    def ALGR : BinaryRRE<"algr", 0xB90A, addc, GR64, GR64>;
+    defm ALR : BinaryRRAndK<"al", 0x1E, 0xB9FA, addc, GR32, GR32>;
+    defm ALGR : BinaryRREAndK<"alg", 0xB90A, 0xB9EA, addc, GR64, GR64>;
   }
-  def ALGFR : BinaryRRE<"algfr", 0xB91A, null_frag, GR64, GR32>;
+  def ALGFR : BinaryRRE<"algf", 0xB91A, null_frag, GR64, GR32>;
+
+  // Addition of signed 16-bit immediates.
+  def ALHSIK : BinaryRIE<"alhsik", 0xECDA, addc, GR32, imm32sx16>,
+               Requires<[FeatureDistinctOps]>;
+  def ALGHSIK : BinaryRIE<"alghsik", 0xECDB, addc, GR64, imm64sx16>,
+                Requires<[FeatureDistinctOps]>;

   // Addition of unsigned 32-bit immediates.
   def ALFI : BinaryRIL<"alfi", 0xC2B, addc, GR32, uimm32>;
   def ALGFI : BinaryRIL<"algfi", 0xC2A, addc, GR64, imm64zx32>;

   // Addition of memory.
-  defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load>;
-  def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, zextloadi32>;
-  def ALG : BinaryRXY<"alg", 0xE30A, addc, GR64, load>;
+  defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load, 4>;
+  def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, zextloadi32, 4>;
+  def ALG : BinaryRXY<"alg", 0xE30A, addc, GR64, load, 8>;
 }
 defm : ZXB<addc, GR64, ALGFR>;

 // Addition producing and using a carry.
 let Defs = [CC], Uses = [CC] in {
   // Addition of a register.
-  def ALCR : BinaryRRE<"alcr", 0xB998, adde, GR32, GR32>;
-  def ALCGR : BinaryRRE<"alcgr", 0xB988, adde, GR64, GR64>;
+  def ALCR : BinaryRRE<"alc", 0xB998, adde, GR32, GR32>;
+  def ALCGR : BinaryRRE<"alcg", 0xB988, adde, GR64, GR64>;

   // Addition of memory.
-  def ALC : BinaryRXY<"alc", 0xE398, adde, GR32, load>;
-  def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load>;
+  def ALC : BinaryRXY<"alc", 0xE398, adde, GR32, load, 4>;
+  def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load, 8>;
 }

 //===----------------------------------------------------------------------===//
@@ -538,26 +660,26 @@ let Defs = [CC], Uses = [CC] in {
 // Plain subtraction.
Although immediate forms exist, we use the // add-immediate instruction instead. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { // Subtraction of a register. - def SR : BinaryRR <"sr", 0x1B, sub, GR32, GR32>; - def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>; - def SGR : BinaryRRE<"sgr", 0xB909, sub, GR64, GR64>; + defm SR : BinaryRRAndK<"s", 0x1B, 0xB9F9, sub, GR32, GR32>; + def SGFR : BinaryRRE<"sgf", 0xB919, null_frag, GR64, GR32>; + defm SGR : BinaryRREAndK<"sg", 0xB909, 0xB9E9, sub, GR64, GR64>; // Subtraction of memory. - defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, sextloadi16>; - defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load>; - def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, sextloadi32>; - def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load>; + defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, sextloadi16, 2>; + defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load, 4>; + def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, sextloadi32, 4>; + def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load, 8>; } defm : SXB<sub, GR64, SGFR>; // Subtraction producing a carry. let Defs = [CC] in { // Subtraction of a register. - def SLR : BinaryRR <"slr", 0x1F, subc, GR32, GR32>; - def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>; - def SLGR : BinaryRRE<"slgr", 0xB90B, subc, GR64, GR64>; + defm SLR : BinaryRRAndK<"sl", 0x1F, 0xB9FB, subc, GR32, GR32>; + def SLGFR : BinaryRRE<"slgf", 0xB91B, null_frag, GR64, GR32>; + defm SLGR : BinaryRREAndK<"slg", 0xB90B, 0xB9EB, subc, GR64, GR64>; // Subtraction of unsigned 32-bit immediates. These don't match // subc because we prefer addc for constants. @@ -565,21 +687,21 @@ let Defs = [CC] in { def SLGFI : BinaryRIL<"slgfi", 0xC24, null_frag, GR64, imm64zx32>; // Subtraction of memory. - defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, subc, GR32, load>; - def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, zextloadi32>; - def SLG : BinaryRXY<"slg", 0xE30B, subc, GR64, load>; + defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, subc, GR32, load, 4>; + def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, zextloadi32, 4>; + def SLG : BinaryRXY<"slg", 0xE30B, subc, GR64, load, 8>; } defm : ZXB<subc, GR64, SLGFR>; // Subtraction producing and using a carry. let Defs = [CC], Uses = [CC] in { // Subtraction of a register. - def SLBR : BinaryRRE<"slbr", 0xB999, sube, GR32, GR32>; - def SLGBR : BinaryRRE<"slbgr", 0xB989, sube, GR64, GR64>; + def SLBR : BinaryRRE<"slb", 0xB999, sube, GR32, GR32>; + def SLGBR : BinaryRRE<"slbg", 0xB989, sube, GR64, GR64>; // Subtraction of memory. - def SLB : BinaryRXY<"slb", 0xE399, sube, GR32, load>; - def SLBG : BinaryRXY<"slbg", 0xE389, sube, GR64, load>; + def SLB : BinaryRXY<"slb", 0xE399, sube, GR32, load, 4>; + def SLBG : BinaryRXY<"slbg", 0xE389, sube, GR64, load, 8>; } //===----------------------------------------------------------------------===// @@ -588,30 +710,37 @@ let Defs = [CC], Uses = [CC] in { let Defs = [CC] in { // ANDs of a register. - let isCommutable = 1 in { - def NR : BinaryRR <"nr", 0x14, and, GR32, GR32>; - def NGR : BinaryRRE<"ngr", 0xB980, and, GR64, GR64>; + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm NR : BinaryRRAndK<"n", 0x14, 0xB9F4, and, GR32, GR32>; + defm NGR : BinaryRREAndK<"ng", 0xB980, 0xB9E4, and, GR64, GR64>; } - // ANDs of a 16-bit immediate, leaving other bits unaffected. 
- let isCodeGenOnly = 1 in { - def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>; - def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>; + let isConvertibleToThreeAddress = 1 in { + // ANDs of a 16-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 16-bit field, not the full register. + let isCodeGenOnly = 1 in { + def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>; + def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>; + } + def NILL : BinaryRI<"nill", 0xA57, and, GR64, imm64ll16c>; + def NILH : BinaryRI<"nilh", 0xA56, and, GR64, imm64lh16c>; + def NIHL : BinaryRI<"nihl", 0xA55, and, GR64, imm64hl16c>; + def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>; + + // ANDs of a 32-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let isCodeGenOnly = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in + def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>; + def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>; + def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>; } - def NILL : BinaryRI<"nill", 0xA57, and, GR64, imm64ll16c>; - def NILH : BinaryRI<"nilh", 0xA56, and, GR64, imm64lh16c>; - def NIHL : BinaryRI<"nihl", 0xA55, and, GR64, imm64hl16c>; - def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>; - - // ANDs of a 32-bit immediate, leaving other bits unaffected. - let isCodeGenOnly = 1 in - def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>; - def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>; - def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>; // ANDs of memory. - defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load>; - def NG : BinaryRXY<"ng", 0xE380, and, GR64, load>; + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>; + def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>; + } // AND to memory defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>; @@ -625,12 +754,13 @@ defm : RMWIByte<and, bdaddr20pair, NIY>; let Defs = [CC] in { // ORs of a register. - let isCommutable = 1 in { - def OR : BinaryRR <"or", 0x16, or, GR32, GR32>; - def OGR : BinaryRRE<"ogr", 0xB981, or, GR64, GR64>; + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm OR : BinaryRRAndK<"o", 0x16, 0xB9F6, or, GR32, GR32>; + defm OGR : BinaryRREAndK<"og", 0xB981, 0xB9E6, or, GR64, GR64>; } // ORs of a 16-bit immediate, leaving other bits unaffected. + // The CC result only reflects the 16-bit field, not the full register. let isCodeGenOnly = 1 in { def OILL32 : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>; def OILH32 : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>; @@ -641,14 +771,18 @@ let Defs = [CC] in { def OIHH : BinaryRI<"oihh", 0xA58, or, GR64, imm64hh16>; // ORs of a 32-bit immediate, leaving other bits unaffected. - let isCodeGenOnly = 1 in + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let isCodeGenOnly = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in def OILF32 : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>; def OILF : BinaryRIL<"oilf", 0xC0D, or, GR64, imm64lf32>; def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>; // ORs of memory. 
- defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load>; - def OG : BinaryRXY<"og", 0xE381, or, GR64, load>; + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>; + def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>; + } // OR to memory defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>; @@ -662,20 +796,24 @@ defm : RMWIByte<or, bdaddr20pair, OIY>; let Defs = [CC] in { // XORs of a register. - let isCommutable = 1 in { - def XR : BinaryRR <"xr", 0x17, xor, GR32, GR32>; - def XGR : BinaryRRE<"xgr", 0xB982, xor, GR64, GR64>; + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm XR : BinaryRRAndK<"x", 0x17, 0xB9F7, xor, GR32, GR32>; + defm XGR : BinaryRREAndK<"xg", 0xB982, 0xB9E7, xor, GR64, GR64>; } // XORs of a 32-bit immediate, leaving other bits unaffected. - let isCodeGenOnly = 1 in + // The CC result only reflects the 32-bit field, which means we can + // use it as a zero indicator for i32 operations but not otherwise. + let isCodeGenOnly = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in def XILF32 : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>; def XILF : BinaryRIL<"xilf", 0xC07, xor, GR64, imm64lf32>; def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>; // XORs of memory. - defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load>; - def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load>; + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>; + def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>; + } // XOR to memory defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>; @@ -689,10 +827,10 @@ defm : RMWIByte<xor, bdaddr20pair, XIY>; // Multiplication of a register. let isCommutable = 1 in { - def MSR : BinaryRRE<"msr", 0xB252, mul, GR32, GR32>; - def MSGR : BinaryRRE<"msgr", 0xB90C, mul, GR64, GR64>; + def MSR : BinaryRRE<"ms", 0xB252, mul, GR32, GR32>; + def MSGR : BinaryRRE<"msg", 0xB90C, mul, GR64, GR64>; } -def MSGFR : BinaryRRE<"msgfr", 0xB91C, null_frag, GR64, GR32>; +def MSGFR : BinaryRRE<"msgf", 0xB91C, null_frag, GR64, GR32>; defm : SXB<mul, GR64, MSGFR>; // Multiplication of a signed 16-bit immediate. @@ -704,33 +842,32 @@ def MSFI : BinaryRIL<"msfi", 0xC21, mul, GR32, simm32>; def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>; // Multiplication of memory. -defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, sextloadi16>; -defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load>; -def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, sextloadi32>; -def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load>; +defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, sextloadi16, 2>; +defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load, 4>; +def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, sextloadi32, 4>; +def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>; // Multiplication of a register, producing two results. -def MLGR : BinaryRRE<"mlgr", 0xB986, z_umul_lohi64, GR128, GR64>; +def MLGR : BinaryRRE<"mlg", 0xB986, z_umul_lohi64, GR128, GR64>; // Multiplication of memory, producing two results. -def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load>; +def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>; //===----------------------------------------------------------------------===// // Division and remainder //===----------------------------------------------------------------------===// // Division and remainder, from registers. 
-def DSGFR : BinaryRRE<"dsgfr", 0xB91D, null_frag, GR128, GR32>;
-def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>;
-def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>;
-def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>;
-defm : SXB<z_sdivrem64, GR128, DSGFR>;
+def DSGFR : BinaryRRE<"dsgf", 0xB91D, z_sdivrem32, GR128, GR32>;
+def DSGR : BinaryRRE<"dsg", 0xB90D, z_sdivrem64, GR128, GR64>;
+def DLR : BinaryRRE<"dl", 0xB997, z_udivrem32, GR128, GR32>;
+def DLGR : BinaryRRE<"dlg", 0xB987, z_udivrem64, GR128, GR64>;

 // Division and remainder, from memory.
-def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem64, GR128, sextloadi32>;
-def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load>;
-def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load>;
-def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load>;
+def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>;
+def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>;
+def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>;
+def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>;

 //===----------------------------------------------------------------------===//
 // Shifts
@@ -738,35 +875,55 @@ def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load>;

 // Shift left.
 let neverHasSideEffects = 1 in {
-  def SLL : ShiftRS <"sll", 0x89, shl, GR32, shift12only>;
-  def SLLG : ShiftRSY<"sllg", 0xEB0D, shl, GR64, shift20only>;
+  defm SLL : ShiftRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
+  def SLLG : ShiftRSY<"sllg", 0xEB0D, shl, GR64>;
 }

 // Logical shift right.
 let neverHasSideEffects = 1 in {
-  def SRL : ShiftRS <"srl", 0x88, srl, GR32, shift12only>;
-  def SRLG : ShiftRSY<"srlg", 0xEB0C, srl, GR64, shift20only>;
+  defm SRL : ShiftRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
+  def SRLG : ShiftRSY<"srlg", 0xEB0C, srl, GR64>;
 }

 // Arithmetic shift right.
-let Defs = [CC] in {
-  def SRA : ShiftRS <"sra", 0x8A, sra, GR32, shift12only>;
-  def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64, shift20only>;
+let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
+  defm SRA : ShiftRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>;
+  def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64>;
 }

 // Rotate left.
 let neverHasSideEffects = 1 in {
-  def RLL : ShiftRSY<"rll", 0xEB1D, rotl, GR32, shift20only>;
-  def RLLG : ShiftRSY<"rllg", 0xEB1C, rotl, GR64, shift20only>;
+  def RLL : ShiftRSY<"rll", 0xEB1D, rotl, GR32>;
+  def RLLG : ShiftRSY<"rllg", 0xEB1C, rotl, GR64>;
 }

 // Rotate second operand left and insert selected bits into first operand.
 // These can act like 32-bit operands provided that the constant start and
-// end bits (operands 2 and 3) are in the range [32, 64)
+// end bits (operands 2 and 3) are in the range [32, 64).
 let Defs = [CC] in {
   let isCodeGenOnly = 1 in
-    def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>;
-  def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
+    def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>;
+  let CCValues = 0xE, CompareZeroCCMask = 0xE in
+    def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
+}
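For readers unfamiliar with ROTATE THEN INSERT SELECTED BITS, the following editorial sketch (not part of the patch) models the zero-remaining-bits form that the RISBG patterns above rely on. Bits are numbered 0 (most significant) through 63, and the sketch assumes a non-wrapping Start <= End range, although the hardware also accepts wrap-around masks:

    #include <bit>
    #include <cstdint>

    // RISBG with the "zero" flag: rotate R2 left, then keep only bits
    // Start..End of the rotated value, clearing everything else.
    uint64_t risbgZero(uint64_t r2, unsigned start, unsigned end,
                       unsigned rotate) {
      uint64_t rotated = std::rotl(r2, static_cast<int>(rotate));
      uint64_t mask = (~uint64_t{0} >> start) & (~uint64_t{0} << (63 - end));
      return rotated & mask;
    }

    // Example: an AND with 0x00ffff00 selects bits 40..55, so
    // risbgZero(x, 40, 55, 0) == (x & 0x00ffff00).

This is the kind of question the isRxSBGMask hook declared earlier answers when deciding whether a mask and shift can be folded into one of these instructions.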
+
+// Forms of RISBG that only affect one word of the destination register.
+// They do not set CC.
+let isCodeGenOnly = 1 in
+  def RISBLG32 : RotateSelectRIEf<"risblg", 0xEC51, GR32, GR32>,
+                 Requires<[FeatureHighWord]>;
+def RISBHG : RotateSelectRIEf<"risbhg", 0xEC5D, GR64, GR64>,
+             Requires<[FeatureHighWord]>;
+def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR64, GR64>,
+             Requires<[FeatureHighWord]>;
+
+// Rotate second operand left and perform a logical operation with selected
+// bits of the first operand. The CC result only describes the selected bits,
+// so isn't useful for a full comparison against zero.
+let Defs = [CC] in {
+  def RNSBG : RotateSelectRIEf<"rnsbg", 0xEC54, GR64, GR64>;
+  def ROSBG : RotateSelectRIEf<"rosbg", 0xEC56, GR64, GR64>;
+  def RXSBG : RotateSelectRIEf<"rxsbg", 0xEC57, GR64, GR64>;
 }

 //===----------------------------------------------------------------------===//
@@ -774,11 +931,11 @@ let Defs = [CC] in {
 //===----------------------------------------------------------------------===//

 // Signed comparisons.
-let Defs = [CC] in {
+let Defs = [CC], CCValues = 0xE in {
   // Comparison with a register.
-  def CR : CompareRR <"cr", 0x19, z_cmp, GR32, GR32>;
-  def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>;
-  def CGR : CompareRRE<"cgr", 0xB920, z_cmp, GR64, GR64>;
+  def CR : CompareRR <"c", 0x19, z_cmp, GR32, GR32>;
+  def CGFR : CompareRRE<"cgf", 0xB930, null_frag, GR64, GR32>;
+  def CGR : CompareRRE<"cg", 0xB920, z_cmp, GR64, GR64>;

   // Comparison with a signed 16-bit immediate.
   def CHI : CompareRI<"chi", 0xA7E, z_cmp, GR32, imm32sx16>;
   def CGHI : CompareRI<"cghi", 0xA7F, z_cmp, GR64, imm64sx16>;

   // Comparison with a signed 32-bit immediate.
   def CFI : CompareRIL<"cfi", 0xC2D, z_cmp, GR32, simm32>;
   def CGFI : CompareRIL<"cgfi", 0xC2C, z_cmp, GR64, imm64sx32>;

   // Comparison with memory.
-  defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_cmp, GR32, sextloadi16>;
-  defm C : CompareRXPair<"c", 0x59, 0xE359, z_cmp, GR32, load>;
-  def CGH : CompareRXY<"cgh", 0xE334, z_cmp, GR64, sextloadi16>;
-  def CGF : CompareRXY<"cgf", 0xE330, z_cmp, GR64, sextloadi32>;
-  def CG : CompareRXY<"cg", 0xE320, z_cmp, GR64, load>;
+  defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_cmp, GR32, sextloadi16, 2>;
+  defm C : CompareRXPair<"c", 0x59, 0xE359, z_cmp, GR32, load, 4>;
+  def CGH : CompareRXY<"cgh", 0xE334, z_cmp, GR64, sextloadi16, 2>;
+  def CGF : CompareRXY<"cgf", 0xE330, z_cmp, GR64, sextloadi32, 4>;
+  def CG : CompareRXY<"cg", 0xE320, z_cmp, GR64, load, 8>;
   def CHRL : CompareRILPC<"chrl", 0xC65, z_cmp, GR32, aligned_sextloadi16>;
   def CRL : CompareRILPC<"crl", 0xC6D, z_cmp, GR32, aligned_load>;
   def CGHRL : CompareRILPC<"cghrl", 0xC64, z_cmp, GR64, aligned_sextloadi16>;
@@ -808,20 +965,20 @@ let Defs = [CC] in {
 defm : SXB<z_cmp, GR64, CGFR>;

 // Unsigned comparisons.
-let Defs = [CC] in {
+let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
   // Comparison with a register.
-  def CLR : CompareRR <"clr", 0x15, z_ucmp, GR32, GR32>;
-  def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>;
-  def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>;
+  def CLR : CompareRR <"cl", 0x15, z_ucmp, GR32, GR32>;
+  def CLGFR : CompareRRE<"clgf", 0xB931, null_frag, GR64, GR32>;
+  def CLGR : CompareRRE<"clg", 0xB921, z_ucmp, GR64, GR64>;

   // Comparison with an unsigned 32-bit immediate.
   def CLFI : CompareRIL<"clfi", 0xC2F, z_ucmp, GR32, uimm32>;
   def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>;

   // Comparison with memory.
- defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load>; - def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, zextloadi32>; - def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, load>; + defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load, 4>; + def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, zextloadi32, 4>; + def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, load, 8>; def CLHRL : CompareRILPC<"clhrl", 0xC67, z_ucmp, GR32, aligned_zextloadi16>; def CLRL : CompareRILPC<"clrl", 0xC6F, z_ucmp, GR32, @@ -974,7 +1131,7 @@ def EAR : InstRRE<0xB24F, (outs GR32:$R1), (ins access_reg:$R2), // and the second giving a copy of the source with the leftmost one bit // cleared. We only use the first result here. let Defs = [CC] in { - def FLOGR : UnaryRRE<"flogr", 0xB983, null_frag, GR128, GR64>; + def FLOGR : UnaryRRE<"flog", 0xB983, null_frag, GR128, GR64>; } def : Pat<(ctlz GR64:$src), (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_high)>; @@ -1015,3 +1172,15 @@ def : Pat<(add GR64:$src1, imm64zx32n:$src2), (SLGFI GR64:$src1, imm64zx32n:$src2)>; def : Pat<(sub GR64:$src1, (zextloadi32 bdxaddr20only:$addr)), (SLGF GR64:$src1, bdxaddr20only:$addr)>; + +// Optimize sign-extended 1/0 selects to -1/0 selects. This is important +// for vector legalization. +def : Pat<(sra (shl (i32 (z_select_ccmask 1, 0, uimm8zx4:$valid, uimm8zx4:$cc)), + (i32 31)), + (i32 31)), + (Select32 (LHI -1), (LHI 0), uimm8zx4:$valid, uimm8zx4:$cc)>; +def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, uimm8zx4:$valid, + uimm8zx4:$cc)))), + (i32 63)), + (i32 63)), + (Select64 (LGHI -1), (LGHI 0), uimm8zx4:$valid, uimm8zx4:$cc)>; diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp index 2cb5823..114f74e 100644 --- a/lib/Target/SystemZ/SystemZLongBranch.cpp +++ b/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -71,8 +71,6 @@ using namespace llvm; STATISTIC(LongBranches, "Number of long branches."); namespace { - typedef MachineBasicBlock::iterator Iter; - // Represents positional information about a basic block. struct MBBInfo { // The address that we currently assume the block has. @@ -150,6 +148,7 @@ namespace { bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address); bool mustRelaxABranch(); void setWorstCaseAddresses(); + void splitBranchOnCount(MachineInstr *MI, unsigned AddOpcode); void splitCompareBranch(MachineInstr *MI, unsigned CompareOpcode); void relaxBranch(TerminatorInfo &Terminator); void relaxBranches(); @@ -220,6 +219,11 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) { // Relaxes to BRCL, which is 2 bytes longer. Terminator.ExtraRelaxSize = 2; break; + case SystemZ::BRCT: + case SystemZ::BRCTG: + // Relaxes to A(G)HI and BRCL, which is 6 bytes longer. + Terminator.ExtraRelaxSize = 6; + break; case SystemZ::CRJ: // Relaxes to a CR/BRCL sequence, which is 2 bytes longer. Terminator.ExtraRelaxSize = 2; @@ -310,7 +314,7 @@ bool SystemZLongBranch::mustRelaxBranch(const TerminatorInfo &Terminator, // Return true if, under current assumptions, any terminator needs // to be relaxed. 
 bool SystemZLongBranch::mustRelaxABranch() {
-  for (SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin(),
+  for (SmallVectorImpl<TerminatorInfo>::iterator TI = Terminators.begin(),
          TE = Terminators.end(); TI != TE; ++TI)
     if (mustRelaxBranch(*TI, TI->Address))
       return true;
@@ -322,7 +326,7 @@ bool SystemZLongBranch::mustRelaxABranch() {
 void SystemZLongBranch::setWorstCaseAddresses() {
   SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin();
   BlockPosition Position(MF->getAlignment());
-  for (SmallVector<MBBInfo, 16>::iterator BI = MBBs.begin(), BE = MBBs.end();
+  for (SmallVectorImpl<MBBInfo>::iterator BI = MBBs.begin(), BE = MBBs.end();
        BI != BE; ++BI) {
     skipNonTerminators(Position, *BI);
     for (unsigned BTI = 0, BTE = BI->NumTerminators; BTI != BTE; ++BTI) {
@@ -332,6 +336,25 @@ void SystemZLongBranch::setWorstCaseAddresses() {
   }
 }

+// Split BRANCH ON COUNT MI into the addition given by AddOpcode followed
+// by a BRCL on the result.
+void SystemZLongBranch::splitBranchOnCount(MachineInstr *MI,
+                                           unsigned AddOpcode) {
+  MachineBasicBlock *MBB = MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+  BuildMI(*MBB, MI, DL, TII->get(AddOpcode))
+    .addOperand(MI->getOperand(0))
+    .addOperand(MI->getOperand(1))
+    .addImm(-1);
+  MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
+    .addImm(SystemZ::CCMASK_ICMP)
+    .addImm(SystemZ::CCMASK_CMP_NE)
+    .addOperand(MI->getOperand(2));
+  // The implicit use of CC is a killing use.
+  BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
+  MI->eraseFromParent();
+}
+
 // Split MI into the comparison given by CompareOpcode followed
 // by a BRCL on the result.
 void SystemZLongBranch::splitCompareBranch(MachineInstr *MI,
@@ -342,10 +365,11 @@ void SystemZLongBranch::splitCompareBranch(MachineInstr *MI,
     .addOperand(MI->getOperand(0))
     .addOperand(MI->getOperand(1));
   MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
+    .addImm(SystemZ::CCMASK_ICMP)
     .addOperand(MI->getOperand(2))
     .addOperand(MI->getOperand(3));
   // The implicit use of CC is a killing use.
-  BRCL->getOperand(2).setIsKill();
+  BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
   MI->eraseFromParent();
 }

@@ -359,6 +383,12 @@ void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) {
   case SystemZ::BRC:
     Branch->setDesc(TII->get(SystemZ::BRCL));
     break;
+  case SystemZ::BRCT:
+    splitBranchOnCount(Branch, SystemZ::AHI);
+    break;
+  case SystemZ::BRCTG:
+    splitBranchOnCount(Branch, SystemZ::AGHI);
+    break;
   case SystemZ::CRJ:
     splitCompareBranch(Branch, SystemZ::CR);
     break;
@@ -386,7 +416,7 @@ void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) {
 void SystemZLongBranch::relaxBranches() {
   SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin();
   BlockPosition Position(MF->getAlignment());
-  for (SmallVector<MBBInfo, 16>::iterator BI = MBBs.begin(), BE = MBBs.end();
+  for (SmallVectorImpl<MBBInfo>::iterator BI = MBBs.begin(), BE = MBBs.end();
        BI != BE; ++BI) {
     skipNonTerminators(Position, *BI);
     for (unsigned BTI = 0, BTE = BI->NumTerminators; BTI != BTE; ++BTI) {
diff --git a/lib/Target/SystemZ/SystemZMCInstLower.cpp b/lib/Target/SystemZ/SystemZMCInstLower.cpp
index fd3f867..432a0d3 100644
--- a/lib/Target/SystemZ/SystemZMCInstLower.cpp
+++ b/lib/Target/SystemZ/SystemZMCInstLower.cpp
@@ -57,9 +57,6 @@ MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const {
     llvm_unreachable("unknown operand type");

   case MachineOperand::MO_Register:
-    // Ignore all implicit register operands.
- if (MO.isImplicit()) - return MCOperand(); return MCOperand::CreateReg(MO.getReg()); case MachineOperand::MO_Immediate: @@ -104,8 +101,8 @@ void SystemZMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(Opcode); for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI->getOperand(I); - MCOperand MCOp = lowerOperand(MO); - if (MCOp.isValid()) - OutMI.addOperand(MCOp); + // Ignore all implicit register operands. + if (!MO.isReg() || !MO.isImplicit()) + OutMI.addOperand(lowerOperand(MO)); } } diff --git a/lib/Target/SystemZ/SystemZMCInstLower.h b/lib/Target/SystemZ/SystemZMCInstLower.h index afa72f3..db5bdb0 100644 --- a/lib/Target/SystemZ/SystemZMCInstLower.h +++ b/lib/Target/SystemZ/SystemZMCInstLower.h @@ -35,7 +35,7 @@ public: // Lower MachineInstr MI to MCInst OutMI. void lower(const MachineInstr *MI, MCInst &OutMI) const; - // Return an MCOperand for MO. Return an empty operand if MO is implicit. + // Return an MCOperand for MO. MCOperand lowerOperand(const MachineOperand& MO) const; // Return an MCOperand for MO, given that it equals Symbol + Offset. diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h index 1dc05a7..69c2691 100644 --- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -15,7 +15,6 @@ namespace llvm { class SystemZMachineFunctionInfo : public MachineFunctionInfo { - unsigned SavedGPRFrameSize; unsigned LowSavedGPR; unsigned HighSavedGPR; unsigned VarArgsFirstGPR; @@ -26,14 +25,8 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo { public: explicit SystemZMachineFunctionInfo(MachineFunction &MF) - : SavedGPRFrameSize(0), LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), - VarArgsFirstFPR(0), VarArgsFrameIndex(0), RegSaveFrameIndex(0), - ManipulatesSP(false) {} - - // Get and set the number of bytes allocated by generic code to store - // call-saved GPRs. - unsigned getSavedGPRFrameSize() const { return SavedGPRFrameSize; } - void setSavedGPRFrameSize(unsigned bytes) { SavedGPRFrameSize = bytes; } + : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0), + VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false) {} // Get and set the first call-saved GPR that should be saved and restored // by this function. This is 0 if no GPRs need to be saved or restored. diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td index 66d9c5f..9d79439 100644 --- a/lib/Target/SystemZ/SystemZOperands.td +++ b/lib/Target/SystemZ/SystemZOperands.td @@ -53,49 +53,63 @@ class PCRelAddress<ValueType vt, string self, AsmOperandClass asmop> // Constructs an AsmOperandClass for addressing mode FORMAT, treating the // registers as having BITSIZE bits and displacements as having DISPSIZE bits. -class AddressAsmOperand<string format, string bitsize, string dispsize> +// LENGTH is "LenN" for addresses with an N-bit length field, otherwise it +// is "". +class AddressAsmOperand<string format, string bitsize, string dispsize, + string length = ""> : AsmOperandClass { - let Name = format##bitsize##"Disp"##dispsize; + let Name = format##bitsize##"Disp"##dispsize##length; let ParserMethod = "parse"##format##bitsize; let RenderMethod = "add"##format##"Operands"; } // Constructs both a DAG pattern and instruction operand for an addressing mode. 
-// The mode is selected by custom code in select<TYPE><DISPSIZE><SUFFIX>(), -// encoded by custom code in get<FORMAT><DISPSIZE>Encoding() and decoded -// by custom code in decode<TYPE><BITSIZE>Disp<DISPSIZE>Operand(). -// The address registers have BITSIZE bits and displacements have -// DISPSIZE bits. NUMOPS is the number of operands that make up an -// address and OPERANDS lists the types of those operands using (ops ...). -// FORMAT is the type of addressing mode, which needs to match the names -// used in AddressAsmOperand. -class AddressingMode<string type, string bitsize, string dispsize, - string suffix, int numops, string format, dag operands> +// FORMAT, BITSIZE, DISPSIZE and LENGTH are the parameters to an associated +// AddressAsmOperand. OPERANDS is a list of NUMOPS individual operands +// (base register, displacement, etc.). SELTYPE is the type of the memory +// operand for selection purposes; sometimes we want different selection +// choices for the same underlying addressing mode. SUFFIX is similarly +// a suffix appended to the displacement for selection purposes; +// e.g. we want to reject small 20-bit displacements if a 12-bit form +// also exists, but we want to accept them otherwise. +class AddressingMode<string seltype, string bitsize, string dispsize, + string suffix, string length, int numops, string format, + dag operands> : ComplexPattern<!cast<ValueType>("i"##bitsize), numops, - "select"##type##dispsize##suffix, + "select"##seltype##dispsize##suffix##length, [add, sub, or, frameindex, z_adjdynalloc]>, Operand<!cast<ValueType>("i"##bitsize)> { let PrintMethod = "print"##format##"Operand"; - let EncoderMethod = "get"##format##dispsize##"Encoding"; - let DecoderMethod = "decode"##format##bitsize##"Disp"##dispsize##"Operand"; + let EncoderMethod = "get"##format##dispsize##length##"Encoding"; + let DecoderMethod = + "decode"##format##bitsize##"Disp"##dispsize##length##"Operand"; let MIOperandInfo = operands; let ParserMatchClass = - !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize); + !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length); } // An addressing mode with a base and displacement but no index. class BDMode<string type, string bitsize, string dispsize, string suffix> - : AddressingMode<type, bitsize, dispsize, suffix, 2, "BDAddr", + : AddressingMode<type, bitsize, dispsize, suffix, "", 2, "BDAddr", (ops !cast<RegisterOperand>("ADDR"##bitsize), !cast<Immediate>("disp"##dispsize##"imm"##bitsize))>; // An addressing mode with a base, displacement and index. class BDXMode<string type, string bitsize, string dispsize, string suffix> - : AddressingMode<type, bitsize, dispsize, suffix, 3, "BDXAddr", + : AddressingMode<type, bitsize, dispsize, suffix, "", 3, "BDXAddr", (ops !cast<RegisterOperand>("ADDR"##bitsize), !cast<Immediate>("disp"##dispsize##"imm"##bitsize), !cast<RegisterOperand>("ADDR"##bitsize))>; +// A BDMode paired with an immediate length operand of LENSIZE bits. 
+class BDLMode<string type, string bitsize, string dispsize, string suffix, + string lensize> + : AddressingMode<type, bitsize, dispsize, suffix, "Len"##lensize, 3, + "BDLAddr", + (ops !cast<RegisterOperand>("ADDR"##bitsize), + !cast<Immediate>("disp"##dispsize##"imm"##bitsize), + !cast<Immediate>("imm"##bitsize))>; + //===----------------------------------------------------------------------===// // Extracting immediate operands from nodes // These all create MVT::i64 nodes to ensure the value is not sign-extended @@ -205,6 +219,11 @@ def uimm8 : Immediate<i8, [{}], UIMM8, "U8Imm">; // i32 immediates //===----------------------------------------------------------------------===// +// Immediates for 8-bit lengths. +def imm32len8 : Immediate<i32, [{ + return isUInt<8>(N->getZExtValue() - 1); +}], NOOP_SDNodeXForm, "U32Imm">; + // Immediates for the lower and upper 16 bits of an i32, with the other // bits of the i32 being zero. def imm32ll16 : Immediate<i32, [{ @@ -402,15 +421,16 @@ def disp12imm64 : Operand<i64>; def disp20imm32 : Operand<i32>; def disp20imm64 : Operand<i64>; -def BDAddr32Disp12 : AddressAsmOperand<"BDAddr", "32", "12">; -def BDAddr32Disp20 : AddressAsmOperand<"BDAddr", "32", "20">; -def BDAddr64Disp12 : AddressAsmOperand<"BDAddr", "64", "12">; -def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">; -def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">; -def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; +def BDAddr32Disp12 : AddressAsmOperand<"BDAddr", "32", "12">; +def BDAddr32Disp20 : AddressAsmOperand<"BDAddr", "32", "20">; +def BDAddr64Disp12 : AddressAsmOperand<"BDAddr", "64", "12">; +def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">; +def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">; +def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; +def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">; // DAG patterns and operands for addressing modes. 
Each mode has -// the form <type><range><group> where: +// the form <type><range><group>[<len>] where: // // <type> is one of: // shift : base + displacement (32-bit) @@ -418,6 +438,7 @@ def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; // bdxaddr : base + displacement + index // laaddr : like bdxaddr, but used for Load Address operations // dynalloc : base + displacement + index + ADJDYNALLOC +// bdladdr : base + displacement with a length field // // <range> is one of: // 12 : the displacement is an unsigned 12-bit value @@ -428,20 +449,26 @@ def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; // range value (12 or 20) // only : used when there is no equivalent instruction with the opposite // range value -def shift12only : BDMode <"BDAddr", "32", "12", "Only">; -def shift20only : BDMode <"BDAddr", "32", "20", "Only">; -def bdaddr12only : BDMode <"BDAddr", "64", "12", "Only">; -def bdaddr12pair : BDMode <"BDAddr", "64", "12", "Pair">; -def bdaddr20only : BDMode <"BDAddr", "64", "20", "Only">; -def bdaddr20pair : BDMode <"BDAddr", "64", "20", "Pair">; -def bdxaddr12only : BDXMode<"BDXAddr", "64", "12", "Only">; -def bdxaddr12pair : BDXMode<"BDXAddr", "64", "12", "Pair">; -def bdxaddr20only : BDXMode<"BDXAddr", "64", "20", "Only">; -def bdxaddr20only128 : BDXMode<"BDXAddr", "64", "20", "Only128">; -def bdxaddr20pair : BDXMode<"BDXAddr", "64", "20", "Pair">; -def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">; -def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">; -def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">; +// +// <len> is one of: +// +// <empty> : there is no length field +// len8 : the length field is 8 bits, with a range of [1, 0x100]. +def shift12only : BDMode <"BDAddr", "32", "12", "Only">; +def shift20only : BDMode <"BDAddr", "32", "20", "Only">; +def bdaddr12only : BDMode <"BDAddr", "64", "12", "Only">; +def bdaddr12pair : BDMode <"BDAddr", "64", "12", "Pair">; +def bdaddr20only : BDMode <"BDAddr", "64", "20", "Only">; +def bdaddr20pair : BDMode <"BDAddr", "64", "20", "Pair">; +def bdxaddr12only : BDXMode<"BDXAddr", "64", "12", "Only">; +def bdxaddr12pair : BDXMode<"BDXAddr", "64", "12", "Pair">; +def bdxaddr20only : BDXMode<"BDXAddr", "64", "20", "Only">; +def bdxaddr20only128 : BDXMode<"BDXAddr", "64", "20", "Only128">; +def bdxaddr20pair : BDXMode<"BDXAddr", "64", "20", "Pair">; +def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">; +def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">; +def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">; +def bdladdr12onlylen8 : BDLMode<"BDLAddr", "64", "12", "Only", "8">; //===----------------------------------------------------------------------===// // Miscellaneous diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index ab01b25..6a3af2b 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -15,13 +15,15 @@ def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i64>, SDTCisVT<1, i64>]>; def SDT_ZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; def SDT_ZCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; -def SDT_ZBRCCMask : SDTypeProfile<0, 2, +def SDT_ZBRCCMask : SDTypeProfile<0, 3, [SDTCisVT<0, i8>, - SDTCisVT<1, OtherVT>]>; -def SDT_ZSelectCCMask : SDTypeProfile<1, 3, + SDTCisVT<1, i8>, + SDTCisVT<2, OtherVT>]>; +def SDT_ZSelectCCMask : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, - SDTCisVT<3, i8>]>; + SDTCisVT<3, i8>, + SDTCisVT<4, i8>]>; def SDT_ZWrapPtr : SDTypeProfile<1, 
1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; @@ -52,6 +54,10 @@ def SDT_ZAtomicCmpSwapW : SDTypeProfile<1, 6, SDTCisVT<4, i32>, SDTCisVT<5, i32>, SDTCisVT<6, i32>]>; +def SDT_ZCopy : SDTypeProfile<0, 3, + [SDTCisPtrTy<0>, + SDTCisPtrTy<1>, + SDTCisVT<2, i32>]>; //===----------------------------------------------------------------------===// // Node definitions @@ -81,6 +87,7 @@ def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS", SDT_ZExtractAccess>; def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>; +def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>; def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>; def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>; def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>; @@ -102,6 +109,9 @@ def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">; def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">; def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>; +def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZCopy, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; + //===----------------------------------------------------------------------===// // Pattern fragments //===----------------------------------------------------------------------===// @@ -120,6 +130,20 @@ def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>; def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>; def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>; +// Extending loads in which the extension type doesn't matter. +def anyextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ + return cast<LoadSDNode>(N)->getExtensionType() != ISD::NON_EXTLOAD; +}]>; +def anyextloadi8 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; +def anyextloadi16 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; +def anyextloadi32 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + // Aligned loads. class AlignedLoad<SDPatternOperator load> : PatFrag<(ops node:$addr), (load node:$addr), [{ @@ -149,7 +173,10 @@ class NonvolatileLoad<SDPatternOperator load> LoadSDNode *Load = cast<LoadSDNode>(N); return !Load->isVolatile(); }]>; -def nonvolatile_load : NonvolatileLoad<load>; +def nonvolatile_load : NonvolatileLoad<load>; +def nonvolatile_anyextloadi8 : NonvolatileLoad<anyextloadi8>; +def nonvolatile_anyextloadi16 : NonvolatileLoad<anyextloadi16>; +def nonvolatile_anyextloadi32 : NonvolatileLoad<anyextloadi32>; // Non-volatile stores. class NonvolatileStore<SDPatternOperator store> @@ -157,7 +184,10 @@ class NonvolatileStore<SDPatternOperator store> StoreSDNode *Store = cast<StoreSDNode>(N); return !Store->isVolatile(); }]>; -def nonvolatile_store : NonvolatileStore<store>; +def nonvolatile_store : NonvolatileStore<store>; +def nonvolatile_truncstorei8 : NonvolatileStore<truncstorei8>; +def nonvolatile_truncstorei16 : NonvolatileStore<truncstorei16>; +def nonvolatile_truncstorei32 : NonvolatileStore<truncstorei32>; // Insertions. 
diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td
index 3689f74..c442ae0 100644
--- a/lib/Target/SystemZ/SystemZPatterns.td
+++ b/lib/Target/SystemZ/SystemZPatterns.td
@@ -50,12 +50,8 @@ class RMWI<SDPatternOperator load, SDPatternOperator operator,
 // memory location.  IMM is the type of the second operand.
 multiclass RMWIByte<SDPatternOperator operator, AddressingMode mode,
                     Instruction insn> {
-  def : RMWI<zextloadi8, operator, truncstorei8, mode, imm32, insn>;
-  def : RMWI<zextloadi8, operator, truncstorei8, mode, imm64, insn>;
-  def : RMWI<sextloadi8, operator, truncstorei8, mode, imm32, insn>;
-  def : RMWI<sextloadi8, operator, truncstorei8, mode, imm64, insn>;
-  def : RMWI<extloadi8, operator, truncstorei8, mode, imm32, insn>;
-  def : RMWI<extloadi8, operator, truncstorei8, mode, imm64, insn>;
+  def : RMWI<anyextloadi8, operator, truncstorei8, mode, imm32, insn>;
+  def : RMWI<anyextloadi8, operator, truncstorei8, mode, imm64, insn>;
 }
 
 // Record that INSN performs insertion TYPE into a register of class CLS.
@@ -69,3 +65,23 @@ multiclass InsertMem<string type, Instruction insn, RegisterOperand cls,
                 (load mode:$src2), cls:$src1),
             (insn cls:$src1, mode:$src2)>;
 }
+
+// Use MVC instruction INSN for a load of type LOAD followed by a store
+// of type STORE.  VT is the type of the intermediate register and LENGTH
+// is the number of bytes to copy (which may be smaller than VT).
+multiclass MVCLoadStore<SDPatternOperator load, SDPatternOperator store,
+                        ValueType vt, Instruction insn, bits<5> length> {
+  def Pat : PatFrag<(ops node:$dest, node:$src),
+                    (store (vt (load node:$src)), node:$dest),
+                    [{ return storeLoadCanUseMVC(N); }]>;
+
+  def : Pat<(!cast<SDPatternOperator>(NAME##"Pat") bdaddr12only:$dest,
+             bdaddr12only:$src),
+            (insn bdaddr12only:$dest, bdaddr12only:$src, length)>;
+}
+
+// Record that INSN is a LOAD AND TEST that can be used to compare
+// registers in CLS against zero.  The instruction has separate R1 and R2
+// operands, but they must be the same when the instruction is used like this.
+class CompareZeroFP<Instruction insn, RegisterOperand cls>
+  : Pat<(z_cmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>;
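MVCLoadStore above turns a load feeding a store into a single memory-to-memory MVC, and its comment notes that LENGTH may be smaller than VT. The reason this is safe: a truncating store of an extending load writes back exactly the bytes that were read, whatever the extension type, so copying just those bytes is equivalent. A minimal C++ demonstration of the equivalence; the names and the 16-bit width are illustrative:

    // mvc_fuse.cpp: why a load + truncating store can become a 2-byte copy.
    // A 16-bit value loaded, extended to 32 bits (the extension type does
    // not matter) and truncated back to 16 bits on store is byte-for-byte
    // the original 16-bit memory contents.
    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint16_t Src = 0xBEEF, DstA = 0, DstB = 0;

      // Load, any-extend to 32 bits, truncate on store.
      uint32_t Extended = static_cast<uint32_t>(Src);
      DstA = static_cast<uint16_t>(Extended);

      // Direct memory-to-memory copy of the same two bytes (what MVC does).
      std::memcpy(&DstB, &Src, sizeof(uint16_t));

      assert(DstA == DstB);
      return 0;
    }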
diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td
new file mode 100644
index 0000000..7e14aa7
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZProcessors.td
@@ -0,0 +1,38 @@
+//===-- SystemZ.td - SystemZ processors and features ---------*- tblgen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Processor and feature definitions.
+//
+//===----------------------------------------------------------------------===//
+
+class SystemZFeature<string extname, string intname, string desc>
+  : Predicate<"Subtarget.has"##intname##"()">,
+    AssemblerPredicate<"Feature"##intname, extname>,
+    SubtargetFeature<extname, "Has"##intname, "true", desc>;
+
+def FeatureDistinctOps : SystemZFeature<
+  "distinct-ops", "DistinctOps",
+  "Assume that the distinct-operands facility is installed"
+>;
+
+def FeatureLoadStoreOnCond : SystemZFeature<
+  "load-store-on-cond", "LoadStoreOnCond",
+  "Assume that the load/store-on-condition facility is installed"
+>;
+
+def FeatureHighWord : SystemZFeature<
+  "high-word", "HighWord",
+  "Assume that the high-word facility is installed"
+>;
+
+def : Processor<"z10", NoItineraries, []>;
+def : Processor<"z196", NoItineraries,
+                [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord]>;
+def : Processor<"zEC12", NoItineraries,
+                [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord]>;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index c695bb3..8ce6d6a 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -53,32 +53,6 @@ SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
-bool
-SystemZRegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator SaveMBBI,
-                                           MachineBasicBlock::iterator &UseMBBI,
-                                           const TargetRegisterClass *RC,
-                                           unsigned Reg) const {
-  MachineFunction &MF = *MBB.getParent();
-  const SystemZInstrInfo &TII =
-    *static_cast<const SystemZInstrInfo*>(TM.getInstrInfo());
-  const SystemZFrameLowering *TFI =
-    static_cast<const SystemZFrameLowering *>(TM.getFrameLowering());
-  unsigned Base = getFrameRegister(MF);
-  uint64_t Offset = TFI->getEmergencySpillSlotOffset(MF);
-  DebugLoc DL;
-
-  unsigned LoadOpcode, StoreOpcode;
-  TII.getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode);
-
-  // The offset must always be in range of a 12-bit unsigned displacement.
-  BuildMI(MBB, SaveMBBI, DL, TII.get(StoreOpcode))
-    .addReg(Reg, RegState::Kill).addReg(Base).addImm(Offset).addReg(0);
-  BuildMI(MBB, UseMBBI, DL, TII.get(LoadOpcode), Reg)
-    .addReg(Base).addImm(Offset).addReg(0);
-  return true;
-}
-
 void
 SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                          int SPAdj, unsigned FIOperandNum,
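The three SystemZFeature definitions and the processor table above reduce to a simple CPU-to-feature-set mapping (the real one is generated by TableGen into SystemZGenSubtargetInfo). A toy restatement in C++, using only the facts visible in the table:

    // subtarget_features.cpp: toy mirror of the processor table above.
    #include <cassert>
    #include <string>

    struct Features {
      bool DistinctOps = false;
      bool LoadStoreOnCond = false;
      bool HighWord = false;
    };

    // Per the Processor defs: z10 has none of the three facilities;
    // z196 and zEC12 have all of them.
    static Features featuresForCPU(const std::string &CPU) {
      Features F;
      if (CPU == "z196" || CPU == "zEC12")
        F.DistinctOps = F.LoadStoreOnCond = F.HighWord = true;
      return F;
    }

    int main() {
      assert(!featuresForCPU("z10").LoadStoreOnCond);
      assert(featuresForCPU("z196").HighWord);
      assert(featuresForCPU("zEC12").DistinctOps);
      return 0;
    }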
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 047cb4a..c447e4d 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -52,11 +52,6 @@ public:
     const LLVM_OVERRIDE;
   virtual BitVector getReservedRegs(const MachineFunction &MF)
     const LLVM_OVERRIDE;
-  virtual bool saveScavengerRegister(MachineBasicBlock &MBB,
-                                     MachineBasicBlock::iterator SaveMBBI,
-                                     MachineBasicBlock::iterator &UseMBBI,
-                                     const TargetRegisterClass *RC,
-                                     unsigned Reg) const LLVM_OVERRIDE;
   virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                    int SPAdj, unsigned FIOperandNum,
                                    RegScavenger *RS) const LLVM_OVERRIDE;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index d65553e..ffffe72 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -147,5 +147,7 @@ defm FP128 : SystemZRegClass<"FP128", f128, 128, (add F0Q, F1Q, F4Q, F5Q,
 // Other registers
 //===----------------------------------------------------------------------===//
 
-// The 2-bit condition code field of the PSW.
+// The 2-bit condition code field of the PSW.  Every register named in an
+// inline asm needs a class associated with it.
 def CC : SystemZReg<"cc">;
+def CCRegs : RegisterClass<"SystemZ", [i32], 32, (add CC)>;
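The new CCRegs class exists, per the comment, because every register nameable in inline asm needs a register class; from the user's side this is what lets GCC-style asm for SystemZ list the condition code as a clobber. A hedged example: AR is the 32-bit add-register instruction and "d" the general-register constraint, and the snippet falls back to plain addition when not compiled for SystemZ:

    // cc_clobber.cpp: user-level view of why CC needs a register class.
    // AR adds two 32-bit registers and sets the condition code, so "cc"
    // must appear in the clobber list.
    #include <cassert>

    static int addChecked(int A, int B) {
    #if defined(__s390x__) || defined(__s390__)
      asm("ar %0,%1" : "+d"(A) : "d"(B) : "cc");  // "d": general register
      return A;
    #else
      return A + B;  // portable fallback for other hosts
    #endif
    }

    int main() {
      assert(addChecked(2, 3) == 5);
      return 0;
    }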
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
new file mode 100644
index 0000000..4ca9292
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -0,0 +1,127 @@
+//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-selectiondag-info"
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+using namespace llvm;
+
+SystemZSelectionDAGInfo::
+SystemZSelectionDAGInfo(const SystemZTargetMachine &TM)
+  : TargetSelectionDAGInfo(TM) {
+}
+
+SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {
+}
+
+SDValue SystemZSelectionDAGInfo::
+EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+                        SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
+                        bool IsVolatile, bool AlwaysInline,
+                        MachinePointerInfo DstPtrInfo,
+                        MachinePointerInfo SrcPtrInfo) const {
+  if (IsVolatile)
+    return SDValue();
+
+  if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) {
+    uint64_t Bytes = CSize->getZExtValue();
+    if (Bytes >= 1 && Bytes <= 0x100) {
+      // A single MVC.
+      return DAG.getNode(SystemZISD::MVC, DL, MVT::Other,
+                         Chain, Dst, Src, Size);
+    }
+  }
+  return SDValue();
+}
+
+// Handle a memset of 1, 2, 4 or 8 bytes with the operands given by
+// Chain, Dst, ByteVal and Size.  These cases are expected to use
+// MVI, MVHHI, MVHI and MVGHI respectively.
+static SDValue memsetStore(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+                           SDValue Dst, uint64_t ByteVal, uint64_t Size,
+                           unsigned Align,
+                           MachinePointerInfo DstPtrInfo) {
+  uint64_t StoreVal = ByteVal;
+  for (unsigned I = 1; I < Size; ++I)
+    StoreVal |= ByteVal << (I * 8);
+  return DAG.getStore(Chain, DL,
+                      DAG.getConstant(StoreVal, MVT::getIntegerVT(Size * 8)),
+                      Dst, DstPtrInfo, false, false, Align);
+}
+
+SDValue SystemZSelectionDAGInfo::
+EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+                        SDValue Dst, SDValue Byte, SDValue Size,
+                        unsigned Align, bool IsVolatile,
+                        MachinePointerInfo DstPtrInfo) const {
+  EVT DstVT = Dst.getValueType();
+
+  if (IsVolatile)
+    return SDValue();
+
+  if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) {
+    uint64_t Bytes = CSize->getZExtValue();
+    if (Bytes == 0)
+      return SDValue();
+    if (ConstantSDNode *CByte = dyn_cast<ConstantSDNode>(Byte)) {
+      // Handle cases that can be done using at most two of
+      // MVI, MVHI, MVHHI and MVGHI.  The latter two can only be
+      // used if ByteVal is all zeros or all ones; in other cases,
+      // we can move at most 2 halfwords.
+      uint64_t ByteVal = CByte->getZExtValue();
+      if (ByteVal == 0 || ByteVal == 255 ?
+          Bytes <= 16 && CountPopulation_64(Bytes) <= 2 :
+          Bytes <= 4) {
+        unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes);
+        unsigned Size2 = Bytes - Size1;
+        SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1,
+                                     Align, DstPtrInfo);
+        if (Size2 == 0)
+          return Chain1;
+        Dst = DAG.getNode(ISD::ADD, DL, DstVT, Dst,
+                          DAG.getConstant(Size1, DstVT));
+        DstPtrInfo = DstPtrInfo.getWithOffset(Size1);
+        SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2,
+                                     std::min(Align, Size1), DstPtrInfo);
+        return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2);
+      }
+    } else {
+      // Handle one and two bytes using STC.
+      if (Bytes <= 2) {
+        SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo,
+                                      false, false, Align);
+        if (Bytes == 1)
+          return Chain1;
+        SDValue Dst2 = DAG.getNode(ISD::ADD, DL, DstVT, Dst,
+                                   DAG.getConstant(1, DstVT));
+        SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2,
+                                      DstPtrInfo.getWithOffset(1),
+                                      false, false, 1);
+        return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2);
+      }
+    }
+    assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already");
+    if (Bytes <= 0x101) {
+      // Copy the byte to the first location and then use MVC to copy
+      // it to the rest.
+      Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo,
+                           false, false, Align);
+      SDValue Dst2 = DAG.getNode(ISD::ADD, DL, DstVT, Dst,
+                                 DAG.getConstant(1, DstVT));
+      return DAG.getNode(SystemZISD::MVC, DL, MVT::Other, Chain, Dst2, Dst,
+                         DAG.getConstant(Bytes - 1, MVT::i32));
+    }
+  }
+  return SDValue();
+}
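Two pieces of arithmetic in EmitTargetCodeForMemset above deserve a worked example: memsetStore replicates the fill byte across the store width (0xAB over four bytes gives 0xABABABAB), and a constant length with at most two set bits is split into at most two power-of-two stores, with 16 capped at two 8-byte stores. A standalone C++ mirror of that arithmetic; the helper names are illustrative:

    // memset_split.cpp: toy mirror of the constant-memset decomposition.
    #include <cassert>
    #include <cstdint>

    // Replicate ByteVal across Size bytes, as memsetStore does.
    static uint64_t replicate(uint64_t ByteVal, unsigned Size) {
      uint64_t StoreVal = ByteVal;
      for (unsigned I = 1; I < Size; ++I)
        StoreVal |= ByteVal << (I * 8);
      return StoreVal;
    }

    // Largest power of two <= Bytes, capped at 8 (the widest single store);
    // this matches Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes).
    static unsigned firstStoreSize(unsigned Bytes) {
      unsigned Size = 1;
      while (Size * 2 <= Bytes && Size < 8)
        Size *= 2;
      return Size;
    }

    int main() {
      assert(replicate(0xAB, 4) == 0xABABABABu);  // 4-byte store value
      assert(firstStoreSize(12) == 8);            // 12 bytes -> 8 + 4
      assert(12 - firstStoreSize(12) == 4);
      assert(firstStoreSize(16) == 8);            // 16 bytes -> 8 + 8
      return 0;
    }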
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
new file mode 100644
index 0000000..9138a9c
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -0,0 +1,46 @@
+//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SystemZ subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZSELECTIONDAGINFO_H
+#define SYSTEMZSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SystemZTargetMachine;
+
+class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+  explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM);
+  ~SystemZSelectionDAGInfo();
+
+  virtual
+  SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+                                  SDValue Dst, SDValue Src,
+                                  SDValue Size, unsigned Align,
+                                  bool IsVolatile, bool AlwaysInline,
+                                  MachinePointerInfo DstPtrInfo,
+                                  MachinePointerInfo SrcPtrInfo) const
+    LLVM_OVERRIDE;
+
+  virtual SDValue
+  EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL,
+                          SDValue Chain, SDValue Dst, SDValue Byte,
+                          SDValue Size, unsigned Align, bool IsVolatile,
+                          MachinePointerInfo DstPtrInfo) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index cfd3324..036ec05 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -9,6 +9,7 @@
 
 #include "SystemZSubtarget.h"
 #include "llvm/IR/GlobalValue.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
 
 #define GET_SUBTARGETINFO_TARGET_DESC
 #define GET_SUBTARGETINFO_CTOR
@@ -19,7 +20,8 @@ using namespace llvm;
 SystemZSubtarget::SystemZSubtarget(const std::string &TT,
                                    const std::string &CPU,
                                    const std::string &FS)
-  : SystemZGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT) {
+  : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
+    HasLoadStoreOnCond(false), HasHighWord(false), TargetTriple(TT) {
   std::string CPUName = CPU;
   if (CPUName.empty())
     CPUName = "z10";
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index 8d4d450..4efb58d 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -26,6 +26,11 @@ class GlobalValue;
 class StringRef;
 
 class SystemZSubtarget : public SystemZGenSubtargetInfo {
+protected:
+  bool HasDistinctOps;
+  bool HasLoadStoreOnCond;
+  bool HasHighWord;
+
 private:
   Triple TargetTriple;
 
@@ -36,6 +41,15 @@ public:
   // Automatically generated by tblgen.
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
+  // Return true if the target has the distinct-operands facility.
+  bool hasDistinctOps() const { return HasDistinctOps; }
+
+  // Return true if the target has the load/store-on-condition facility.
+  bool hasLoadStoreOnCond() const { return HasLoadStoreOnCond; }
+
+  // Return true if the target has the high-word facility.
+  bool hasHighWord() const { return HasHighWord; }
+
   // Return true if GV can be accessed using LARL for reloc model RM
   // and code model CM.
   bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM,
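Both hooks declared above follow the standard TargetSelectionDAGInfo convention: return a real SDValue when the target can lower the operation itself, or a null SDValue() to fall back to the generic memcpy/memset expansion, which is exactly what the .cpp file does for volatile or over-long copies. The same convention in miniature, modeled with std::optional (C++17) purely for illustration:

    // fallback.cpp: the hook convention in miniature.  A "null" result
    // tells the caller to use its generic expansion instead.
    #include <cassert>
    #include <optional>
    #include <string>

    static std::optional<std::string> targetMemcpy(unsigned Bytes,
                                                   bool Volatile) {
      if (Volatile || Bytes < 1 || Bytes > 0x100)
        return std::nullopt;      // let generic lowering handle it
      return std::string("MVC");  // a single instruction covers this copy
    }

    int main() {
      assert(targetMemcpy(16, false).value() == "MVC");
      assert(!targetMemcpy(0x101, false).has_value());  // too long for one MVC
      assert(!targetMemcpy(8, true).has_value());       // volatile: hands off
      return 0;
    }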
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 6e7540c..856183c 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -48,6 +48,7 @@ public:
   }
 
   virtual bool addInstSelector() LLVM_OVERRIDE;
+  virtual bool addPreSched2() LLVM_OVERRIDE;
   virtual bool addPreEmitPass() LLVM_OVERRIDE;
 };
 } // end anonymous namespace
@@ -57,7 +58,38 @@ bool SystemZPassConfig::addInstSelector() {
   return false;
 }
 
+bool SystemZPassConfig::addPreSched2() {
+  if (getSystemZTargetMachine().getSubtargetImpl()->hasLoadStoreOnCond())
+    addPass(&IfConverterID);
+  return true;
+}
+
 bool SystemZPassConfig::addPreEmitPass() {
+  // We eliminate comparisons here rather than earlier because some
+  // transformations can change the set of available CC values and we
+  // generally want those transformations to have priority.  This is
+  // especially true in the commonest case where the result of the comparison
+  // is used by a single in-range branch instruction, since we will then
+  // be able to fuse the compare and the branch instead.
+  //
+  // For example, two-address NILF can sometimes be converted into
+  // three-address RISBLG.  NILF produces a CC value that indicates whether
+  // the low word is zero, but RISBLG does not modify CC at all.  On the
+  // other hand, 64-bit ANDs like NILL can sometimes be converted to RISBG.
+  // The CC value produced by NILL isn't useful for our purposes, but the
+  // value produced by RISBG can be used for any comparison with zero
+  // (not just equality).  So there are some transformations that lose
+  // CC values (while still being worthwhile) and others that happen to make
+  // the CC result more useful than it was originally.
+  //
+  // Another reason is that we only want to use BRANCH ON COUNT in cases
+  // where we know that the count register is not going to be spilled.
+  //
+  // Doing it so late makes it more likely that a register will be reused
+  // between the comparison and the branch, but it isn't clear whether
+  // preventing that would be a win or not.
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createSystemZElimComparePass(getSystemZTargetMachine()));
   addPass(createSystemZLongBranchPass(getSystemZTargetMachine()));
   return true;
 }
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 98614e7..a99a98e 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -20,10 +20,10 @@
 #include "SystemZInstrInfo.h"
 #include "SystemZRegisterInfo.h"
 #include "SystemZSubtarget.h"
+#include "SystemZSelectionDAGInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSelectionDAGInfo.h"
 
 namespace llvm {
 
@@ -32,7 +32,7 @@ class SystemZTargetMachine : public LLVMTargetMachine {
   const DataLayout DL;
   SystemZInstrInfo InstrInfo;
   SystemZTargetLowering TLInfo;
-  TargetSelectionDAGInfo TSInfo;
+  SystemZSelectionDAGInfo TSInfo;
   SystemZFrameLowering FrameLowering;
 
 public:
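The long comment in addPreEmitPass boils down to one core transformation: when the instruction that defines a value already set an equivalent condition code, an explicit compare of that value against zero is redundant and can be deleted (or fused into the branch). A toy sketch of that idea over a flat instruction list; the Op structure and this pass-in-miniature are illustrative, not the actual SystemZElimCompare implementation:

    // elim_compare.cpp: toy sketch of late compare elimination.
    #include <cassert>
    #include <vector>

    struct Op {
      enum Kind { And, Compare, Branch } K;
      int Def;      // value defined (for And) or tested (for Compare)
      bool SetsCC;  // does this op leave a usable CC for "== 0" tests?
    };

    // Remove a compare-with-zero when the instruction defining its operand
    // already set an equivalent CC and nothing clobbered CC in between.
    static unsigned elimCompares(std::vector<Op> &Ops) {
      unsigned Removed = 0;
      for (size_t I = 1; I < Ops.size(); ++I) {
        if (Ops[I].K == Op::Compare && Ops[I - 1].K == Op::And &&
            Ops[I - 1].Def == Ops[I].Def && Ops[I - 1].SetsCC) {
          Ops.erase(Ops.begin() + I);
          ++Removed;
          --I;
        }
      }
      return Removed;
    }

    int main() {
      // and %r1; compare %r1, 0; branch  ==>  and %r1; branch
      std::vector<Op> Ops = { { Op::And, 1, true },
                              { Op::Compare, 1, false },
                              { Op::Branch, 0, false } };
      assert(elimCompares(Ops) == 1 && Ops.size() == 2);
      return 0;
    }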