Diffstat (limited to 'lib/Target/X86')
36 files changed, 1041 insertions, 596 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index c352bfc..d45dd35 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -15,9 +15,11 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -25,17 +27,15 @@ #include "llvm/ADT/Twine.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; namespace { struct X86Operand; class X86ATTAsmParser : public TargetAsmParser { + MCSubtargetInfo &STI; MCAsmParser &Parser; - TargetMachine &TM; - -protected: - unsigned Is64Bit : 1; private: MCAsmParser &getParser() const { return Parser; } @@ -61,6 +61,11 @@ private: /// or %es:(%edi) in 32bit mode. bool isDstOp(X86Operand &Op); + bool is64BitMode() const { + // FIXME: Can tablegen auto-generate this? + return (STI.getFeatureBits() & X86::Mode64Bit) != 0; + } + /// @name Auto-generated Matcher Functions /// { @@ -70,12 +75,11 @@ private: /// } public: - X86ATTAsmParser(const Target &T, MCAsmParser &parser, TargetMachine &TM) - : TargetAsmParser(T), Parser(parser), TM(TM) { + X86ATTAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) + : TargetAsmParser(), STI(sti), Parser(parser) { // Initialize the set of available features. - setAvailableFeatures(ComputeAvailableFeatures( - &TM.getSubtarget<X86Subtarget>())); + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); @@ -84,23 +88,6 @@ public: virtual bool ParseDirective(AsmToken DirectiveID); }; - -class X86_32ATTAsmParser : public X86ATTAsmParser { -public: - X86_32ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM) - : X86ATTAsmParser(T, Parser, TM) { - Is64Bit = false; - } -}; - -class X86_64ATTAsmParser : public X86ATTAsmParser { -public: - X86_64ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM) - : X86ATTAsmParser(T, Parser, TM) { - Is64Bit = true; - } -}; - } // end anonymous namespace /// @name Auto-generated Match Functions @@ -155,7 +142,7 @@ struct X86Operand : public MCParsedAsmOperand { /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const { return EndLoc; } - virtual void dump(raw_ostream &OS) const {} + virtual void print(raw_ostream &OS) const {} StringRef getToken() const { assert(Kind == Token && "Invalid access!"); @@ -365,7 +352,7 @@ struct X86Operand : public MCParsedAsmOperand { } // end anonymous namespace. bool X86ATTAsmParser::isSrcOp(X86Operand &Op) { - unsigned basereg = Is64Bit ? X86::RSI : X86::ESI; + unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI; return (Op.isMem() && (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) && @@ -375,7 +362,7 @@ bool X86ATTAsmParser::isSrcOp(X86Operand &Op) { } bool X86ATTAsmParser::isDstOp(X86Operand &Op) { - unsigned basereg = Is64Bit ? X86::RDI : X86::EDI; + unsigned basereg = is64BitMode() ? 
X86::RDI : X86::EDI; return Op.isMem() && Op.Mem.SegReg == X86::ES && isa<MCConstantExpr>(Op.Mem.Disp) && @@ -406,7 +393,7 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, // FIXME: This should be done using Requires<In32BitMode> and // Requires<In64BitMode> so "eiz" usage in 64-bit instructions // can be also checked. - if (RegNo == X86::RIZ && !Is64Bit) + if (RegNo == X86::RIZ && !is64BitMode()) return Error(Tok.getLoc(), "riz register in 64-bit mode only"); // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. @@ -710,23 +697,6 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, } } - // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq - if (PatchedName.startswith("vpclmul")) { - unsigned CLMULQuadWordSelect = StringSwitch<unsigned>( - PatchedName.slice(7, PatchedName.size() - 2)) - .Case("lqlq", 0x00) // src1[63:0], src2[63:0] - .Case("hqlq", 0x01) // src1[127:64], src2[63:0] - .Case("lqhq", 0x10) // src1[63:0], src2[127:64] - .Case("hqhq", 0x11) // src1[127:64], src2[127:64] - .Default(~0U); - if (CLMULQuadWordSelect != ~0U) { - ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect, - getParser().getContext()); - assert(PatchedName.endswith("dq") && "Unexpected mnemonic!"); - PatchedName = "vpclmulqdq"; - } - } - Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); if (ExtraImmOp) @@ -843,7 +813,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]" if (Name.startswith("movs") && Operands.size() == 3 && (Name == "movsb" || Name == "movsw" || Name == "movsl" || - (Is64Bit && Name == "movsq"))) { + (is64BitMode() && Name == "movsq"))) { X86Operand &Op = *(X86Operand*)Operands.begin()[1]; X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; if (isSrcOp(Op) && isDstOp(Op2)) { @@ -856,7 +826,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]" if (Name.startswith("lods") && Operands.size() == 3 && (Name == "lods" || Name == "lodsb" || Name == "lodsw" || - Name == "lodsl" || (Is64Bit && Name == "lodsq"))) { + Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) { X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]); if (isSrcOp(*Op1) && Op2->isReg()) { @@ -886,7 +856,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]" if (Name.startswith("stos") && Operands.size() == 3 && (Name == "stos" || Name == "stosb" || Name == "stosw" || - Name == "stosl" || (Is64Bit && Name == "stosq"))) { + Name == "stosl" || (is64BitMode() && Name == "stosq"))) { X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]); if (isDstOp(*Op2) && Op1->isReg()) { @@ -1161,8 +1131,8 @@ extern "C" void LLVMInitializeX86AsmLexer(); // Force static initialization. 
extern "C" void LLVMInitializeX86AsmParser() { - RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target); - RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target); + RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target); + RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target); LLVMInitializeX86AsmLexer(); } diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 633d982..b112f9f 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -27,7 +27,6 @@ set(sources X86InstrInfo.cpp X86JITInfo.cpp X86MachObjectWriter.cpp - X86MCAsmInfo.cpp X86MCCodeEmitter.cpp X86MCInstLower.cpp X86RegisterInfo.cpp diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 53738b1..c37d879 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -15,8 +15,7 @@ #define DEBUG_TYPE "asm-printer" #include "X86ATTInstPrinter.h" #include "X86InstComments.h" -#include "X86Subtarget.h" -#include "MCTargetDesc/X86TargetDesc.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" @@ -31,11 +30,8 @@ using namespace llvm; #define PRINT_ALIAS_INSTR #include "X86GenAsmWriter.inc" -X86ATTInstPrinter::X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) +X86ATTInstPrinter::X86ATTInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) { - // Initialize the set of available features. - setAvailableFeatures(ComputeAvailableFeatures( - &TM.getSubtarget<X86Subtarget>())); } void X86ATTInstPrinter::printRegName(raw_ostream &OS, diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index 5f939b6..5426e5c 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -19,19 +19,15 @@ namespace llvm { class MCOperand; -class X86Subtarget; -class TargetMachine; class X86ATTInstPrinter : public MCInstPrinter { public: - X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI); + X86ATTInstPrinter(const MCAsmInfo &MAI); virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &OS); virtual StringRef getOpcodeName(unsigned Opcode) const; - // Methods used to print the alias of an instruction. - unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const; // Autogenerated by tblgen, returns true if we successfully printed an // alias. 
bool printAliasInstr(const MCInst *MI, raw_ostream &OS); diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 5461c83..4e28dfe 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #include "X86InstComments.h" -#include "MCTargetDesc/X86TargetDesc.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" #include "../Utils/X86ShuffleDecode.h" diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 411d832..506e26c 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -15,8 +15,7 @@ #define DEBUG_TYPE "asm-printer" #include "X86IntelInstPrinter.h" #include "X86InstComments.h" -#include "X86Subtarget.h" -#include "MCTargetDesc/X86TargetDesc.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index c8030c3..e84a194 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -20,11 +20,10 @@ namespace llvm { class MCOperand; -class TargetMachine; class X86IntelInstPrinter : public MCInstPrinter { public: - X86IntelInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) + X86IntelInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt index 50be61c..ca88f8f 100644 --- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt @@ -1,2 +1,7 @@ -add_llvm_library(LLVMX86Desc X86TargetDesc.cpp) +add_llvm_library(LLVMX86Desc + X86MCTargetDesc.cpp + X86MCAsmInfo.cpp + ) +# Hack: we need to include 'main' target directory to grab private headers +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) 
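
The asm-parser hunks above drop the hard-coded Is64Bit flag (and the X86_32/X86_64 parser subclasses) in favor of querying the subtarget's feature bits, so one parser class serves both targets. A minimal standalone sketch of that pattern, using simplified stand-in types rather than the real MCSubtargetInfo/X86::Mode64Bit definitions, could look like this:

    #include <cstdint>
    #include <iostream>

    // Stand-in for the tablegen'd feature bit (illustrative value only).
    static const uint64_t Mode64Bit = 1ULL << 0;

    // Stand-in for MCSubtargetInfo: just carries a feature-bit mask.
    struct SubtargetInfo {
      uint64_t FeatureBits;
      uint64_t getFeatureBits() const { return FeatureBits; }
    };

    class AsmParser {
      const SubtargetInfo &STI;
    public:
      explicit AsmParser(const SubtargetInfo &sti) : STI(sti) {}

      // Mirrors the is64BitMode() helper added in the patch above.
      bool is64BitMode() const {
        return (STI.getFeatureBits() & Mode64Bit) != 0;
      }

      // e.g. pick RSI vs. ESI the way isSrcOp() does.
      const char *movsSrcBaseReg() const {
        return is64BitMode() ? "rsi" : "esi";
      }
    };

    int main() {
      SubtargetInfo STI32{0};          // "-64bit-mode"
      SubtargetInfo STI64{Mode64Bit};  // "+64bit-mode"
      std::cout << AsmParser(STI32).movsSrcBaseReg() << "\n";  // esi
      std::cout << AsmParser(STI64).movsSrcBaseReg() << "\n";  // rsi
    }
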
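The new MCTargetDesc/X86MCTargetDesc.cpp added below also carries the host-CPU detection helpers, including X86_MC::DetectFamilyModel, which folds the extended family/model fields of CPUID leaf 1 EAX into the base fields. A standalone sketch of that decoding (same bit layout as the function in the patch; the sample EAX value is only an illustrative signature):

    #include <cstdio>

    // Decode family/model from CPUID(1).EAX the way DetectFamilyModel does.
    static void detectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
      Family = (EAX >> 8) & 0xf;             // bits 11:8
      Model  = (EAX >> 4) & 0xf;             // bits 7:4
      if (Family == 6 || Family == 0xf) {
        if (Family == 0xf)
          Family += (EAX >> 20) & 0xff;      // extended family, bits 27:20
        Model += ((EAX >> 16) & 0xf) << 4;   // extended model, bits 19:16
      }
    }

    int main() {
      unsigned Family, Model;
      // 0x000206a7 is used here as a sample signature (family 6, model 0x2a).
      detectFamilyModel(0x000206a7, Family, Model);
      std::printf("family %u, model 0x%x\n", Family, Model);
    }
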
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 2e1ec63..2703100 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "X86MCAsmInfo.h" -#include "X86TargetMachine.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -45,14 +44,17 @@ static const char *const x86_asm_table[] = { "{flags}", "", "{dirflag}", "", "{fpsr}", "", + "{fpcr}", "", "{cc}", "cc", 0,0}; -X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { +X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { + bool is64Bit = T.getArch() == Triple::x86_64; + if (is64Bit) + PointerSize = 8; + AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; - - bool is64Bit = Triple.getArch() == Triple::x86_64; TextAlignFillValue = 0x90; @@ -74,22 +76,14 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { ExceptionsType = ExceptionHandling::DwarfCFI; } -const MCExpr * -X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym, - unsigned Encoding, - MCStreamer &Streamer) const { - MCContext &Context = Streamer.getContext(); - const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context); - const MCExpr *Four = MCConstantExpr::Create(4, Context); - return MCBinaryExpr::CreateAdd(Res, Four, Context); -} - X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple) : X86MCAsmInfoDarwin(Triple) { } X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { + if (T.getArch() == Triple::x86_64) + PointerSize = 8; + AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; @@ -114,6 +108,17 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { Data64bitsDirective = 0; } +const MCExpr * +X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym, + unsigned Encoding, + MCStreamer &Streamer) const { + MCContext &Context = Streamer.getContext(); + const MCExpr *Res = + MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context); + const MCExpr *Four = MCConstantExpr::Create(4, Context); + return MCBinaryExpr::CreateAdd(Res, Four, Context); +} + const MCSection *X86ELFMCAsmInfo:: getNonexecutableStackSection(MCContext &Ctx) const { return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h index 2cd4c8e..2cd4c8e 100644 --- a/lib/Target/X86/X86MCAsmInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp new file mode 100644 index 0000000..b77f37b --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -0,0 +1,185 @@ +//===-- X86MCTargetDesc.cpp - X86 Target Descriptions -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides X86 specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "X86MCTargetDesc.h" +#include "X86MCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Host.h" + +#define GET_REGINFO_MC_DESC +#include "X86GenRegisterInfo.inc" + +#define GET_INSTRINFO_MC_DESC +#include "X86GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "X86GenSubtargetInfo.inc" + +using namespace llvm; + + +std::string X86_MC::ParseX86Triple(StringRef TT) { + Triple TheTriple(TT); + if (TheTriple.getArch() == Triple::x86_64) + return "+64bit-mode"; + return "-64bit-mode"; +} + +/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the +/// specified arguments. If we can't run cpuid on the host, return true. +bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, + unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { +#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) + #if defined(__GNUC__) + // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually. + asm ("movq\t%%rbx, %%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx, %%rsi\n\t" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value)); + return false; + #elif defined(_MSC_VER) + int registers[4]; + __cpuid(registers, value); + *rEAX = registers[0]; + *rEBX = registers[1]; + *rECX = registers[2]; + *rEDX = registers[3]; + return false; + #endif +#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) + #if defined(__GNUC__) + asm ("movl\t%%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl\t%%ebx, %%esi\n\t" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value)); + return false; + #elif defined(_MSC_VER) + __asm { + mov eax,value + cpuid + mov esi,rEAX + mov dword ptr [esi],eax + mov esi,rEBX + mov dword ptr [esi],ebx + mov esi,rECX + mov dword ptr [esi],ecx + mov esi,rEDX + mov dword ptr [esi],edx + } + return false; + #endif +#endif + return true; +} + +void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family, + unsigned &Model) { + Family = (EAX >> 8) & 0xf; // Bits 8 - 11 + Model = (EAX >> 4) & 0xf; // Bits 4 - 7 + if (Family == 6 || Family == 0xf) { + if (Family == 0xf) + // Examine extended family ID if family ID is F. + Family += (EAX >> 20) & 0xff; // Bits 20 - 27 + // Examine extended model ID if family ID is 6 or F. + Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 + } +} + +MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + std::string ArchFS = X86_MC::ParseX86Triple(TT); + if (!FS.empty()) { + if (!ArchFS.empty()) + ArchFS = ArchFS + "," + FS.str(); + else + ArchFS = FS; + } + + std::string CPUName = CPU; + if (CPUName.empty()) { +#if defined (__x86_64__) || defined(__i386__) + CPUName = sys::getHostCPUName(); +#else + CPUName = "generic"; +#endif + } + + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitX86MCSubtargetInfo(X, TT, CPUName, ArchFS); + return X; +} + +// Force static initialization. 
+extern "C" void LLVMInitializeX86MCSubtargetInfo() { + TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target, + X86_MC::createX86MCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target, + X86_MC::createX86MCSubtargetInfo); +} + +static MCInstrInfo *createX86MCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitX86MCInstrInfo(X); + return X; +} + +extern "C" void LLVMInitializeX86MCInstrInfo() { + TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo); +} + +static MCRegisterInfo *createX86MCRegisterInfo() { + MCRegisterInfo *X = new MCRegisterInfo(); + InitX86MCRegisterInfo(X); + return X; +} + +extern "C" void LLVMInitializeX86MCRegInfo() { + TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo); +} + + +static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) { + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) { + if (TheTriple.getArch() == Triple::x86_64) + return new X86_64MCAsmInfoDarwin(TheTriple); + else + return new X86MCAsmInfoDarwin(TheTriple); + } + + if (TheTriple.isOSWindows()) + return new X86MCAsmInfoCOFF(TheTriple); + + return new X86ELFMCAsmInfo(TheTriple); +} + +extern "C" void LLVMInitializeX86MCAsmInfo() { + // Register the target asm info. + RegisterMCAsmInfoFn A(TheX86_32Target, createX86MCAsmInfo); + RegisterMCAsmInfoFn B(TheX86_64Target, createX86MCAsmInfo); +} diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h new file mode 100644 index 0000000..89ea22b --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -0,0 +1,60 @@ +//===-- X86MCTargetDesc.h - X86 Target Descriptions -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides X86 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef X86MCTARGETDESC_H +#define X86MCTARGETDESC_H + +#include <string> + +namespace llvm { +class MCSubtargetInfo; +class Target; +class StringRef; + +extern Target TheX86_32Target, TheX86_64Target; + +namespace X86_MC { + std::string ParseX86Triple(StringRef TT); + + /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in + /// the specified arguments. If we can't run cpuid on the host, return true. + bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, + unsigned *rEBX, unsigned *rECX, unsigned *rEDX); + + void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model); + + /// createARMMCSubtargetInfo - Create a X86 MCSubtargetInfo instance. + /// This is exposed so Asm parser, etc. do not need to go through + /// TargetRegistry. + MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS); +} + +} // End llvm namespace + + +// Defines symbolic names for X86 registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "X86GenRegisterInfo.inc" + +// Defines symbolic names for the X86 instructions. 
+// +#define GET_INSTRINFO_ENUM +#include "X86GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "X86GenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/X86/MCTargetDesc/X86TargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86TargetDesc.cpp deleted file mode 100644 index c7c37f6..0000000 --- a/lib/Target/X86/MCTargetDesc/X86TargetDesc.cpp +++ /dev/null @@ -1,74 +0,0 @@ -//===-- X86TargetDesc.cpp - X86 Target Descriptions -------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides X86 specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#include "X86TargetDesc.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" - -#define GET_REGINFO_MC_DESC -#include "X86GenRegisterInfo.inc" - -#define GET_INSTRINFO_MC_DESC -#include "X86GenInstrInfo.inc" - -#define GET_SUBTARGETINFO_MC_DESC -#include "X86GenSubtargetInfo.inc" - -using namespace llvm; - -MCInstrInfo *createX86MCInstrInfo() { - MCInstrInfo *X = new MCInstrInfo(); - InitX86MCInstrInfo(X); - return X; -} - -MCRegisterInfo *createX86MCRegisterInfo() { - MCRegisterInfo *X = new MCRegisterInfo(); - InitX86MCRegisterInfo(X); - return X; -} - -MCSubtargetInfo *createX86MCSubtargetInfo() { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitX86MCSubtargetInfo(X); - return X; -} - -// Force static initialization. -extern "C" void LLVMInitializeX86MCInstrInfo() { - RegisterMCInstrInfo<MCInstrInfo> X(TheX86_32Target); - RegisterMCInstrInfo<MCInstrInfo> Y(TheX86_64Target); - - TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo); -} - -extern "C" void LLVMInitializeX86MCRegInfo() { - RegisterMCRegInfo<MCRegisterInfo> X(TheX86_32Target); - RegisterMCRegInfo<MCRegisterInfo> Y(TheX86_64Target); - - TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo); -} - -extern "C" void LLVMInitializeX86MCSubtargetInfo() { - RegisterMCSubtargetInfo<MCSubtargetInfo> X(TheX86_32Target); - RegisterMCSubtargetInfo<MCSubtargetInfo> Y(TheX86_64Target); - - TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target, - createX86MCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target, - createX86MCSubtargetInfo); -} diff --git a/lib/Target/X86/MCTargetDesc/X86TargetDesc.h b/lib/Target/X86/MCTargetDesc/X86TargetDesc.h deleted file mode 100644 index 9ab622d..0000000 --- a/lib/Target/X86/MCTargetDesc/X86TargetDesc.h +++ /dev/null @@ -1,34 +0,0 @@ -//===-- X86TargetDesc.h - X86 Target Descriptions ---------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides X86 specific target descriptions. 
-// -//===----------------------------------------------------------------------===// - -#ifndef X86TARGETDESC_H -#define X86TARGETDESC_H - -namespace llvm { -class Target; - -extern Target TheX86_32Target, TheX86_64Target; -} // End llvm namespace - -// Defines symbolic names for X86 registers. This defines a mapping from -// register name to register number. -// -#define GET_REGINFO_ENUM -#include "X86GenRegisterInfo.inc" - -// Defines symbolic names for the X86 instructions. -// -#define GET_INSTRINFO_ENUM -#include "X86GenInstrInfo.inc" - -#endif diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 9d66c2f..ec52dfb 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -15,7 +15,7 @@ #ifndef TARGET_X86_H #define TARGET_X86_H -#include "MCTargetDesc/X86TargetDesc.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/Support/DataTypes.h" #include "llvm/Target/TargetMachine.h" @@ -23,10 +23,12 @@ namespace llvm { class FunctionPass; class JITCodeEmitter; +class MachineCodeEmitter; class MCCodeEmitter; class MCContext; +class MCInstrInfo; class MCObjectWriter; -class MachineCodeEmitter; +class MCSubtargetInfo; class Target; class TargetAsmBackend; class X86TargetMachine; @@ -58,10 +60,9 @@ FunctionPass *createSSEDomainFixPass(); FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE); -MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM, - MCContext &Ctx); -MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM, - MCContext &Ctx); +MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx); TargetAsmBackend *createX86_32AsmBackend(const Target &, const std::string &); TargetAsmBackend *createX86_64AsmBackend(const Target &, const std::string &); diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 7bb9676..4ccb43f 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -17,6 +17,13 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// +// X86 Subtarget state. +// + +def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true", + "64-bit mode (x86_64)">; + +//===----------------------------------------------------------------------===// // X86 Subtarget features. 
//===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index 4d7d96d..9b556a5 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -194,6 +194,9 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) { // PUSH case X86::PUSHi8: return X86::PUSHi32; + case X86::PUSHi16: return X86::PUSHi32; + case X86::PUSH64i8: return X86::PUSH64i32; + case X86::PUSH64i16: return X86::PUSH64i32; } } diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index c2d53c4..99b4479 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -709,13 +709,12 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, //===----------------------------------------------------------------------===// static MCInstPrinter *createX86MCInstPrinter(const Target &T, - TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI) { if (SyntaxVariant == 0) - return new X86ATTInstPrinter(TM, MAI); + return new X86ATTInstPrinter(MAI); if (SyntaxVariant == 1) - return new X86IntelInstPrinter(TM, MAI); + return new X86IntelInstPrinter(MAI); return 0; } diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 463cde0..6eed6ab 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -884,7 +884,7 @@ void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) { // Kill registers by popping. if (Kills && I != MBB->begin()) { MachineBasicBlock::iterator I2 = llvm::prior(I); - for (;;) { + while (StackTop) { unsigned KReg = getStackEntry(0); if (!(Kills & (1 << KReg))) break; @@ -1467,25 +1467,24 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { } if (STUses && !isMask_32(STUses)) - report_fatal_error("Inline asm fixed input regs" - " must be last on the x87 stack"); + MI->emitError("fixed input regs must be last on the x87 stack"); unsigned NumSTUses = CountTrailingOnes_32(STUses); // Defs must be contiguous from the stack top. ST0-STn. - if (STDefs && !isMask_32(STDefs)) - report_fatal_error("Inline asm output regs" - " must be last on the x87 stack"); + if (STDefs && !isMask_32(STDefs)) { + MI->emitError("output regs must be last on the x87 stack"); + STDefs = NextPowerOf2(STDefs) - 1; + } unsigned NumSTDefs = CountTrailingOnes_32(STDefs); // So must the clobbered stack slots. ST0-STm, m >= n. if (STClobbers && !isMask_32(STDefs | STClobbers)) - report_fatal_error("Inline asm clobbers must be last on the x87 stack"); + MI->emitError("clobbers must be last on the x87 stack"); // Popped inputs are the ones that are also clobbered or defined. 
unsigned STPopped = STUses & (STDefs | STClobbers); if (STPopped && !isMask_32(STPopped)) - report_fatal_error("Inline asm implicitly popped regs" - " must be last on the x87 stack"); + MI->emitError("implicitly popped regs must be last on the x87 stack"); unsigned NumSTPopped = CountTrailingOnes_32(STPopped); DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops " @@ -1501,7 +1500,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) continue; if (!Op.isUse()) - report_fatal_error("Illegal \"f\" output constraint in inline asm"); + MI->emitError("illegal \"f\" output constraint"); unsigned FPReg = getFPReg(Op); FPUsed |= 1U << FPReg; diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index cd4e954..ed45a9a 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" @@ -1029,3 +1030,181 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, FrameIdx = 0; } } + +/// permuteEncode - Create the permutation encoding used with frameless +/// stacks. It is passed the number of registers to be saved and an array of the +/// registers saved. +static uint32_t permuteEncode(unsigned SavedCount, unsigned Registers[6]) { + // The saved registers are numbered from 1 to 6. In order to encode the order + // in which they were saved, we re-number them according to their place in the + // register order. The re-numbering is relative to the last re-numbered + // register. E.g., if we have registers {6, 2, 4, 5} saved in that order: + // + // Orig Re-Num + // ---- ------ + // 6 6 + // 2 2 + // 4 3 + // 5 3 + // + bool Used[7] = { false, false, false, false, false, false, false }; + uint32_t RenumRegs[6]; + for (unsigned I = 0; I < SavedCount; ++I) { + uint32_t Renum = 0; + for (unsigned U = 1; U < 7; ++U) { + if (U == Registers[I]) + break; + if (!Used[U]) + ++Renum; + } + + Used[Registers[I]] = true; + RenumRegs[I] = Renum; + } + + // Take the renumbered values and encode them into a 10-bit number. 
+ uint32_t permutationEncoding = 0; + switch (SavedCount) { + case 6: + permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] + + 6 * RenumRegs[2] + 2 * RenumRegs[3] + + RenumRegs[4]; + break; + case 5: + permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] + + 6 * RenumRegs[2] + 2 * RenumRegs[3] + + RenumRegs[4]; + break; + case 4: + permutationEncoding |= 60 * RenumRegs[0] + 12 * RenumRegs[1] + + 3 * RenumRegs[2] + RenumRegs[3]; + break; + case 3: + permutationEncoding |= 20 * RenumRegs[0] + 4 * RenumRegs[1] + + RenumRegs[2]; + break; + case 2: + permutationEncoding |= 5 * RenumRegs[0] + RenumRegs[1]; + break; + case 1: + permutationEncoding |= RenumRegs[0]; + break; + } + + return permutationEncoding; +} + +uint32_t X86FrameLowering:: +getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs, + int DataAlignmentFactor, bool IsEH) const { + uint32_t Encoding = 0; + int CFAOffset = 0; + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 }; + unsigned SavedRegIdx = 0; + int FramePointerReg = -1; + + for (ArrayRef<MCCFIInstruction>::const_iterator + I = Instrs.begin(), E = Instrs.end(); I != E; ++I) { + const MCCFIInstruction &Inst = *I; + MCSymbol *Label = Inst.getLabel(); + + // Ignore invalid labels. + if (Label && !Label->isDefined()) continue; + + unsigned Operation = Inst.getOperation(); + if (Operation != MCCFIInstruction::Move && + Operation != MCCFIInstruction::RelMove) + // FIXME: We can't handle this frame just yet. + return 0; + + const MachineLocation &Dst = Inst.getDestination(); + const MachineLocation &Src = Inst.getSource(); + const bool IsRelative = (Operation == MCCFIInstruction::RelMove); + + if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { + if (Src.getReg() != MachineLocation::VirtualFP) { + // DW_CFA_def_cfa + assert(FramePointerReg == -1 &&"Defining more than one frame pointer?"); + if (TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::EBP && + TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::RBP) + // The frame pointer isn't EBP/RBP. Cannot make unwind information + // compact. + return 0; + FramePointerReg = TRI->getCompactUnwindRegNum(Src.getReg(), IsEH); + } // else DW_CFA_def_cfa_offset + + if (IsRelative) + CFAOffset += Src.getOffset(); + else + CFAOffset -= Src.getOffset(); + + continue; + } + + if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { + // DW_CFA_def_cfa_register + assert(FramePointerReg == -1 && "Defining more than one frame pointer?"); + + if (TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::EBP && + TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::RBP) + // The frame pointer isn't EBP/RBP. Cannot make unwind information + // compact. + return 0; + + FramePointerReg = TRI->getCompactUnwindRegNum(Dst.getReg(), IsEH); + if (SavedRegIdx != 1 || SavedRegs[0] != unsigned(FramePointerReg)) + return 0; + + SavedRegs[0] = 0; + SavedRegIdx = 0; + continue; + } + + unsigned Reg = Src.getReg(); + int Offset = Dst.getOffset(); + if (IsRelative) + Offset -= CFAOffset; + Offset /= DataAlignmentFactor; + + if (Offset < 0) { + // FIXME: Handle? + // DW_CFA_offset_extended_sf + return 0; + } else if (Reg < 64) { + // DW_CFA_offset + Reg + if (SavedRegIdx >= 6) return 0; + int CURegNum = TRI->getCompactUnwindRegNum(Reg, IsEH); + if (CURegNum == -1) return 0; + SavedRegs[SavedRegIdx++] = CURegNum; + } else { + // FIXME: Handle? + // DW_CFA_offset_extended + return 0; + } + } + + // Bail if there are too many registers to encode. 
+ if (SavedRegIdx > 6) return 0; + + // Check if the offset is too big. + CFAOffset /= 4; + if ((CFAOffset & 0xFF) != CFAOffset) + return 0; + Encoding |= (CFAOffset & 0xFF) << 16; // Size encoding. + + if (FramePointerReg != -1) { + Encoding |= 0x01000000; // EBP/RBP Unwind Frame + for (unsigned I = 0; I != SavedRegIdx; ++I) { + unsigned Reg = SavedRegs[I]; + if (Reg == unsigned(FramePointerReg)) continue; + Encoding |= (Reg & 0x7) << (I * 3); // Register encoding + } + } else { + Encoding |= 0x02000000; // Frameless unwind with small stack + Encoding |= (SavedRegIdx & 0x7) << 10; + Encoding |= permuteEncode(SavedRegIdx, SavedRegs); + } + + return Encoding; +} diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index d71108c..14c31ed 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -15,6 +15,7 @@ #define X86_FRAMELOWERING_H #include "X86Subtarget.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { @@ -58,6 +59,9 @@ public: void getInitialFrameState(std::vector<MachineMove> &Moves) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const; + + uint32_t getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs, + int DataAlignmentFactor, bool IsEH) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index cc8db35..2b0f283 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -192,6 +192,7 @@ namespace { SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT); + bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM); bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); bool MatchAddress(SDValue N, X86ISelAddressMode &AM); @@ -547,6 +548,34 @@ void X86DAGToDAGISel::EmitFunctionEntryCode() { EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo()); } +static bool isDispSafeForFrameIndex(int64_t Val) { + // On 64-bit platforms, we can run into an issue where a frame index + // includes a displacement that, when added to the explicit displacement, + // will overflow the displacement field. Assuming that the frame index + // displacement fits into a 31-bit integer (which is only slightly more + // aggressive than the current fundamental assumption that it fits into + // a 32-bit integer), a 31-bit disp should always be safe. + return isInt<31>(Val); +} + +bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset, + X86ISelAddressMode &AM) { + int64_t Val = AM.Disp + Offset; + CodeModel::Model M = TM.getCodeModel(); + if (Subtarget->is64Bit()) { + if (!X86::isOffsetSuitableForCodeModel(Val, M, + AM.hasSymbolicDisplacement())) + return true; + // In addition to the checks required for a register base, check that + // we do not try to use an unsafe Disp with a frame index. + if (AM.BaseType == X86ISelAddressMode::FrameIndexBase && + !isDispSafeForFrameIndex(Val)) + return true; + } + AM.Disp = Val; + return false; + +} bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ SDValue Address = N->getOperand(1); @@ -596,18 +625,22 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { // must allow RIP. 
!AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) { if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { - int64_t Offset = AM.Disp + G->getOffset(); - if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true; + X86ISelAddressMode Backup = AM; AM.GV = G->getGlobal(); - AM.Disp = Offset; AM.SymbolFlags = G->getTargetFlags(); + if (FoldOffsetIntoAddress(G->getOffset(), AM)) { + AM = Backup; + return true; + } } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { - int64_t Offset = AM.Disp + CP->getOffset(); - if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true; + X86ISelAddressMode Backup = AM; AM.CP = CP->getConstVal(); AM.Align = CP->getAlignment(); - AM.Disp = Offset; AM.SymbolFlags = CP->getTargetFlags(); + if (FoldOffsetIntoAddress(CP->getOffset(), AM)) { + AM = Backup; + return true; + } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { AM.ES = S->getSymbol(); AM.SymbolFlags = S->getTargetFlags(); @@ -689,7 +722,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, unsigned Depth) { - bool is64Bit = Subtarget->is64Bit(); DebugLoc dl = N.getDebugLoc(); DEBUG({ dbgs() << "MatchAddress: "; @@ -699,8 +731,6 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, if (Depth > 5) return MatchAddressBase(N, AM); - CodeModel::Model M = TM.getCodeModel(); - // If this is already a %rip relative address, we can only merge immediates // into it. Instead of handling this in every case, we handle it here. // RIP relative addressing: %rip + 32-bit displacement! @@ -710,14 +740,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // consistency. 
if (!AM.ES && AM.JT != -1) return true; - if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) { - int64_t Val = AM.Disp + Cst->getSExtValue(); - if (X86::isOffsetSuitableForCodeModel(Val, M, - AM.hasSymbolicDisplacement())) { - AM.Disp = Val; + if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) + if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM)) return false; - } - } return true; } @@ -725,12 +750,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, default: break; case ISD::Constant: { uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); - if (!is64Bit || - X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M, - AM.hasSymbolicDisplacement())) { - AM.Disp += Val; + if (!FoldOffsetIntoAddress(Val, AM)) return false; - } break; } @@ -746,8 +767,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, break; case ISD::FrameIndex: - if (AM.BaseType == X86ISelAddressMode::RegBase - && AM.Base_Reg.getNode() == 0) { + if (AM.BaseType == X86ISelAddressMode::RegBase && + AM.Base_Reg.getNode() == 0 && + (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) { AM.BaseType = X86ISelAddressMode::FrameIndexBase; AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); return false; @@ -776,16 +798,12 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM.IndexReg = ShVal.getNode()->getOperand(0); ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getNode()->getOperand(1)); - uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val); - if (!is64Bit || - X86::isOffsetSuitableForCodeModel(Disp, M, - AM.hasSymbolicDisplacement())) - AM.Disp = Disp; - else - AM.IndexReg = ShVal; - } else { - AM.IndexReg = ShVal; + uint64_t Disp = AddVal->getSExtValue() << Val; + if (!FoldOffsetIntoAddress(Disp, AM)) + return false; } + + AM.IndexReg = ShVal; return false; } break; @@ -819,13 +837,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, Reg = MulVal.getNode()->getOperand(0); ConstantSDNode *AddVal = cast<ConstantSDNode>(MulVal.getNode()->getOperand(1)); - uint64_t Disp = AM.Disp + AddVal->getSExtValue() * - CN->getZExtValue(); - if (!is64Bit || - X86::isOffsetSuitableForCodeModel(Disp, M, - AM.hasSymbolicDisplacement())) - AM.Disp = Disp; - else + uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue(); + if (FoldOffsetIntoAddress(Disp, AM)) Reg = N.getNode()->getOperand(0); } else { Reg = N.getNode()->getOperand(0); @@ -950,19 +963,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, if (CurDAG->isBaseWithConstantOffset(N)) { X86ISelAddressMode Backup = AM; ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1)); - uint64_t Offset = CN->getSExtValue(); // Start with the LHS as an addr mode. if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && - // Address could not have picked a GV address for the displacement. - AM.GV == NULL && - // On x86-64, the resultant disp must fit in 32-bits. - (!is64Bit || - X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M, - AM.hasSymbolicDisplacement()))) { - AM.Disp += Offset; + !FoldOffsetIntoAddress(CN->getSExtValue(), AM)) return false; - } AM = Backup; } break; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4f8b90f..5096d9a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -235,10 +235,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Setup Windows compiler runtime calls. 
setLibcallName(RTLIB::SDIV_I64, "_alldiv"); setLibcallName(RTLIB::UDIV_I64, "_aulldiv"); + setLibcallName(RTLIB::SREM_I64, "_allrem"); + setLibcallName(RTLIB::UREM_I64, "_aullrem"); + setLibcallName(RTLIB::MUL_I64, "_allmul"); setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2"); setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2"); setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall); + setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall); + setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall); + setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C); setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C); } @@ -646,6 +652,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS } + // We don't support FMA. + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + // Long double always uses X87. if (!UseSoftFloat) { addRegisterClass(MVT::f80, X86::RFP80RegisterClass); @@ -670,6 +680,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FSIN , MVT::f80 , Expand); setOperationAction(ISD::FCOS , MVT::f80 , Expand); } + + setOperationAction(ISD::FMA, MVT::f80, Expand); } // Always use a library call for pow. @@ -976,7 +988,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addRegisterClass(MVT::v32i8, X86::VR256RegisterClass); setOperationAction(ISD::LOAD, MVT::v8f32, Legal); - setOperationAction(ISD::LOAD, MVT::v8i32, Legal); setOperationAction(ISD::LOAD, MVT::v4f64, Legal); setOperationAction(ISD::LOAD, MVT::v4i64, Legal); @@ -994,63 +1005,58 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); setOperationAction(ISD::FNEG, MVT::v4f64, Custom); - // Custom lower build_vector, vector_shuffle, scalar_to_vector, - // insert_vector_elt extract_subvector and extract_vector_elt for - // 256-bit types. - for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; - ++i) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)i; - // Do not attempt to custom lower non-256-bit vectors - if (!isPowerOf2_32(MVT(VT).getVectorNumElements()) - || (MVT(VT).getSizeInBits() < 256)) - continue; - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); - } - // Custom-lower insert_subvector and extract_subvector based on - // the result type. + // Custom lower several nodes for 256-bit types. for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; - ++i) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)i; - // Do not attempt to custom lower non-256-bit vectors - if (!isPowerOf2_32(MVT(VT).getVectorNumElements())) + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT::SimpleValueType SVT = (MVT::SimpleValueType)i; + EVT VT = SVT; + + // Extract subvector is special because the value type + // (result) is 128-bit but the source is 256-bit wide. 
+ if (VT.is128BitVector()) + setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom); + + // Do not attempt to custom lower other non-256-bit vectors + if (!VT.is256BitVector()) continue; - if (MVT(VT).getSizeInBits() == 128) { - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); - } - else if (MVT(VT).getSizeInBits() == 256) { - setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); - } + setOperationAction(ISD::BUILD_VECTOR, SVT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom); } // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64. - // Don't promote loads because we need them for VPERM vector index versions. + for (unsigned i = (unsigned)MVT::v32i8; i != (unsigned)MVT::v4i64; ++i) { + MVT::SimpleValueType SVT = (MVT::SimpleValueType)i; + EVT VT = SVT; - for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; - VT++) { - if (!isPowerOf2_32(MVT((MVT::SimpleValueType)VT).getVectorNumElements()) - || (MVT((MVT::SimpleValueType)VT).getSizeInBits() < 256)) + // Do not attempt to promote non-256-bit vectors + if (!VT.is256BitVector()) continue; - setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v4i64); - setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v4i64); - setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v4i64); - //setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote); - //AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v4i64); - setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v4i64); + + setOperationAction(ISD::AND, SVT, Promote); + AddPromotedToType (ISD::AND, SVT, MVT::v4i64); + setOperationAction(ISD::OR, SVT, Promote); + AddPromotedToType (ISD::OR, SVT, MVT::v4i64); + setOperationAction(ISD::XOR, SVT, Promote); + AddPromotedToType (ISD::XOR, SVT, MVT::v4i64); + setOperationAction(ISD::LOAD, SVT, Promote); + AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64); + setOperationAction(ISD::SELECT, SVT, Promote); + AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64); } } + // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion + // of this type with custom code. + for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT++) { + setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, Custom); + } + // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -3832,19 +3838,24 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG, } /// getOnesVector - Returns a vector of specified type with all bits set. -/// +/// Always build ones vectors as <4 x i32> or <8 x i32> bitcasted to +/// their original type, ensuring they get CSE'd. 
static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); + assert((VT.is128BitVector() || VT.is256BitVector()) + && "Expected a 128-bit or 256-bit vector type"); - // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest - // type. This ensures they get CSE'd. SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); + SDValue Vec; - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + if (VT.is256BitVector()) { + SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8); + } else + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); return DAG.getNode(ISD::BITCAST, dl, VT, Vec); } - /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements /// that point to V2 points to its first element. static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { @@ -4459,17 +4470,17 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return ConcatVectors(Lower, Upper, DAG); } - // All zero's are handled with pxor in SSE2 and above, xorps in SSE1. - // All one's are handled with pcmpeqd. In AVX, zero's are handled with - // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd - // is present, so AllOnes is ignored. + // All zero's: + // - pxor (SSE2), xorps (SSE1), vpxor (128 AVX), xorp[s|d] (256 AVX) + // All one's: + // - pcmpeqd (SSE2 and 128 AVX), fallback to constant pools (256 AVX) if (ISD::isBuildVectorAllZeros(Op.getNode()) || - (Op.getValueType().getSizeInBits() != 256 && - ISD::isBuildVectorAllOnes(Op.getNode()))) { - // Canonicalize this to <4 x i32> (SSE) to + ISD::isBuildVectorAllOnes(Op.getNode())) { + // Canonicalize this to <4 x i32> or <8 x 32> (SSE) to // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are // eliminated on x86-32 hosts. - if (Op.getValueType() == MVT::v4i32) + if (Op.getValueType() == MVT::v4i32 || + Op.getValueType() == MVT::v8i32) return Op; if (ISD::isBuildVectorAllOnes(Op.getNode())) @@ -8916,8 +8927,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { } // Lower SHL with variable shift amount. - // Cannot lower SHL without SSE4.1 or later. - if (!Subtarget->hasSSE41()) return SDValue(); + // Cannot lower SHL without SSE2 or later. 
+ if (!Subtarget->hasSSE2()) return SDValue(); if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) { Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, @@ -9064,13 +9075,66 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { return Sum; } +SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{ + DebugLoc dl = Op.getDebugLoc(); + SDNode* Node = Op.getNode(); + EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + EVT VT = Node->getValueType(0); + + if (Subtarget->hasSSE2() && VT.isVector()) { + unsigned BitsDiff = VT.getScalarType().getSizeInBits() - + ExtraVT.getScalarType().getSizeInBits(); + SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32); + + unsigned SHLIntrinsicsID = 0; + unsigned SRAIntrinsicsID = 0; + switch (VT.getSimpleVT().SimpleTy) { + default: + return SDValue(); + case MVT::v2i64: { + SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_q; + SRAIntrinsicsID = 0; + break; + } + case MVT::v4i32: { + SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d; + SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d; + break; + } + case MVT::v8i16: { + SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w; + SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w; + break; + } + } + + SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(SHLIntrinsicsID, MVT::i32), + Node->getOperand(0), ShAmt); + + // In case of 1 bit sext, no need to shr + if (ExtraVT.getScalarType().getSizeInBits() == 1) return Tmp1; + + if (SRAIntrinsicsID) { + Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(SRAIntrinsicsID, MVT::i32), + Tmp1, ShAmt); + } + return Tmp1; + } + + return SDValue(); +} + + SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{ DebugLoc dl = Op.getDebugLoc(); - if (!Subtarget->hasSSE2()) { + // Go ahead and emit the fence on x86-64 even if we asked for no-sse2. + // There isn't any reason to disable it if the target processor supports it. + if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) { SDValue Chain = Op.getOperand(0); - SDValue Zero = DAG.getConstant(0, - Subtarget->is64Bit() ? 
MVT::i64 : MVT::i32); + SDValue Zero = DAG.getConstant(0, MVT::i32); SDValue Ops[] = { DAG.getRegister(X86::ESP, MVT::i32), // Base DAG.getTargetConstant(1, MVT::i8), // Scale @@ -9225,6 +9289,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); + case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG); case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); @@ -9323,6 +9388,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, default: assert(false && "Do not know how to custom type legalize this operation!"); return; + case ISD::SIGN_EXTEND_INREG: case ISD::ADDC: case ISD::ADDE: case ISD::SUBC: @@ -9457,7 +9523,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PINSRB: return "X86ISD::PINSRB"; case X86ISD::PINSRW: return "X86ISD::PINSRW"; case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; - case X86ISD::PANDN: return "X86ISD::PANDN"; + case X86ISD::ANDNP: return "X86ISD::ANDNP"; case X86ISD::PSIGNB: return "X86ISD::PSIGNB"; case X86ISD::PSIGNW: return "X86ISD::PSIGNW"; case X86ISD::PSIGND: return "X86ISD::PSIGND"; @@ -11808,10 +11874,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, if (R.getNode()) return R; - // Want to form PANDN nodes, in the hopes of then easily combining them with - // OR and AND nodes to form PBLEND/PSIGN. + // Want to form ANDNP nodes: + // 1) In the hopes of then easily combining them with OR and AND nodes + // to form PBLEND/PSIGN. + // 2) To match ANDN packed intrinsics EVT VT = N->getValueType(0); - if (VT != MVT::v2i64) + if (VT != MVT::v2i64 && VT != MVT::v4i64) return SDValue(); SDValue N0 = N->getOperand(0); @@ -11821,12 +11889,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, // Check LHS for vnot if (N0.getOpcode() == ISD::XOR && ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode())) - return DAG.getNode(X86ISD::PANDN, DL, VT, N0.getOperand(0), N1); + return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1); // Check RHS for vnot if (N1.getOpcode() == ISD::XOR && ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) - return DAG.getNode(X86ISD::PANDN, DL, VT, N1.getOperand(0), N0); + return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0); return SDValue(); } @@ -11852,10 +11920,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (Subtarget->hasSSSE3()) { if (VT == MVT::v2i64) { // Canonicalize pandn to RHS - if (N0.getOpcode() == X86ISD::PANDN) + if (N0.getOpcode() == X86ISD::ANDNP) std::swap(N0, N1); // or (and (m, x), (pandn m, y)) - if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::PANDN) { + if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) { SDValue Mask = N1.getOperand(0); SDValue X = N1.getOperand(1); SDValue Y; @@ -11864,7 +11932,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (N0.getOperand(1) == Mask) Y = N0.getOperand(0); - // Check to see if the mask appeared in both the AND and PANDN and + // Check to see if the mask appeared in both the AND and ANDNP and if (!Y.getNode()) return SDValue(); @@ -12592,6 +12660,7 @@ X86TargetLowering::getConstraintType(const std::string &Constraint) const { case 'y': case 'x': case 'Y': + case 'l': return C_RegisterClass; case 'a': case 
'b': @@ -12889,30 +12958,30 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // in the normal allocation? case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode. if (Subtarget->is64Bit()) { - if (VT == MVT::i32) + if (VT == MVT::i32 || VT == MVT::f32) return std::make_pair(0U, X86::GR32RegisterClass); else if (VT == MVT::i16) return std::make_pair(0U, X86::GR16RegisterClass); - else if (VT == MVT::i8) + else if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, X86::GR8RegisterClass); - else if (VT == MVT::i64) + else if (VT == MVT::i64 || VT == MVT::f64) return std::make_pair(0U, X86::GR64RegisterClass); break; } // 32-bit fallthrough case 'Q': // Q_REGS - if (VT == MVT::i32) + if (VT == MVT::i32 || VT == MVT::f32) return std::make_pair(0U, X86::GR32_ABCDRegisterClass); else if (VT == MVT::i16) return std::make_pair(0U, X86::GR16_ABCDRegisterClass); - else if (VT == MVT::i8) + else if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, X86::GR8_ABCD_LRegisterClass); else if (VT == MVT::i64) return std::make_pair(0U, X86::GR64_ABCDRegisterClass); break; case 'r': // GENERAL_REGS case 'l': // INDEX_REGS - if (VT == MVT::i8) + if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, X86::GR8RegisterClass); if (VT == MVT::i16) return std::make_pair(0U, X86::GR16RegisterClass); @@ -12920,7 +12989,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return std::make_pair(0U, X86::GR32RegisterClass); return std::make_pair(0U, X86::GR64RegisterClass); case 'R': // LEGACY_REGS - if (VT == MVT::i8) + if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, X86::GR8_NOREXRegisterClass); if (VT == MVT::i16) return std::make_pair(0U, X86::GR16_NOREXRegisterClass); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index d9c883f..b603678 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -169,8 +169,8 @@ namespace llvm { /// PSHUFB - Shuffle 16 8-bit values within a vector. PSHUFB, - /// PANDN - and with not'd value. - PANDN, + /// ANDNP - Bitwise Logical AND NOT of Packed FP values. + ANDNP, /// PSIGNB/W/D - Copy integer sign. 
PSIGNB, PSIGNW, PSIGND, @@ -825,6 +825,7 @@ namespace llvm { SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const; SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; // Utility functions to help LowerVECTOR_SHUFFLE SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 7daa264..6d89bcc 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -460,6 +460,11 @@ class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm, class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag>pattern> : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + OpSize, Requires<[HasCLMUL]>; + +class AVXCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag>pattern> + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>; // FMA3 Instruction Templates diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 7c9a9f7..b00109c 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -46,8 +46,8 @@ def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>; def X86pshufb : SDNode<"X86ISD::PSHUFB", SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; -def X86pandn : SDNode<"X86ISD::PANDN", - SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>, +def X86andnp : SDNode<"X86ISD::ANDNP", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86psignb : SDNode<"X86ISD::PSIGNB", SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, @@ -168,11 +168,13 @@ def ssmem : Operand<v4f32> { let PrintMethod = "printf32mem"; let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); let ParserMatchClass = X86MemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } def sdmem : Operand<v2f64> { let PrintMethod = "printf64mem"; let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); let ParserMatchClass = X86MemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } //===----------------------------------------------------------------------===// @@ -301,6 +303,7 @@ def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>; // 256-bit bitconvert pattern fragments def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>; +def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>; def vzmovl_v2i64 : PatFrag<(ops node:$src), (bitconvert (v2i64 (X86vzmovl diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 702331d..55b5835 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -36,7 +36,6 @@ #include <limits> #define GET_INSTRINFO_CTOR -#define GET_INSTRINFO_MC_DESC #include "X86GenInstrInfo.inc" using namespace llvm; @@ -301,12 +300,17 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 }, { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 }, { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 }, + { X86::VMOVAPDYrr, X86::VMOVAPDYmr, 0, 32 }, + { X86::VMOVAPSYrr, X86::VMOVAPSYmr, 0, 32 }, + { X86::VMOVDQAYrr, X86::VMOVDQAYmr, 0, 32 }, { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 }, { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 }, { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 }, { X86::MOVSS2DIrr, 
X86::MOVSS2DImr, 0, 0 }, { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 }, { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 }, + { X86::VMOVUPDYrr, X86::VMOVUPDYmr, 0, 0 }, + { X86::VMOVUPSYrr, X86::VMOVUPSYmr, 0, 0 }, { X86::MUL16r, X86::MUL16m, 1, 0 }, { X86::MUL32r, X86::MUL32m, 1, 0 }, { X86::MUL64r, X86::MUL64m, 1, 0 }, @@ -411,10 +415,13 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOV8rr, X86::MOV8rm, 0 }, { X86::MOVAPDrr, X86::MOVAPDrm, 16 }, { X86::MOVAPSrr, X86::MOVAPSrm, 16 }, + { X86::VMOVAPDYrr, X86::VMOVAPDYrm, 32 }, + { X86::VMOVAPSYrr, X86::VMOVAPSYrm, 32 }, { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 }, { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, { X86::MOVDQArr, X86::MOVDQArm, 16 }, + { X86::VMOVDQAYrr, X86::VMOVDQAYrm, 16 }, { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 }, { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 }, { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, @@ -425,6 +432,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, { X86::MOVUPDrr, X86::MOVUPDrm, 16 }, { X86::MOVUPSrr, X86::MOVUPSrm, 0 }, + { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 }, + { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }, { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 }, { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 }, { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 }, @@ -787,6 +796,9 @@ static bool isFrameLoadOpcode(int Opcode) { case X86::MOVAPSrm: case X86::MOVAPDrm: case X86::MOVDQArm: + case X86::VMOVAPSYrm: + case X86::VMOVAPDYrm: + case X86::VMOVDQAYrm: case X86::MMX_MOVD64rm: case X86::MMX_MOVQ64rm: return true; @@ -808,6 +820,9 @@ static bool isFrameStoreOpcode(int Opcode) { case X86::MOVAPSmr: case X86::MOVAPDmr: case X86::MOVDQAmr: + case X86::VMOVAPSYmr: + case X86::VMOVAPDYmr: + case X86::VMOVDQAYmr: case X86::MMX_MOVD64mr: case X86::MMX_MOVQ64mr: case X86::MMX_MOVNTQmr: @@ -926,6 +941,10 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, case X86::MOVUPSrm: case X86::MOVAPDrm: case X86::MOVDQArm: + case X86::VMOVAPSYrm: + case X86::VMOVUPSYrm: + case X86::VMOVAPDYrm: + case X86::VMOVDQAYrm: case X86::MMX_MOVD64rm: case X86::MMX_MOVQ64rm: case X86::FsMOVAPSrm: @@ -1975,6 +1994,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = X86::MOV8rr; } else if (X86::VR128RegClass.contains(DestReg, SrcReg)) Opc = X86::MOVAPSrr; + else if (X86::VR256RegClass.contains(DestReg, SrcReg)) + Opc = X86::VMOVAPSYrr; else if (X86::VR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MMX_MOVQ64rr; else @@ -2064,6 +2085,13 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, return load ? X86::MOVAPSrm : X86::MOVAPSmr; else return load ? X86::MOVUPSrm : X86::MOVUPSmr; + case 32: + assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass"); + // If stack is realigned we can use aligned stores. + if (isStackAligned) + return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr; + else + return load ? 
X86::VMOVUPSYrm : X86::VMOVUPSYmr; } } @@ -2853,6 +2881,11 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MOVDQUrm: + case X86::VMOVAPSYrm: + case X86::VMOVUPSYrm: + case X86::VMOVAPDYrm: + case X86::VMOVDQAYrm: + case X86::VMOVDQUYrm: break; } switch (Opc2) { @@ -2875,6 +2908,11 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MOVDQUrm: + case X86::VMOVAPSYrm: + case X86::VMOVUPSYrm: + case X86::VMOVAPDYrm: + case X86::VMOVDQAYrm: + case X86::VMOVDQUYrm: break; } @@ -3053,6 +3091,13 @@ static const unsigned ReplaceableInstrs[][3] = { { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI }, { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm }, { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr }, + // AVX 256-bit support + { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr }, + { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm }, + { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr }, + { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr }, + { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm }, + { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }, }; // FIXME: Some shuffle and unpack instructions have equivalents in different diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 8cab808..7eb07b0 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -251,6 +251,7 @@ class X86MemOperand<string printMethod> : Operand<iPTR> { let ParserMatchClass = X86MemAsmOperand; } +let OperandType = "OPERAND_MEMORY" in { def opaque32mem : X86MemOperand<"printopaquemem">; def opaque48mem : X86MemOperand<"printopaquemem">; def opaque80mem : X86MemOperand<"printopaquemem">; @@ -267,6 +268,7 @@ def f64mem : X86MemOperand<"printf64mem">; def f80mem : X86MemOperand<"printf80mem">; def f128mem : X86MemOperand<"printf128mem">; def f256mem : X86MemOperand<"printf256mem">; +} // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of // plain GR64, so that it doesn't potentially require a REX prefix. @@ -274,6 +276,7 @@ def i8mem_NOREX : Operand<i64> { let PrintMethod = "printi8mem"; let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm); let ParserMatchClass = X86MemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } // GPRs available for tailcall. @@ -287,6 +290,7 @@ def i32mem_TC : Operand<i32> { let PrintMethod = "printi32mem"; let MIOperandInfo = (ops GR32_TC, i8imm, GR32_TC, i32imm, i8imm); let ParserMatchClass = X86MemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } // Special i64mem for addresses of load folding tail calls. These are not @@ -297,9 +301,11 @@ def i64mem_TC : Operand<i64> { let MIOperandInfo = (ops ptr_rc_tailcall, i8imm, ptr_rc_tailcall, i32imm, i8imm); let ParserMatchClass = X86MemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } -let ParserMatchClass = X86AbsMemAsmOperand, +let OperandType = "OPERAND_PCREL", + ParserMatchClass = X86AbsMemAsmOperand, PrintMethod = "print_pcrel_imm" in { def i32imm_pcrel : Operand<i32>; def i16imm_pcrel : Operand<i16>; @@ -317,6 +323,7 @@ def brtarget8 : Operand<OtherVT>; def SSECC : Operand<i8> { let PrintMethod = "printSSECC"; + let OperandType = "OPERAND_IMMEDIATE"; } class ImmSExtAsmOperandClass : AsmOperandClass { @@ -363,15 +370,18 @@ def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass { // 16-bits but only 8 bits are significant. 
def i16i8imm : Operand<i16> { let ParserMatchClass = ImmSExti16i8AsmOperand; + let OperandType = "OPERAND_IMMEDIATE"; } // 32-bits but only 8 bits are significant. def i32i8imm : Operand<i32> { let ParserMatchClass = ImmSExti32i8AsmOperand; + let OperandType = "OPERAND_IMMEDIATE"; } // 64-bits but only 32 bits are significant. def i64i32imm : Operand<i64> { let ParserMatchClass = ImmSExti64i32AsmOperand; + let OperandType = "OPERAND_IMMEDIATE"; } // 64-bits but only 32 bits are significant, and those bits are treated as being @@ -438,8 +448,10 @@ def HasFMA3 : Predicate<"Subtarget->hasFMA3()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; def FPStackf32 : Predicate<"!Subtarget->hasXMM()">; def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">; -def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate; -def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate; +def In32BitMode : Predicate<"!Subtarget->is64Bit()">, + AssemblerPredicate<"!Mode64Bit">; +def In64BitMode : Predicate<"Subtarget->is64Bit()">, + AssemblerPredicate<"Mode64Bit">; def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">; @@ -669,7 +681,7 @@ def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>; } let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in { -def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), +def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm), "push{q}\t$imm", []>; def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), "push{q}\t$imm", []>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 0bfc5e7..fe11d77 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -512,6 +512,26 @@ defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD, defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, VEX_4V, VEX_W; +let Predicates = [HasAVX] in { + def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), + (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; + def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), + (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>; + def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))), + (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>; + def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))), + (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), addr:$src)>; + + def : Pat<(f32 (sint_to_fp GR32:$src)), + (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>; + def : Pat<(f32 (sint_to_fp GR64:$src)), + (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>; + def : Pat<(f64 (sint_to_fp GR32:$src)), + (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>; + def : Pat<(f64 (sint_to_fp GR64:$src)), + (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>; +} + defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si\t{$src, $dst|$dst, $src}">, XS; defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, @@ -1473,83 +1493,68 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in /// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops /// multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, - SDNode OpNode, int HasPat = 0, - list<list<dag>> Pattern = []> { + SDNode OpNode> { let Pattern = []<dag> in { defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, - !if(HasPat, Pattern[0], // rr - [(set 
VR128:$dst, (v2i64 (OpNode VR128:$src1, - VR128:$src2)))]), - !if(HasPat, Pattern[2], // rm - [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), - (memopv2i64 addr:$src2)))]), 0>, - VEX_4V; + [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))], 0>, VEX_4V; defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f128mem, - !if(HasPat, Pattern[1], // rr - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (bc_v2i64 (v2f64 - VR128:$src2))))]), - !if(HasPat, Pattern[3], // rm - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))]), 0>, - OpSize, VEX_4V; + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 VR128:$src2))))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (memopv2i64 addr:$src2)))], 0>, + OpSize, VEX_4V; } let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, - !if(HasPat, Pattern[0], // rr - [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, - VR128:$src2)))]), - !if(HasPat, Pattern[2], // rm - [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), - (memopv2i64 addr:$src2)))])>, TB; + [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))]>, TB; defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f128mem, - !if(HasPat, Pattern[1], // rr - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (bc_v2i64 (v2f64 - VR128:$src2))))]), - !if(HasPat, Pattern[3], // rm - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))])>, - TB, OpSize; + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 VR128:$src2))))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (memopv2i64 addr:$src2)))]>, TB, OpSize; } } /// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms /// -multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> { +multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr, + SDNode OpNode> { defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle, - !strconcat(OpcodeStr, "ps"), f256mem, [], [], 0>, VEX_4V; + !strconcat(OpcodeStr, "ps"), f256mem, + [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))], + [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), + (memopv4i64 addr:$src2)))], 0>, VEX_4V; defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, - !strconcat(OpcodeStr, "pd"), f256mem, [], [], 0>, OpSize, VEX_4V; + !strconcat(OpcodeStr, "pd"), f256mem, + [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), + (bc_v4i64 (v4f64 VR256:$src2))))], + [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), + (memopv4i64 addr:$src2)))], 0>, + OpSize, VEX_4V; } // AVX 256-bit packed logical ops forms -defm VAND : sse12_fp_packed_logical_y<0x54, "and">; -defm VOR : sse12_fp_packed_logical_y<0x56, "or">; -defm VXOR : sse12_fp_packed_logical_y<0x57, "xor">; -let isCommutable = 0 in - defm VANDN : sse12_fp_packed_logical_y<0x55, "andn">; +defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>; +defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>; +defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>; +defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>; defm AND : 
sse12_fp_packed_logical<0x54, "and", and>; defm OR : sse12_fp_packed_logical<0x56, "or", or>; defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>; let isCommutable = 0 in - defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [ - // single r+r - [(set VR128:$dst, (X86pandn VR128:$src1, VR128:$src2))], - // double r+r - [], - // single r+m - [(set VR128:$dst, (X86pandn VR128:$src1, (memopv2i64 addr:$src2)))], - // double r+m - []]>; + defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>; //===----------------------------------------------------------------------===// // SSE 1 & 2 - Arithmetic Instructions @@ -2037,7 +2042,10 @@ def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", } // The same as done above but for AVX. The 128-bit versions are the -// same, but re-encoded. The 256-bit does not support PI version. +// same, but re-encoded. The 256-bit does not support PI version, and +// doesn't need it because on sandy bridge the register is set to zero +// at the rename stage without using any execution unit, so SET0PSY +// and SET0PDY can be used for vector int instructions without penalty // FIXME: Change encoding to pseudo! This is blocked right now by the x86 // JIT implementatioan, it does not expand the instructions below like // X86MCInstLower does. @@ -2052,8 +2060,8 @@ def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V; let ExeDomain = SSEPackedInt in -def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllZerosV))]>; +def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4i32 immAllZerosV))]>; } def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>; @@ -3660,6 +3668,19 @@ let Predicates = [HasXMMInt] in { let Predicates = [HasAVX] in { def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>; } // Move scalar to XMM zero-extended @@ -3842,6 +3863,8 @@ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), (CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>; // Use movaps / movups for SSE integer load / store (one byte shorter). +// The instructions selected below are then converted to MOVDQA/MOVDQU +// during the SSE domain pass. 
let Predicates = [HasSSE1] in { def : Pat<(alignedloadv4i32 addr:$src), (MOVAPSrm addr:$src)>; @@ -3870,8 +3893,9 @@ let Predicates = [HasSSE1] in { (MOVUPSmr addr:$dst, VR128:$src)>; } -// Use vmovaps/vmovups for AVX 128-bit integer load/store (one byte shorter). +// Use vmovaps/vmovups for AVX integer load/store. let Predicates = [HasAVX] in { + // 128-bit load/store def : Pat<(alignedloadv4i32 addr:$src), (VMOVAPSrm addr:$src)>; def : Pat<(loadv4i32 addr:$src), @@ -3897,6 +3921,24 @@ let Predicates = [HasAVX] in { (VMOVUPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v16i8 VR128:$src), addr:$dst), (VMOVUPSmr addr:$dst, VR128:$src)>; + + // 256-bit load/store + def : Pat<(alignedloadv4i64 addr:$src), + (VMOVAPSYrm addr:$src)>; + def : Pat<(loadv4i64 addr:$src), + (VMOVUPSYrm addr:$src)>; + def : Pat<(alignedloadv8i32 addr:$src), + (VMOVAPSYrm addr:$src)>; + def : Pat<(loadv8i32 addr:$src), + (VMOVUPSYrm addr:$src)>; + def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst), + (VMOVAPSYmr addr:$dst, VR256:$src)>; + def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst), + (VMOVAPSYmr addr:$dst, VR256:$src)>; + def : Pat<(store (v4i64 VR256:$src), addr:$dst), + (VMOVUPSYmr addr:$dst, VR256:$src)>; + def : Pat<(store (v8i32 VR256:$src), addr:$dst), + (VMOVUPSYmr addr:$dst, VR256:$src)>; } //===----------------------------------------------------------------------===// @@ -5195,33 +5237,52 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), // CLMUL Instructions //===----------------------------------------------------------------------===// -// Only the AVX version of CLMUL instructions are described here. - // Carry-less Multiplication instructions -def VPCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), +let Constraints = "$src1 = $dst" in { +def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", + []>; + +def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", + []>; +} + +// AVX carry-less Multiplication instructions +def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>; -def VPCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), +def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>; -// Assembler Only -multiclass avx_vpclmul<string asm> { - def rr : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>; - - def rm : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>; -} -defm VPCLMULHQHQDQ : avx_vpclmul<"vpclmulhqhqdq">; -defm VPCLMULHQLQDQ : avx_vpclmul<"vpclmulhqlqdq">; -defm VPCLMULLQHQDQ : avx_vpclmul<"vpclmullqhqdq">; -defm VPCLMULLQLQDQ : avx_vpclmul<"vpclmullqlqdq">; + +multiclass pclmul_alias<string asm, int immop> { + def : InstAlias<!strconcat("pclmul", asm, + "dq {$src, $dst|$dst, $src}"), + (PCLMULQDQrr VR128:$dst, VR128:$src, immop)>; + + def : InstAlias<!strconcat("pclmul", asm, + "dq {$src, $dst|$dst, $src}"), + (PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>; + + def : 
InstAlias<!strconcat("vpclmul", asm, + "dq {$src2, $src1, $dst|$dst, $src1, $src2}"), + (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>; + + def : InstAlias<!strconcat("vpclmul", asm, + "dq {$src2, $src1, $dst|$dst, $src1, $src2}"), + (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop)>; +} +defm : pclmul_alias<"hqhq", 0x11>; +defm : pclmul_alias<"hqlq", 0x01>; +defm : pclmul_alias<"lqhq", 0x10>; +defm : pclmul_alias<"lqlq", 0x00>; //===----------------------------------------------------------------------===// // AVX Instructions diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 04149e7..ce8ef49 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -18,26 +18,32 @@ #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; namespace { class X86MCCodeEmitter : public MCCodeEmitter { X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT - const TargetMachine &TM; - const TargetInstrInfo &TII; + const MCInstrInfo &MCII; + const MCSubtargetInfo &STI; MCContext &Ctx; - bool Is64BitMode; public: - X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit) - : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) { - Is64BitMode = is64Bit; + X86MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, + MCContext &ctx) + : MCII(mcii), STI(sti), Ctx(ctx) { } ~X86MCCodeEmitter() {} + bool is64BitMode() const { + // FIXME: Can tablegen auto-generate this? + return (STI.getFeatureBits() & X86::Mode64Bit) != 0; + } + static unsigned GetX86RegNum(const MCOperand &MO) { return X86RegisterInfo::getX86RegNum(MO.getReg()); } @@ -126,16 +132,10 @@ public: } // end anonymous namespace -MCCodeEmitter *llvm::createX86_32MCCodeEmitter(const Target &, - TargetMachine &TM, - MCContext &Ctx) { - return new X86MCCodeEmitter(TM, Ctx, false); -} - -MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &, - TargetMachine &TM, - MCContext &Ctx) { - return new X86MCCodeEmitter(TM, Ctx, true); +MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new X86MCCodeEmitter(MCII, STI, Ctx); } /// isDisp8 - Return true if this signed displacement fits in a 8-bit @@ -245,7 +245,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Handle %rip relative addressing. if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode - assert(Is64BitMode && "Rip-relative addressing requires 64-bit mode"); + assert(is64BitMode() && "Rip-relative addressing requires 64-bit mode"); assert(IndexReg.getReg() == 0 && "Invalid rip-relative address"); EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); @@ -284,7 +284,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, BaseRegNo != N86::ESP && // If there is no base register and we're in 64-bit mode, we need a SIB // byte to emit an addr that is just 'disp32' (the non-RIP relative form). - (!Is64BitMode || BaseReg != 0)) { + (!is64BitMode() || BaseReg != 0)) { if (BaseReg == 0) { // [disp32] in X86-32 mode EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); @@ -729,7 +729,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // Emit the address size opcode prefix as needed. 
if ((TSFlags & X86II::AdSize) || - (MemOperand != -1 && Is64BitMode && Is32BitMemOperand(MI, MemOperand))) + (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand))) EmitByte(0x67, CurByte, OS); // Emit the operand size opcode prefix as needed. @@ -772,7 +772,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // Handle REX prefix. // FIXME: Can this come before F2 etc to simplify emission? - if (Is64BitMode) { + if (is64BitMode()) { if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc)) EmitByte(0x40 | REX, CurByte, OS); } @@ -803,7 +803,7 @@ void X86MCCodeEmitter:: EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { unsigned Opcode = MI.getOpcode(); - const MCInstrDesc &Desc = TII.get(Opcode); + const MCInstrDesc &Desc = MCII.get(Opcode); uint64_t TSFlags = Desc.TSFlags; // Pseudo instructions don't get encoded. diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 793156f..e385335 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -16,8 +16,8 @@ #include "X86MCInstLower.h" #include "X86AsmPrinter.h" #include "X86COFFMachineModuleInfo.h" -#include "X86MCAsmInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index d32b822..f2faf59 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -40,7 +40,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/CommandLine.h" -#define GET_REGINFO_MC_DESC #define GET_REGINFO_TARGET_DESC #include "X86GenRegisterInfo.inc" @@ -107,8 +106,8 @@ int X86RegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { /// getCompactUnwindRegNum - This function maps the register to the number for /// compact unwind encoding. Return -1 if the register isn't valid. -int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum) const { - switch (RegNum) { +int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const { + switch (getLLVMRegNum(RegNum, isEH)) { case X86::EBX: case X86::RBX: return 1; case X86::ECX: case X86::R12: return 2; case X86::EDX: case X86::R13: return 3; @@ -730,7 +729,10 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (MI.getOperand(i+3).isImm()) { // Offset is a 32-bit integer. - int Offset = FIOffset + (int)(MI.getOperand(i + 3).getImm()); + int Imm = (int)(MI.getOperand(i + 3).getImm()); + int Offset = FIOffset + Imm; + assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) && + "Requesting 64-bit offset in 32-bit immediate!"); MI.getOperand(i + 3).ChangeToImmediate(Offset); } else { // Offset is symbolic. This is extremely rare. diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index a09c7ee..a12eb12 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -83,7 +83,7 @@ public: /// getCompactUnwindRegNum - This function maps the register to the number for /// compact unwind encoding. Return -1 if the register isn't valid. - int getCompactUnwindRegNum(unsigned RegNum) const; + int getCompactUnwindRegNum(unsigned RegNum, bool isEH) const; /// Code Generation virtual methods... 
/// diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index a1e6d7b..5e6c659 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -21,9 +21,8 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/SmallVector.h" -#define GET_SUBTARGETINFO_CTOR -#define GET_SUBTARGETINFO_MC_DESC #define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR #include "X86GenSubtargetInfo.inc" using namespace llvm; @@ -158,7 +157,7 @@ const char *X86Subtarget::getBZeroEntry() const { /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls /// to immediate address. bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const { - if (Is64Bit) + if (In64BitMode) return false; return isTargetELF() || TM.getRelocationModel() == Reloc::Static; } @@ -174,73 +173,6 @@ unsigned X86Subtarget::getSpecialAddressLatency() const { return 200; } -/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the -/// specified arguments. If we can't run cpuid on the host, return true. -static bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, - unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { -#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) - #if defined(__GNUC__) - // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually. - asm ("movq\t%%rbx, %%rsi\n\t" - "cpuid\n\t" - "xchgq\t%%rbx, %%rsi\n\t" - : "=a" (*rEAX), - "=S" (*rEBX), - "=c" (*rECX), - "=d" (*rEDX) - : "a" (value)); - return false; - #elif defined(_MSC_VER) - int registers[4]; - __cpuid(registers, value); - *rEAX = registers[0]; - *rEBX = registers[1]; - *rECX = registers[2]; - *rEDX = registers[3]; - return false; - #endif -#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) - #if defined(__GNUC__) - asm ("movl\t%%ebx, %%esi\n\t" - "cpuid\n\t" - "xchgl\t%%ebx, %%esi\n\t" - : "=a" (*rEAX), - "=S" (*rEBX), - "=c" (*rECX), - "=d" (*rEDX) - : "a" (value)); - return false; - #elif defined(_MSC_VER) - __asm { - mov eax,value - cpuid - mov esi,rEAX - mov dword ptr [esi],eax - mov esi,rEBX - mov dword ptr [esi],ebx - mov esi,rECX - mov dword ptr [esi],ecx - mov esi,rEDX - mov dword ptr [esi],edx - } - return false; - #endif -#endif - return true; -} - -static void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) { - Family = (EAX >> 8) & 0xf; // Bits 8 - 11 - Model = (EAX >> 4) & 0xf; // Bits 4 - 7 - if (Family == 6 || Family == 0xf) { - if (Family == 0xf) - // Examine extended family ID if family ID is F. - Family += (EAX >> 20) & 0xff; // Bits 20 - 27 - // Examine extended model ID if family ID is 6 or F. 
- Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 - } -} - void X86Subtarget::AutoDetectSubtargetFeatures() { unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; union { @@ -248,51 +180,65 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { char c[12]; } text; - if (GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1)) + if (X86_MC::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1)) return; - GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); + X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); - if ((EDX >> 15) & 1) HasCMov = true; - if ((EDX >> 23) & 1) X86SSELevel = MMX; - if ((EDX >> 25) & 1) X86SSELevel = SSE1; - if ((EDX >> 26) & 1) X86SSELevel = SSE2; - if (ECX & 0x1) X86SSELevel = SSE3; - if ((ECX >> 9) & 1) X86SSELevel = SSSE3; - if ((ECX >> 19) & 1) X86SSELevel = SSE41; - if ((ECX >> 20) & 1) X86SSELevel = SSE42; + if ((EDX >> 15) & 1) HasCMov = true; ToggleFeature(X86::FeatureCMOV); + if ((EDX >> 23) & 1) X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX); + if ((EDX >> 25) & 1) X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1); + if ((EDX >> 26) & 1) X86SSELevel = SSE2; ToggleFeature(X86::FeatureSSE2); + if (ECX & 0x1) X86SSELevel = SSE3; ToggleFeature(X86::FeatureSSE3); + if ((ECX >> 9) & 1) X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3); + if ((ECX >> 19) & 1) X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41); + if ((ECX >> 20) & 1) X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42); // FIXME: AVX codegen support is not ready. - //if ((ECX >> 28) & 1) { HasAVX = true; X86SSELevel = NoMMXSSE; } + //if ((ECX >> 28) & 1) { HasAVX = true; } ToggleFeature(X86::FeatureAVX); bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; - HasCLMUL = IsIntel && ((ECX >> 1) & 0x1); - HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); - HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1); - HasAES = IsIntel && ((ECX >> 25) & 0x1); + HasCLMUL = IsIntel && ((ECX >> 1) & 0x1); ToggleFeature(X86::FeatureCLMUL); + HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); ToggleFeature(X86::FeatureFMA3); + HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1); ToggleFeature(X86::FeaturePOPCNT); + HasAES = IsIntel && ((ECX >> 25) & 0x1); ToggleFeature(X86::FeatureAES); if (IsIntel || IsAMD) { // Determine if bit test memory instructions are slow. unsigned Family = 0; unsigned Model = 0; - DetectFamilyModel(EAX, Family, Model); - IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13); + X86_MC::DetectFamilyModel(EAX, Family, Model); + if (IsAMD || (Family == 6 && Model >= 13)) { + IsBTMemSlow = true; + ToggleFeature(X86::FeatureSlowBTMem); + } // If it's Nehalem, unaligned memory access is fast. 
- if (Family == 15 && Model == 26) + if (Family == 15 && Model == 26) { IsUAMemFast = true; + ToggleFeature(X86::FeatureFastUAMem); + } - GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); - HasX86_64 = (EDX >> 29) & 0x1; - HasSSE4A = IsAMD && ((ECX >> 6) & 0x1); - HasFMA4 = IsAMD && ((ECX >> 16) & 0x1); + X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + if ((EDX >> 29) & 0x1) { + HasX86_64 = true; + ToggleFeature(X86::Feature64Bit); + } + if (IsAMD && ((ECX >> 6) & 0x1)) { + HasSSE4A = true; + ToggleFeature(X86::FeatureSSE4A); + } + if (IsAMD && ((ECX >> 16) & 0x1)) { + HasFMA4 = true; + ToggleFeature(X86::FeatureFMA4); + } } } X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, const std::string &FS, - bool is64Bit, unsigned StackAlignOverride) - : X86GenSubtargetInfo() + unsigned StackAlignOverride, bool is64Bit) + : X86GenSubtargetInfo(TT, CPU, FS) , PICStyle(PICStyles::None) , X86SSELevel(NoMMXSSE) , X863DNowLevel(NoThreeDNow) @@ -312,40 +258,59 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, // FIXME: this is a known good value for Yonah. How about others? , MaxInlineSizeThreshold(128) , TargetTriple(TT) - , Is64Bit(is64Bit) { - + , In64BitMode(is64Bit) { // Determine default and user specified characteristics - if (!CPU.empty() || !FS.empty()) { - // If feature string is not empty, parse features string. + if (!FS.empty() || !CPU.empty()) { std::string CPUName = CPU; - if (CPUName.empty()) + if (CPUName.empty()) { +#if defined (__x86_64__) || defined(__i386__) CPUName = sys::getHostCPUName(); - ParseSubtargetFeatures(FS, CPUName); - // All X86-64 CPUs also have SSE2, however user might request no SSE via - // -mattr, so don't force SSELevel here. - if (HasAVX) - X86SSELevel = NoMMXSSE; +#else + CPUName = "generic"; +#endif + } + + // Make sure 64-bit features are available in 64-bit mode. (But make sure + // SSE2 can be turned off explicitly.) + std::string FullFS = FS; + if (In64BitMode) { + if (!FullFS.empty()) + FullFS = "+64bit,+sse2," + FullFS; + else + FullFS = "+64bit,+sse2"; + } + + // If feature string is not empty, parse features string. + ParseSubtargetFeatures(CPUName, FullFS); } else { // Otherwise, use CPUID to auto-detect feature set. AutoDetectSubtargetFeatures(); - // Make sure SSE2 is enabled; it is available on all X86-64 CPUs. - if (Is64Bit && !HasAVX && X86SSELevel < SSE2) - X86SSELevel = SSE2; - } - // If requesting codegen for X86-64, make sure that 64-bit features - // are enabled. - if (Is64Bit) { - HasX86_64 = true; + // Make sure 64-bit features are available in 64-bit mode. + if (In64BitMode) { + HasX86_64 = true; ToggleFeature(X86::Feature64Bit); + HasCMov = true; ToggleFeature(X86::FeatureCMOV); - // All 64-bit cpus have cmov support. - HasCMov = true; + if (!HasAVX && X86SSELevel < SSE2) { + X86SSELevel = SSE2; + ToggleFeature(X86::FeatureSSE1); + ToggleFeature(X86::FeatureSSE2); + } + } } + + // It's important to keep the MCSubtargetInfo feature bits in sync with + // target data structure which is shared with MC code emitter, etc. 
+ if (In64BitMode) + ToggleFeature(X86::Mode64Bit); + + if (HasAVX) + X86SSELevel = NoMMXSSE; DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel << ", 3DNowLevel " << X863DNowLevel << ", 64bit " << HasX86_64 << "\n"); - assert((!Is64Bit || HasX86_64) && + assert((!In64BitMode || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both @@ -353,6 +318,6 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, if (StackAlignOverride) stackAlignment = StackAlignOverride; else if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() || - isTargetSolaris() || Is64Bit) + isTargetSolaris() || In64BitMode) stackAlignment = 16; } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index d49b871..6d22027 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -24,6 +24,7 @@ namespace llvm { class GlobalValue; +class StringRef; class TargetMachine; /// PICStyles - The X86 backend supports a number of different styles of PIC. @@ -111,9 +112,8 @@ protected: Triple TargetTriple; private: - /// Is64Bit - True if the processor supports 64-bit instructions and - /// pointer size is 64 bit. - bool Is64Bit; + /// In64BitMode - True if compiling for 64-bit, false for 32-bit. + bool In64BitMode; public: @@ -121,8 +121,8 @@ public: /// of the specified triple. /// X86Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool is64Bit, - unsigned StackAlignOverride); + const std::string &FS, + unsigned StackAlignOverride, bool is64Bit); /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every @@ -135,13 +135,13 @@ public: /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); /// AutoDetectSubtargetFeatures - Auto-detect CPU features using CPUID /// instruction. 
void AutoDetectSubtargetFeatures(); - bool is64Bit() const { return Is64Bit; } + bool is64Bit() const { return In64BitMode; } PICStyles::Style getPICStyle() const { return PICStyle; } void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } @@ -199,7 +199,7 @@ public: } bool isTargetWin64() const { - return Is64Bit && (isTargetMingw() || isTargetWindows()); + return In64BitMode && (isTargetMingw() || isTargetWindows()); } bool isTargetEnvMacho() const { @@ -207,7 +207,7 @@ public: } bool isTargetWin32() const { - return !Is64Bit && (isTargetMingw() || isTargetWindows()); + return !In64BitMode && (isTargetMingw() || isTargetWindows()); } bool isPICStyleSet() const { return PICStyle != PICStyles::None; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 1b6fa30..9cab0e0 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "X86MCAsmInfo.h" #include "X86TargetMachine.h" #include "X86.h" #include "llvm/PassManager.h" @@ -24,22 +23,6 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) { - if (TheTriple.getArch() == Triple::x86_64) - return new X86_64MCAsmInfoDarwin(TheTriple); - else - return new X86MCAsmInfoDarwin(TheTriple); - } - - if (TheTriple.isOSWindows()) - return new X86MCAsmInfoCOFF(TheTriple); - - return new X86ELFMCAsmInfo(TheTriple); -} - static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, MCContext &Ctx, TargetAsmBackend &TAB, raw_ostream &_OS, @@ -62,15 +45,11 @@ extern "C" void LLVMInitializeX86Target() { RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target); RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target); - // Register the target asm info. - RegisterAsmInfoFn A(TheX86_32Target, createMCAsmInfo); - RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo); - // Register the code emitter. TargetRegistry::RegisterCodeEmitter(TheX86_32Target, - createX86_32MCCodeEmitter); + createX86MCCodeEmitter); TargetRegistry::RegisterCodeEmitter(TheX86_64Target, - createX86_64MCCodeEmitter); + createX86MCCodeEmitter); // Register the asm backend. TargetRegistry::RegisterAsmBackend(TheX86_32Target, @@ -119,8 +98,8 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT, X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit) - : LLVMTargetMachine(T, TT), - Subtarget(TT, CPU, FS, is64Bit, StackAlignmentOverride), + : LLVMTargetMachine(T, TT, CPU, FS), + Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit), FrameLowering(*this, Subtarget), ELFWriterInfo(is64Bit, true) { DefRelocModel = getRelocationModel(); |
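
The sketches that follow are illustrative only and are not part of the patch; helper names, sample values and build flags are made up, and each sketch restates an idea already visible in the hunks above.

The new LowerSIGN_EXTEND_INREG lowers vector sign_extend_inreg with the usual shift pair: shift every lane left by the difference between the lane width and the source width (the pslli intrinsics), then arithmetic-shift it back right by the same amount (psrai) where such an instruction exists. A minimal standalone sketch of the same trick with SSE2 intrinsics, assuming an SSE2-capable host:

    #include <emmintrin.h>   // SSE2
    #include <cstdio>

    // Sign-extend the low 8 bits of every 32-bit lane in-register: move the
    // significant bits to the top, then smear the sign bit back down with an
    // arithmetic right shift. 32 - 8 = 24 is "BitsDiff" in the patch, and the
    // two shifts are the pslli.d / psrai.d pair chosen for MVT::v4i32.
    static __m128i sext_inreg_epi32_from8(__m128i v) {
      v = _mm_slli_epi32(v, 24);           // Intrinsic::x86_sse2_pslli_d
      return _mm_srai_epi32(v, 24);        // Intrinsic::x86_sse2_psrai_d
    }

    int main() {
      __m128i v = _mm_set_epi32(0xFF, 0x7F, 0x80, 0x01);
      alignas(16) int out[4];
      _mm_store_si128((__m128i *)out, sext_inreg_epi32_from8(v));
      // Reading the low 8 bits of each lane as signed gives -1 127 -128 1.
      std::printf("%d %d %d %d\n", out[3], out[2], out[1], out[0]);
      return 0;
    }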
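
The LowerMEMBARRIER change keeps the no-SSE2 fallback only for 32-bit targets, since x86-64 always has mfence available. The fallback path above sets up a memory operand rooted at ESP together with a zero constant; the traditional full-barrier substitute when mfence is missing is a locked read-modify-write of the stack. A hypothetical standalone equivalent in GCC-style inline assembly (32-bit x86 only):

    // Full memory barrier without SSE2: any locked RMW serializes, so a
    // locked "orl $0" against the top of the stack does the job. The asm
    // statement declares no operands, only memory and flag clobbers.
    static inline void membar_no_sse2(void) {
      __asm__ __volatile__("lock; orl $0, (%%esp)" ::: "memory", "cc");
    }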
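
The 32-byte case added to getLoadStoreRegOpcode spills and reloads VR256 registers with vmovaps when the stack slot is known to be suitably aligned and falls back to vmovups otherwise. The same aligned/unaligned distinction at the intrinsic level, as a sketch assuming an AVX-capable host and an AVX-enabled build:

    #include <immintrin.h>   // AVX
    #include <cstdio>

    int main() {
      // 32-byte aligned storage may use vmovaps (the VMOVAPSYrm/Ymr entries)...
      alignas(32) float aligned[8] = {0, 1, 2, 3, 4, 5, 6, 7};
      __m256 a = _mm256_load_ps(aligned);

      // ...while potentially unaligned storage needs vmovups (VMOVUPSY*).
      float unaligned[9];
      for (int i = 0; i < 9; ++i) unaligned[i] = (float)i;
      __m256 b = _mm256_loadu_ps(unaligned + 1);

      alignas(32) float out[8];
      _mm256_store_ps(out, _mm256_add_ps(a, b));
      std::printf("%g %g\n", out[0], out[7]);   // 1 and 15
      return 0;
    }

The new fold-table and ReplaceableInstrs rows follow the same pattern as the existing 16-byte entries, just with the Y-suffixed 256-bit opcodes and a 32-byte alignment requirement.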
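
The pclmul_alias multiclass maps the pclmul{lqlq,hqlq,lqhq,hqhq}dq spellings (and their vpclmul forms) onto PCLMULQDQ/VPCLMULQDQ with the immediates 0x00, 0x01, 0x10 and 0x11. As a rough user-level illustration of what those immediates select, using the compiler intrinsic rather than the aliases themselves (assumes a PCLMUL-capable CPU and a -mpclmul-style build; the values are arbitrary):

    #include <immintrin.h>   // PCLMUL
    #include <cstdio>

    // _mm_clmulepi64_si128(a, b, imm) carry-lessly multiplies one 64-bit half
    // of each operand; the immediate picks the halves, matching the alias
    // table above: 0x00 = lqlq, 0x01 = hqlq, 0x10 = lqhq, 0x11 = hqhq.
    int main() {
      __m128i a = _mm_set_epi64x(0x3, 0x5);   // high = 3, low = 5
      __m128i b = _mm_set_epi64x(0x7, 0x6);   // high = 7, low = 6

      alignas(16) unsigned long long lo[2], hi[2];
      _mm_store_si128((__m128i *)lo, _mm_clmulepi64_si128(a, b, 0x00));
      _mm_store_si128((__m128i *)hi, _mm_clmulepi64_si128(a, b, 0x11));

      // Carry-less 5*6 is 30 (no cross-term overlap), but carry-less 3*7 is 9
      // rather than 21 because the partial products are XORed, not added.
      std::printf("lqlq: %llu  hqhq: %llu\n", lo[0], hi[0]);
      return 0;
    }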
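
AutoDetectSubtargetFeatures now goes through X86_MC::GetCpuIDAndInfo and X86_MC::DetectFamilyModel, and the decoding shown being deleted from this file is what those helpers provide: the extended family is added in when the base family is 0xF, and the extended model is prepended when the base family is 6 or 0xF. A small self-contained sketch of that decoding (the function name and the sample EAX value are illustrative):

    #include <cstdio>

    // Decode display family/model from CPUID.1:EAX, mirroring the removed
    // DetectFamilyModel helper above.
    static void decodeFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
      Family = (EAX >> 8) & 0xf;             // bits 11:8
      Model  = (EAX >> 4) & 0xf;             // bits 7:4
      if (Family == 6 || Family == 0xf) {
        if (Family == 0xf)
          Family += (EAX >> 20) & 0xff;      // extended family, bits 27:20
        Model += ((EAX >> 16) & 0xf) << 4;   // extended model, bits 19:16
      }
    }

    int main() {
      unsigned Family = 0, Model = 0;
      decodeFamilyModel(0x000106A5, Family, Model);        // a Nehalem-era signature
      std::printf("family %u model %u\n", Family, Model);  // family 6 model 26
      return 0;
    }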