Diffstat (limited to 'lib/Target/X86')
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp           |  72
-rw-r--r--  lib/Target/X86/CMakeLists.txt                       |   1
-rw-r--r--  lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp    |   8
-rw-r--r--  lib/Target/X86/InstPrinter/X86ATTInstPrinter.h      |   6
-rw-r--r--  lib/Target/X86/InstPrinter/X86InstComments.cpp      |   2
-rw-r--r--  lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp  |   3
-rw-r--r--  lib/Target/X86/InstPrinter/X86IntelInstPrinter.h    |   3
-rw-r--r--  lib/Target/X86/MCTargetDesc/CMakeLists.txt          |   7
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp        |  35  (renamed from lib/Target/X86/X86MCAsmInfo.cpp)
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h          |   0  (renamed from lib/Target/X86/X86MCAsmInfo.h)
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp     | 185  (new)
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h       |  60  (new)
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86TargetDesc.cpp       |  74  (deleted)
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86TargetDesc.h         |  34  (deleted)
-rw-r--r--  lib/Target/X86/X86.h                                |  13
-rw-r--r--  lib/Target/X86/X86.td                               |   7
-rw-r--r--  lib/Target/X86/X86AsmBackend.cpp                    |   3
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp                    |   5
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp                 |  19
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp                 | 179
-rw-r--r--  lib/Target/X86/X86FrameLowering.h                   |   4
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp                  | 101
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp                  | 231
-rw-r--r--  lib/Target/X86/X86ISelLowering.h                    |   5
-rw-r--r--  lib/Target/X86/X86InstrFormats.td                   |   5
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td             |   7
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp                     |  47
-rw-r--r--  lib/Target/X86/X86InstrInfo.td                      |  20
-rw-r--r--  lib/Target/X86/X86InstrSSE.td                       | 201
-rw-r--r--  lib/Target/X86/X86MCCodeEmitter.cpp                 |  42
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp                   |   2
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp                  |  10
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h                    |   2
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp                     | 197
-rw-r--r--  lib/Target/X86/X86Subtarget.h                       |  18
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp                 |  29
36 files changed, 1041 insertions(+), 596 deletions(-)
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index c352bfc..d45dd35 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -15,9 +15,11 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -25,17 +27,15 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
namespace {
struct X86Operand;
class X86ATTAsmParser : public TargetAsmParser {
+ MCSubtargetInfo &STI;
MCAsmParser &Parser;
- TargetMachine &TM;
-
-protected:
- unsigned Is64Bit : 1;
private:
MCAsmParser &getParser() const { return Parser; }
@@ -61,6 +61,11 @@ private:
/// or %es:(%edi) in 32bit mode.
bool isDstOp(X86Operand &Op);
+ bool is64BitMode() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+ }
+
/// @name Auto-generated Matcher Functions
/// {
@@ -70,12 +75,11 @@ private:
/// }
public:
- X86ATTAsmParser(const Target &T, MCAsmParser &parser, TargetMachine &TM)
- : TargetAsmParser(T), Parser(parser), TM(TM) {
+ X86ATTAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
+ : TargetAsmParser(), STI(sti), Parser(parser) {
// Initialize the set of available features.
- setAvailableFeatures(ComputeAvailableFeatures(
- &TM.getSubtarget<X86Subtarget>()));
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
@@ -84,23 +88,6 @@ public:
virtual bool ParseDirective(AsmToken DirectiveID);
};
-
-class X86_32ATTAsmParser : public X86ATTAsmParser {
-public:
- X86_32ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM)
- : X86ATTAsmParser(T, Parser, TM) {
- Is64Bit = false;
- }
-};
-
-class X86_64ATTAsmParser : public X86ATTAsmParser {
-public:
- X86_64ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM)
- : X86ATTAsmParser(T, Parser, TM) {
- Is64Bit = true;
- }
-};
-
} // end anonymous namespace
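The two deleted subclasses existed only to hard-code Is64Bit; the parser now derives the mode from the subtarget's feature bits. A minimal standalone sketch of that pattern (the stub classes and harness are invented for illustration; only the Mode64Bit name and the getFeatureBits() test mirror the diff):

#include <cassert>
#include <cstdint>

namespace X86 { const uint64_t Mode64Bit = 1ull << 0; } // bit value illustrative; generated from X86.td in the real tree

// Stand-in for llvm::MCSubtargetInfo.
class SubtargetInfoStub {
  uint64_t FeatureBits;
public:
  explicit SubtargetInfoStub(uint64_t FB) : FeatureBits(FB) {}
  uint64_t getFeatureBits() const { return FeatureBits; }
};

// Stand-in for X86ATTAsmParser: one class now serves both targets.
class ParserStub {
  SubtargetInfoStub &STI;
public:
  explicit ParserStub(SubtargetInfoStub &S) : STI(S) {}
  bool is64BitMode() const {
    return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
  }
};

int main() {
  SubtargetInfoStub STI64(X86::Mode64Bit), STI32(0);
  assert(ParserStub(STI64).is64BitMode());   // x86-64 triple
  assert(!ParserStub(STI32).is64BitMode());  // i386 triple
  return 0;
}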
/// @name Auto-generated Match Functions
@@ -155,7 +142,7 @@ struct X86Operand : public MCParsedAsmOperand {
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
- virtual void dump(raw_ostream &OS) const {}
+ virtual void print(raw_ostream &OS) const {}
StringRef getToken() const {
assert(Kind == Token && "Invalid access!");
@@ -365,7 +352,7 @@ struct X86Operand : public MCParsedAsmOperand {
} // end anonymous namespace.
bool X86ATTAsmParser::isSrcOp(X86Operand &Op) {
- unsigned basereg = Is64Bit ? X86::RSI : X86::ESI;
+ unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
return (Op.isMem() &&
(Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
@@ -375,7 +362,7 @@ bool X86ATTAsmParser::isSrcOp(X86Operand &Op) {
}
bool X86ATTAsmParser::isDstOp(X86Operand &Op) {
- unsigned basereg = Is64Bit ? X86::RDI : X86::EDI;
+ unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
return Op.isMem() && Op.Mem.SegReg == X86::ES &&
isa<MCConstantExpr>(Op.Mem.Disp) &&
@@ -406,7 +393,7 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
// FIXME: This should be done using Requires<In32BitMode> and
// Requires<In64BitMode> so "eiz" usage in 64-bit instructions
// can also be checked.
- if (RegNo == X86::RIZ && !Is64Bit)
+ if (RegNo == X86::RIZ && !is64BitMode())
return Error(Tok.getLoc(), "riz register in 64-bit mode only");
// Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
@@ -710,23 +697,6 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
}
}
- // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq
- if (PatchedName.startswith("vpclmul")) {
- unsigned CLMULQuadWordSelect = StringSwitch<unsigned>(
- PatchedName.slice(7, PatchedName.size() - 2))
- .Case("lqlq", 0x00) // src1[63:0], src2[63:0]
- .Case("hqlq", 0x01) // src1[127:64], src2[63:0]
- .Case("lqhq", 0x10) // src1[63:0], src2[127:64]
- .Case("hqhq", 0x11) // src1[127:64], src2[127:64]
- .Default(~0U);
- if (CLMULQuadWordSelect != ~0U) {
- ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect,
- getParser().getContext());
- assert(PatchedName.endswith("dq") && "Unexpected mnemonic!");
- PatchedName = "vpclmulqdq";
- }
- }
-
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
if (ExtraImmOp)
@@ -843,7 +813,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
// Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
if (Name.startswith("movs") && Operands.size() == 3 &&
(Name == "movsb" || Name == "movsw" || Name == "movsl" ||
- (Is64Bit && Name == "movsq"))) {
+ (is64BitMode() && Name == "movsq"))) {
X86Operand &Op = *(X86Operand*)Operands.begin()[1];
X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
if (isSrcOp(Op) && isDstOp(Op2)) {
@@ -856,7 +826,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
// Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
if (Name.startswith("lods") && Operands.size() == 3 &&
(Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
- Name == "lodsl" || (Is64Bit && Name == "lodsq"))) {
+ Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) {
X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
if (isSrcOp(*Op1) && Op2->isReg()) {
@@ -886,7 +856,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
// Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
if (Name.startswith("stos") && Operands.size() == 3 &&
(Name == "stos" || Name == "stosb" || Name == "stosw" ||
- Name == "stosl" || (Is64Bit && Name == "stosq"))) {
+ Name == "stosl" || (is64BitMode() && Name == "stosq"))) {
X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
if (isDstOp(*Op2) && Op1->isReg()) {
@@ -1161,8 +1131,8 @@ extern "C" void LLVMInitializeX86AsmLexer();
// Force static initialization.
extern "C" void LLVMInitializeX86AsmParser() {
- RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target);
- RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target);
+ RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target);
+ RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
LLVMInitializeX86AsmLexer();
}
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 633d982..b112f9f 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -27,7 +27,6 @@ set(sources
X86InstrInfo.cpp
X86JITInfo.cpp
X86MachObjectWriter.cpp
- X86MCAsmInfo.cpp
X86MCCodeEmitter.cpp
X86MCInstLower.cpp
X86RegisterInfo.cpp
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 53738b1..c37d879 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -15,8 +15,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86ATTInstPrinter.h"
#include "X86InstComments.h"
-#include "X86Subtarget.h"
-#include "MCTargetDesc/X86TargetDesc.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -31,11 +30,8 @@ using namespace llvm;
#define PRINT_ALIAS_INSTR
#include "X86GenAsmWriter.inc"
-X86ATTInstPrinter::X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+X86ATTInstPrinter::X86ATTInstPrinter(const MCAsmInfo &MAI)
: MCInstPrinter(MAI) {
- // Initialize the set of available features.
- setAvailableFeatures(ComputeAvailableFeatures(
- &TM.getSubtarget<X86Subtarget>()));
}
void X86ATTInstPrinter::printRegName(raw_ostream &OS,
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index 5f939b6..5426e5c 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -19,19 +19,15 @@
namespace llvm {
class MCOperand;
-class X86Subtarget;
-class TargetMachine;
class X86ATTInstPrinter : public MCInstPrinter {
public:
- X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI);
+ X86ATTInstPrinter(const MCAsmInfo &MAI);
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &OS);
virtual StringRef getOpcodeName(unsigned Opcode) const;
- // Methods used to print the alias of an instruction.
- unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const;
// Autogenerated by tblgen, returns true if we successfully printed an
// alias.
bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 5461c83..4e28dfe 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
#include "X86InstComments.h"
-#include "MCTargetDesc/X86TargetDesc.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/raw_ostream.h"
#include "../Utils/X86ShuffleDecode.h"
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 411d832..506e26c 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -15,8 +15,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86IntelInstPrinter.h"
#include "X86InstComments.h"
-#include "X86Subtarget.h"
-#include "MCTargetDesc/X86TargetDesc.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index c8030c3..e84a194 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -20,11 +20,10 @@
namespace llvm {
class MCOperand;
-class TargetMachine;
class X86IntelInstPrinter : public MCInstPrinter {
public:
- X86IntelInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+ X86IntelInstPrinter(const MCAsmInfo &MAI)
: MCInstPrinter(MAI) {}
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
index 50be61c..ca88f8f 100644
--- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
@@ -1,2 +1,7 @@
-add_llvm_library(LLVMX86Desc X86TargetDesc.cpp)
+add_llvm_library(LLVMX86Desc
+ X86MCTargetDesc.cpp
+ X86MCAsmInfo.cpp
+ )
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 2e1ec63..2703100 100644
--- a/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "X86MCAsmInfo.h"
-#include "X86TargetMachine.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -45,14 +44,17 @@ static const char *const x86_asm_table[] = {
"{flags}", "",
"{dirflag}", "",
"{fpsr}", "",
+ "{fpcr}", "",
"{cc}", "cc",
0,0};
-X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
+X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
+ bool is64Bit = T.getArch() == Triple::x86_64;
+ if (is64Bit)
+ PointerSize = 8;
+
AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
-
- bool is64Bit = Triple.getArch() == Triple::x86_64;
TextAlignFillValue = 0x90;
@@ -74,22 +76,14 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
ExceptionsType = ExceptionHandling::DwarfCFI;
}
-const MCExpr *
-X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
- unsigned Encoding,
- MCStreamer &Streamer) const {
- MCContext &Context = Streamer.getContext();
- const MCExpr *Res =
- MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
- const MCExpr *Four = MCConstantExpr::Create(4, Context);
- return MCBinaryExpr::CreateAdd(Res, Four, Context);
-}
-
X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
: X86MCAsmInfoDarwin(Triple) {
}
X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
+ if (T.getArch() == Triple::x86_64)
+ PointerSize = 8;
+
AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
@@ -114,6 +108,17 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
Data64bitsDirective = 0;
}
+const MCExpr *
+X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const {
+ MCContext &Context = Streamer.getContext();
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
+ const MCExpr *Four = MCConstantExpr::Create(4, Context);
+ return MCBinaryExpr::CreateAdd(Res, Four, Context);
+}
+
const MCSection *X86ELFMCAsmInfo::
getNonexecutableStackSection(MCContext &Ctx) const {
return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
index 2cd4c8e..2cd4c8e 100644
--- a/lib/Target/X86/X86MCAsmInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
new file mode 100644
index 0000000..b77f37b
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -0,0 +1,185 @@
+//===-- X86MCTargetDesc.cpp - X86 Target Descriptions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides X86 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MCTargetDesc.h"
+#include "X86MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Host.h"
+
+#define GET_REGINFO_MC_DESC
+#include "X86GenRegisterInfo.inc"
+
+#define GET_INSTRINFO_MC_DESC
+#include "X86GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "X86GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+
+std::string X86_MC::ParseX86Triple(StringRef TT) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::x86_64)
+ return "+64bit-mode";
+ return "-64bit-mode";
+}
+
+/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
+/// specified arguments. If we can't run cpuid on the host, return true.
+bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+ #if defined(__GNUC__)
+  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ int registers[4];
+ __cpuid(registers, value);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+ #endif
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ #if defined(__GNUC__)
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ __asm {
+ mov eax,value
+ cpuid
+ mov esi,rEAX
+ mov dword ptr [esi],eax
+ mov esi,rEBX
+ mov dword ptr [esi],ebx
+ mov esi,rECX
+ mov dword ptr [esi],ecx
+ mov esi,rEDX
+ mov dword ptr [esi],edx
+ }
+ return false;
+ #endif
+#endif
+ return true;
+}
+
+void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family,
+ unsigned &Model) {
+ Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+ Model = (EAX >> 4) & 0xf; // Bits 4 - 7
+ if (Family == 6 || Family == 0xf) {
+ if (Family == 0xf)
+ // Examine extended family ID if family ID is F.
+ Family += (EAX >> 20) & 0xff; // Bits 20 - 27
+ // Examine extended model ID if family ID is 6 or F.
+ Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+ }
+}
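The two helpers above are meant to compose: cpuid leaf 1 returns the processor signature in EAX, and DetectFamilyModel folds in the extended family/model fields. A hedged usage sketch (the wrapper function is hypothetical; only the two calls use signatures from this file):

static bool getHostFamilyModel(unsigned &Family, unsigned &Model) {
  unsigned EAX, EBX, ECX, EDX;
  // GetCpuIDAndInfo returns true on failure, e.g. on non-x86 hosts.
  if (X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
    return false;
  X86_MC::DetectFamilyModel(EAX, Family, Model);
  // Example: EAX = 0x000206A7 decodes to Family = 6, Model = 0x2A
  // (base model 0xA, plus extended model 0x2 shifted left by 4).
  return true;
}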
+
+MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ std::string ArchFS = X86_MC::ParseX86Triple(TT);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS.str();
+ else
+ ArchFS = FS;
+ }
+
+ std::string CPUName = CPU;
+ if (CPUName.empty()) {
+#if defined (__x86_64__) || defined(__i386__)
+ CPUName = sys::getHostCPUName();
+#else
+ CPUName = "generic";
+#endif
+ }
+
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitX86MCSubtargetInfo(X, TT, CPUName, ArchFS);
+ return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeX86MCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target,
+ X86_MC::createX86MCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target,
+ X86_MC::createX86MCSubtargetInfo);
+}
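With the hook registered, clients obtain subtarget information through the registry instead of a TargetMachine. A sketch of the lookup as a driver would perform it; TheTarget and TripleName are assumed to come from TargetRegistry::lookupTarget (not shown):

// OwningPtr matches the include added to X86AsmParser.cpp above.
OwningPtr<MCSubtargetInfo> STI(
    TheTarget->createMCSubtargetInfo(TripleName, /*CPU=*/"", /*FS=*/""));
// For an x86_64 triple, STI->getFeatureBits() now carries Mode64Bit,
// courtesy of ParseX86Triple() prepending "+64bit-mode".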
+
+static MCInstrInfo *createX86MCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitX86MCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeX86MCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo);
+}
+
+static MCRegisterInfo *createX86MCRegisterInfo() {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitX86MCRegisterInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeX86MCRegInfo() {
+ TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo);
+}
+
+
+static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
+ if (TheTriple.getArch() == Triple::x86_64)
+ return new X86_64MCAsmInfoDarwin(TheTriple);
+ else
+ return new X86MCAsmInfoDarwin(TheTriple);
+ }
+
+ if (TheTriple.isOSWindows())
+ return new X86MCAsmInfoCOFF(TheTriple);
+
+ return new X86ELFMCAsmInfo(TheTriple);
+}
+
+extern "C" void LLVMInitializeX86MCAsmInfo() {
+ // Register the target asm info.
+ RegisterMCAsmInfoFn A(TheX86_32Target, createX86MCAsmInfo);
+ RegisterMCAsmInfoFn B(TheX86_64Target, createX86MCAsmInfo);
+}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
new file mode 100644
index 0000000..89ea22b
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -0,0 +1,60 @@
+//===-- X86MCTargetDesc.h - X86 Target Descriptions -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides X86 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86MCTARGETDESC_H
+#define X86MCTARGETDESC_H
+
+#include <string>
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheX86_32Target, TheX86_64Target;
+
+namespace X86_MC {
+ std::string ParseX86Triple(StringRef TT);
+
+ /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in
+ /// the specified arguments. If we can't run cpuid on the host, return true.
+ bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX);
+
+ void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model);
+
+  /// createX86MCSubtargetInfo - Create an X86 MCSubtargetInfo instance.
+ /// This is exposed so Asm parser, etc. do not need to go through
+ /// TargetRegistry.
+ MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS);
+}
+
+} // End llvm namespace
+
+
+// Defines symbolic names for X86 registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "X86GenRegisterInfo.inc"
+
+// Defines symbolic names for the X86 instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "X86GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "X86GenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/X86/MCTargetDesc/X86TargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86TargetDesc.cpp
deleted file mode 100644
index c7c37f6..0000000
--- a/lib/Target/X86/MCTargetDesc/X86TargetDesc.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-//===-- X86TargetDesc.cpp - X86 Target Descriptions -------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides X86 specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86TargetDesc.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
-
-#define GET_REGINFO_MC_DESC
-#include "X86GenRegisterInfo.inc"
-
-#define GET_INSTRINFO_MC_DESC
-#include "X86GenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_MC_DESC
-#include "X86GenSubtargetInfo.inc"
-
-using namespace llvm;
-
-MCInstrInfo *createX86MCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitX86MCInstrInfo(X);
- return X;
-}
-
-MCRegisterInfo *createX86MCRegisterInfo() {
- MCRegisterInfo *X = new MCRegisterInfo();
- InitX86MCRegisterInfo(X);
- return X;
-}
-
-MCSubtargetInfo *createX86MCSubtargetInfo() {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitX86MCSubtargetInfo(X);
- return X;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeX86MCInstrInfo() {
- RegisterMCInstrInfo<MCInstrInfo> X(TheX86_32Target);
- RegisterMCInstrInfo<MCInstrInfo> Y(TheX86_64Target);
-
- TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo);
-}
-
-extern "C" void LLVMInitializeX86MCRegInfo() {
- RegisterMCRegInfo<MCRegisterInfo> X(TheX86_32Target);
- RegisterMCRegInfo<MCRegisterInfo> Y(TheX86_64Target);
-
- TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo);
-}
-
-extern "C" void LLVMInitializeX86MCSubtargetInfo() {
- RegisterMCSubtargetInfo<MCSubtargetInfo> X(TheX86_32Target);
- RegisterMCSubtargetInfo<MCSubtargetInfo> Y(TheX86_64Target);
-
- TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target,
- createX86MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target,
- createX86MCSubtargetInfo);
-}
diff --git a/lib/Target/X86/MCTargetDesc/X86TargetDesc.h b/lib/Target/X86/MCTargetDesc/X86TargetDesc.h
deleted file mode 100644
index 9ab622d..0000000
--- a/lib/Target/X86/MCTargetDesc/X86TargetDesc.h
+++ /dev/null
@@ -1,34 +0,0 @@
-//===-- X86TargetDesc.h - X86 Target Descriptions ---------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides X86 specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86TARGETDESC_H
-#define X86TARGETDESC_H
-
-namespace llvm {
-class Target;
-
-extern Target TheX86_32Target, TheX86_64Target;
-} // End llvm namespace
-
-// Defines symbolic names for X86 registers. This defines a mapping from
-// register name to register number.
-//
-#define GET_REGINFO_ENUM
-#include "X86GenRegisterInfo.inc"
-
-// Defines symbolic names for the X86 instructions.
-//
-#define GET_INSTRINFO_ENUM
-#include "X86GenInstrInfo.inc"
-
-#endif
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 9d66c2f..ec52dfb 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -15,7 +15,7 @@
#ifndef TARGET_X86_H
#define TARGET_X86_H
-#include "MCTargetDesc/X86TargetDesc.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Target/TargetMachine.h"
@@ -23,10 +23,12 @@ namespace llvm {
class FunctionPass;
class JITCodeEmitter;
+class MachineCodeEmitter;
class MCCodeEmitter;
class MCContext;
+class MCInstrInfo;
class MCObjectWriter;
-class MachineCodeEmitter;
+class MCSubtargetInfo;
class Target;
class TargetAsmBackend;
class X86TargetMachine;
@@ -58,10 +60,9 @@ FunctionPass *createSSEDomainFixPass();
FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
JITCodeEmitter &JCE);
-MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM,
- MCContext &Ctx);
-MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM,
- MCContext &Ctx);
+MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
TargetAsmBackend *createX86_32AsmBackend(const Target &, const std::string &);
TargetAsmBackend *createX86_64AsmBackend(const Target &, const std::string &);
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 7bb9676..4ccb43f 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -17,6 +17,13 @@
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
+// X86 Subtarget state.
+//
+
+def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
+ "64-bit mode (x86_64)">;
+
+//===----------------------------------------------------------------------===//
// X86 Subtarget features.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index 4d7d96d..9b556a5 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -194,6 +194,9 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) {
// PUSH
case X86::PUSHi8: return X86::PUSHi32;
+ case X86::PUSHi16: return X86::PUSHi32;
+ case X86::PUSH64i8: return X86::PUSH64i32;
+ case X86::PUSH64i16: return X86::PUSH64i32;
}
}
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index c2d53c4..99b4479 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -709,13 +709,12 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
//===----------------------------------------------------------------------===//
static MCInstPrinter *createX86MCInstPrinter(const Target &T,
- TargetMachine &TM,
unsigned SyntaxVariant,
const MCAsmInfo &MAI) {
if (SyntaxVariant == 0)
- return new X86ATTInstPrinter(TM, MAI);
+ return new X86ATTInstPrinter(MAI);
if (SyntaxVariant == 1)
- return new X86IntelInstPrinter(TM, MAI);
+ return new X86IntelInstPrinter(MAI);
return 0;
}
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 463cde0..6eed6ab 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -884,7 +884,7 @@ void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
// Kill registers by popping.
if (Kills && I != MBB->begin()) {
MachineBasicBlock::iterator I2 = llvm::prior(I);
- for (;;) {
+ while (StackTop) {
unsigned KReg = getStackEntry(0);
if (!(Kills & (1 << KReg)))
break;
@@ -1467,25 +1467,24 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
}
if (STUses && !isMask_32(STUses))
- report_fatal_error("Inline asm fixed input regs"
- " must be last on the x87 stack");
+ MI->emitError("fixed input regs must be last on the x87 stack");
unsigned NumSTUses = CountTrailingOnes_32(STUses);
// Defs must be contiguous from the stack top. ST0-STn.
- if (STDefs && !isMask_32(STDefs))
- report_fatal_error("Inline asm output regs"
- " must be last on the x87 stack");
+ if (STDefs && !isMask_32(STDefs)) {
+ MI->emitError("output regs must be last on the x87 stack");
+ STDefs = NextPowerOf2(STDefs) - 1;
+ }
unsigned NumSTDefs = CountTrailingOnes_32(STDefs);
// So must the clobbered stack slots. ST0-STm, m >= n.
if (STClobbers && !isMask_32(STDefs | STClobbers))
- report_fatal_error("Inline asm clobbers must be last on the x87 stack");
+ MI->emitError("clobbers must be last on the x87 stack");
// Popped inputs are the ones that are also clobbered or defined.
unsigned STPopped = STUses & (STDefs | STClobbers);
if (STPopped && !isMask_32(STPopped))
- report_fatal_error("Inline asm implicitly popped regs"
- " must be last on the x87 stack");
+ MI->emitError("implicitly popped regs must be last on the x87 stack");
unsigned NumSTPopped = CountTrailingOnes_32(STPopped);
DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
@@ -1501,7 +1500,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
continue;
if (!Op.isUse())
- report_fatal_error("Illegal \"f\" output constraint in inline asm");
+ MI->emitError("illegal \"f\" output constraint");
unsigned FPReg = getFPReg(Op);
FPUsed |= 1U << FPReg;
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index cd4e954..ed45a9a 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
@@ -1029,3 +1030,181 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
FrameIdx = 0;
}
}
+
+/// permuteEncode - Create the permutation encoding used with frameless
+/// stacks. It is passed the number of registers to be saved and an array of the
+/// registers saved.
+static uint32_t permuteEncode(unsigned SavedCount, unsigned Registers[6]) {
+ // The saved registers are numbered from 1 to 6. In order to encode the order
+ // in which they were saved, we re-number them according to their place in the
+ // register order. The re-numbering is relative to the last re-numbered
+ // register. E.g., if we have registers {6, 2, 4, 5} saved in that order:
+ //
+ // Orig Re-Num
+ // ---- ------
+ // 6 6
+ // 2 2
+ // 4 3
+ // 5 3
+ //
+ bool Used[7] = { false, false, false, false, false, false, false };
+ uint32_t RenumRegs[6];
+ for (unsigned I = 0; I < SavedCount; ++I) {
+ uint32_t Renum = 0;
+ for (unsigned U = 1; U < 7; ++U) {
+ if (U == Registers[I])
+ break;
+ if (!Used[U])
+ ++Renum;
+ }
+
+ Used[Registers[I]] = true;
+ RenumRegs[I] = Renum;
+ }
+
+ // Take the renumbered values and encode them into a 10-bit number.
+ uint32_t permutationEncoding = 0;
+ switch (SavedCount) {
+ case 6:
+ permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+ + 6 * RenumRegs[2] + 2 * RenumRegs[3]
+ + RenumRegs[4];
+ break;
+ case 5:
+ permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+ + 6 * RenumRegs[2] + 2 * RenumRegs[3]
+ + RenumRegs[4];
+ break;
+ case 4:
+ permutationEncoding |= 60 * RenumRegs[0] + 12 * RenumRegs[1]
+ + 3 * RenumRegs[2] + RenumRegs[3];
+ break;
+ case 3:
+ permutationEncoding |= 20 * RenumRegs[0] + 4 * RenumRegs[1]
+ + RenumRegs[2];
+ break;
+ case 2:
+ permutationEncoding |= 5 * RenumRegs[0] + RenumRegs[1];
+ break;
+ case 1:
+ permutationEncoding |= RenumRegs[0];
+ break;
+ }
+
+ return permutationEncoding;
+}
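A worked trace of permuteEncode for the {6, 2, 4, 5} ordering used in the comment above. Note the comment's table is the 1-based view; RenumRegs holds 0-based counts of still-unused lower-numbered registers, so each encoded digit comes out one smaller:

// Registers = {6, 2, 4, 5}, SavedCount = 4:
//   6 -> RenumRegs[0] = 5   (1,2,3,4,5 all still unused)
//   2 -> RenumRegs[1] = 1   (only 1 unused below it)
//   4 -> RenumRegs[2] = 2   (1 and 3 unused below it)
//   5 -> RenumRegs[3] = 2   (1 and 3 unused below it)
// SavedCount == 4 case: 60*5 + 12*1 + 3*2 + 2 == 320
unsigned Regs[6] = {6, 2, 4, 5, 0, 0};
assert(permuteEncode(4, Regs) == 320);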
+
+uint32_t X86FrameLowering::
+getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs,
+ int DataAlignmentFactor, bool IsEH) const {
+ uint32_t Encoding = 0;
+ int CFAOffset = 0;
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 };
+ unsigned SavedRegIdx = 0;
+ int FramePointerReg = -1;
+
+ for (ArrayRef<MCCFIInstruction>::const_iterator
+ I = Instrs.begin(), E = Instrs.end(); I != E; ++I) {
+ const MCCFIInstruction &Inst = *I;
+ MCSymbol *Label = Inst.getLabel();
+
+ // Ignore invalid labels.
+ if (Label && !Label->isDefined()) continue;
+
+ unsigned Operation = Inst.getOperation();
+ if (Operation != MCCFIInstruction::Move &&
+ Operation != MCCFIInstruction::RelMove)
+ // FIXME: We can't handle this frame just yet.
+ return 0;
+
+ const MachineLocation &Dst = Inst.getDestination();
+ const MachineLocation &Src = Inst.getSource();
+ const bool IsRelative = (Operation == MCCFIInstruction::RelMove);
+
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (Src.getReg() != MachineLocation::VirtualFP) {
+ // DW_CFA_def_cfa
+      assert(FramePointerReg == -1 && "Defining more than one frame pointer?");
+ if (TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::EBP &&
+ TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::RBP)
+ // The frame pointer isn't EBP/RBP. Cannot make unwind information
+ // compact.
+ return 0;
+ FramePointerReg = TRI->getCompactUnwindRegNum(Src.getReg(), IsEH);
+ } // else DW_CFA_def_cfa_offset
+
+ if (IsRelative)
+ CFAOffset += Src.getOffset();
+ else
+ CFAOffset -= Src.getOffset();
+
+ continue;
+ }
+
+ if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+ // DW_CFA_def_cfa_register
+ assert(FramePointerReg == -1 && "Defining more than one frame pointer?");
+
+ if (TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::EBP &&
+ TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::RBP)
+ // The frame pointer isn't EBP/RBP. Cannot make unwind information
+ // compact.
+ return 0;
+
+ FramePointerReg = TRI->getCompactUnwindRegNum(Dst.getReg(), IsEH);
+ if (SavedRegIdx != 1 || SavedRegs[0] != unsigned(FramePointerReg))
+ return 0;
+
+ SavedRegs[0] = 0;
+ SavedRegIdx = 0;
+ continue;
+ }
+
+ unsigned Reg = Src.getReg();
+ int Offset = Dst.getOffset();
+ if (IsRelative)
+ Offset -= CFAOffset;
+ Offset /= DataAlignmentFactor;
+
+ if (Offset < 0) {
+ // FIXME: Handle?
+ // DW_CFA_offset_extended_sf
+ return 0;
+ } else if (Reg < 64) {
+ // DW_CFA_offset + Reg
+ if (SavedRegIdx >= 6) return 0;
+ int CURegNum = TRI->getCompactUnwindRegNum(Reg, IsEH);
+ if (CURegNum == -1) return 0;
+ SavedRegs[SavedRegIdx++] = CURegNum;
+ } else {
+ // FIXME: Handle?
+ // DW_CFA_offset_extended
+ return 0;
+ }
+ }
+
+ // Bail if there are too many registers to encode.
+ if (SavedRegIdx > 6) return 0;
+
+ // Check if the offset is too big.
+ CFAOffset /= 4;
+ if ((CFAOffset & 0xFF) != CFAOffset)
+ return 0;
+ Encoding |= (CFAOffset & 0xFF) << 16; // Size encoding.
+
+ if (FramePointerReg != -1) {
+ Encoding |= 0x01000000; // EBP/RBP Unwind Frame
+ for (unsigned I = 0; I != SavedRegIdx; ++I) {
+ unsigned Reg = SavedRegs[I];
+ if (Reg == unsigned(FramePointerReg)) continue;
+ Encoding |= (Reg & 0x7) << (I * 3); // Register encoding
+ }
+ } else {
+ Encoding |= 0x02000000; // Frameless unwind with small stack
+ Encoding |= (SavedRegIdx & 0x7) << 10;
+ Encoding |= permuteEncode(SavedRegIdx, SavedRegs);
+ }
+
+ return Encoding;
+}
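Reading the layout off the code above: the mode sits in the top byte (0x01 = EBP/RBP frame, 0x02 = frameless small-stack), the stack size in 4-byte units in bits 16-23, and, for frameless frames, the saved-register count in bits 10-12 with the permutation in the low 10 bits. Continuing the {6, 2, 4, 5} example with 8 bytes of stack (this traces only this function, not the full compact-unwind spec):

uint32_t Enc = 0;
Enc |= 0x02000000;               // frameless unwind, small stack
Enc |= ((8 / 4) & 0xFF) << 16;   // CFAOffset in 4-byte units
Enc |= (4 & 0x7) << 10;          // SavedRegIdx: four registers saved
Enc |= 320;                      // permuteEncode(4, {6, 2, 4, 5}), traced above
// Enc == 0x02021140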
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index d71108c..14c31ed 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -15,6 +15,7 @@
#define X86_FRAMELOWERING_H
#include "X86Subtarget.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
@@ -58,6 +59,9 @@ public:
void getInitialFrameState(std::vector<MachineMove> &Moves) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+ uint32_t getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs,
+ int DataAlignmentFactor, bool IsEH) const;
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index cc8db35..2b0f283 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -192,6 +192,7 @@ namespace {
SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
+ bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
@@ -547,6 +548,34 @@ void X86DAGToDAGISel::EmitFunctionEntryCode() {
EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo());
}
+static bool isDispSafeForFrameIndex(int64_t Val) {
+ // On 64-bit platforms, we can run into an issue where a frame index
+ // includes a displacement that, when added to the explicit displacement,
+ // will overflow the displacement field. Assuming that the frame index
+ // displacement fits into a 31-bit integer (which is only slightly more
+ // aggressive than the current fundamental assumption that it fits into
+ // a 32-bit integer), a 31-bit disp should always be safe.
+ return isInt<31>(Val);
+}
+
+bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
+ X86ISelAddressMode &AM) {
+ int64_t Val = AM.Disp + Offset;
+ CodeModel::Model M = TM.getCodeModel();
+ if (Subtarget->is64Bit()) {
+ if (!X86::isOffsetSuitableForCodeModel(Val, M,
+ AM.hasSymbolicDisplacement()))
+ return true;
+ // In addition to the checks required for a register base, check that
+ // we do not try to use an unsafe Disp with a frame index.
+ if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
+ !isDispSafeForFrameIndex(Val))
+ return true;
+ }
+ AM.Disp = Val;
+ return false;
+}
bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
SDValue Address = N->getOperand(1);
@@ -596,18 +625,22 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
// must allow RIP.
!AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
- int64_t Offset = AM.Disp + G->getOffset();
- if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
+ X86ISelAddressMode Backup = AM;
AM.GV = G->getGlobal();
- AM.Disp = Offset;
AM.SymbolFlags = G->getTargetFlags();
+ if (FoldOffsetIntoAddress(G->getOffset(), AM)) {
+ AM = Backup;
+ return true;
+ }
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
- int64_t Offset = AM.Disp + CP->getOffset();
- if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
+ X86ISelAddressMode Backup = AM;
AM.CP = CP->getConstVal();
AM.Align = CP->getAlignment();
- AM.Disp = Offset;
AM.SymbolFlags = CP->getTargetFlags();
+ if (FoldOffsetIntoAddress(CP->getOffset(), AM)) {
+ AM = Backup;
+ return true;
+ }
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
AM.ES = S->getSymbol();
AM.SymbolFlags = S->getTargetFlags();
@@ -689,7 +722,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth) {
- bool is64Bit = Subtarget->is64Bit();
DebugLoc dl = N.getDebugLoc();
DEBUG({
dbgs() << "MatchAddress: ";
@@ -699,8 +731,6 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
if (Depth > 5)
return MatchAddressBase(N, AM);
- CodeModel::Model M = TM.getCodeModel();
-
// If this is already a %rip relative address, we can only merge immediates
// into it. Instead of handling this in every case, we handle it here.
// RIP relative addressing: %rip + 32-bit displacement!
@@ -710,14 +740,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// consistency.
if (!AM.ES && AM.JT != -1) return true;
- if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) {
- int64_t Val = AM.Disp + Cst->getSExtValue();
- if (X86::isOffsetSuitableForCodeModel(Val, M,
- AM.hasSymbolicDisplacement())) {
- AM.Disp = Val;
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
+ if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
return false;
- }
- }
return true;
}
@@ -725,12 +750,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
default: break;
case ISD::Constant: {
uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
- if (!is64Bit ||
- X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M,
- AM.hasSymbolicDisplacement())) {
- AM.Disp += Val;
+ if (!FoldOffsetIntoAddress(Val, AM))
return false;
- }
break;
}
@@ -746,8 +767,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
break;
case ISD::FrameIndex:
- if (AM.BaseType == X86ISelAddressMode::RegBase
- && AM.Base_Reg.getNode() == 0) {
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == 0 &&
+ (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
AM.BaseType = X86ISelAddressMode::FrameIndexBase;
AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
return false;
@@ -776,16 +798,12 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
AM.IndexReg = ShVal.getNode()->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
- uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val);
- if (!is64Bit ||
- X86::isOffsetSuitableForCodeModel(Disp, M,
- AM.hasSymbolicDisplacement()))
- AM.Disp = Disp;
- else
- AM.IndexReg = ShVal;
- } else {
- AM.IndexReg = ShVal;
+ uint64_t Disp = AddVal->getSExtValue() << Val;
+ if (!FoldOffsetIntoAddress(Disp, AM))
+ return false;
}
+
+ AM.IndexReg = ShVal;
return false;
}
break;
@@ -819,13 +837,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
Reg = MulVal.getNode()->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
- uint64_t Disp = AM.Disp + AddVal->getSExtValue() *
- CN->getZExtValue();
- if (!is64Bit ||
- X86::isOffsetSuitableForCodeModel(Disp, M,
- AM.hasSymbolicDisplacement()))
- AM.Disp = Disp;
- else
+ uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
+ if (FoldOffsetIntoAddress(Disp, AM))
Reg = N.getNode()->getOperand(0);
} else {
Reg = N.getNode()->getOperand(0);
@@ -950,19 +963,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
if (CurDAG->isBaseWithConstantOffset(N)) {
X86ISelAddressMode Backup = AM;
ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
- uint64_t Offset = CN->getSExtValue();
// Start with the LHS as an addr mode.
if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
- // Address could not have picked a GV address for the displacement.
- AM.GV == NULL &&
- // On x86-64, the resultant disp must fit in 32-bits.
- (!is64Bit ||
- X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M,
- AM.hasSymbolicDisplacement()))) {
- AM.Disp += Offset;
+ !FoldOffsetIntoAddress(CN->getSExtValue(), AM))
return false;
- }
AM = Backup;
}
break;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 4f8b90f..5096d9a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -235,10 +235,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Setup Windows compiler runtime calls.
setLibcallName(RTLIB::SDIV_I64, "_alldiv");
setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
+ setLibcallName(RTLIB::SREM_I64, "_allrem");
+ setLibcallName(RTLIB::UREM_I64, "_aullrem");
+ setLibcallName(RTLIB::MUL_I64, "_allmul");
setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C);
setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C);
}
@@ -646,6 +652,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
}
+ // We don't support FMA.
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
// Long double always uses X87.
if (!UseSoftFloat) {
addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
@@ -670,6 +680,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FSIN , MVT::f80 , Expand);
setOperationAction(ISD::FCOS , MVT::f80 , Expand);
}
+
+ setOperationAction(ISD::FMA, MVT::f80, Expand);
}
// Always use a library call for pow.
@@ -976,7 +988,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
- setOperationAction(ISD::LOAD, MVT::v8i32, Legal);
setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
@@ -994,63 +1005,58 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
- // Custom lower build_vector, vector_shuffle, scalar_to_vector,
- // insert_vector_elt extract_subvector and extract_vector_elt for
- // 256-bit types.
- for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
- ++i) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
- // Do not attempt to custom lower non-256-bit vectors
- if (!isPowerOf2_32(MVT(VT).getVectorNumElements())
- || (MVT(VT).getSizeInBits() < 256))
- continue;
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- }
- // Custom-lower insert_subvector and extract_subvector based on
- // the result type.
+ // Custom lower several nodes for 256-bit types.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
- ++i) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
- // Do not attempt to custom lower non-256-bit vectors
- if (!isPowerOf2_32(MVT(VT).getVectorNumElements()))
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+ EVT VT = SVT;
+
+ // Extract subvector is special because the value type
+ // (result) is 128-bit but the source is 256-bit wide.
+ if (VT.is128BitVector())
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom);
+
+ // Do not attempt to custom lower other non-256-bit vectors
+ if (!VT.is256BitVector())
continue;
- if (MVT(VT).getSizeInBits() == 128) {
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
- }
- else if (MVT(VT).getSizeInBits() == 256) {
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- }
+ setOperationAction(ISD::BUILD_VECTOR, SVT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom);
}
// Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
- // Don't promote loads because we need them for VPERM vector index versions.
+ for (unsigned i = (unsigned)MVT::v32i8; i != (unsigned)MVT::v4i64; ++i) {
+ MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+ EVT VT = SVT;
- for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE;
- VT++) {
- if (!isPowerOf2_32(MVT((MVT::SimpleValueType)VT).getVectorNumElements())
- || (MVT((MVT::SimpleValueType)VT).getSizeInBits() < 256))
+ // Do not attempt to promote non-256-bit vectors
+ if (!VT.is256BitVector())
continue;
- setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote);
- AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v4i64);
- setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote);
- AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v4i64);
- setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote);
- AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v4i64);
- //setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote);
- //AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v4i64);
- setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
- AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v4i64);
+
+ setOperationAction(ISD::AND, SVT, Promote);
+ AddPromotedToType (ISD::AND, SVT, MVT::v4i64);
+ setOperationAction(ISD::OR, SVT, Promote);
+ AddPromotedToType (ISD::OR, SVT, MVT::v4i64);
+ setOperationAction(ISD::XOR, SVT, Promote);
+ AddPromotedToType (ISD::XOR, SVT, MVT::v4i64);
+ setOperationAction(ISD::LOAD, SVT, Promote);
+ AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64);
+ setOperationAction(ISD::SELECT, SVT, Promote);
+ AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64);
}
}
+ // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
+ // of this type with custom code.
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT++) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, Custom);
+ }
+
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -3832,19 +3838,24 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
}
/// getOnesVector - Returns a vector of specified type with all bits set.
-///
+/// Always build ones vectors as <4 x i32> or <8 x i32> bitcasted to
+/// their original type, ensuring they get CSE'd.
static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
+ assert((VT.is128BitVector() || VT.is256BitVector())
+ && "Expected a 128-bit or 256-bit vector type");
- // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
- // type. This ensures they get CSE'd.
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
+
SDValue Vec;
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ if (VT.is256BitVector()) {
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+ } else
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
-
/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 points to its first element.
static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
@@ -4459,17 +4470,17 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return ConcatVectors(Lower, Upper, DAG);
}
- // All zero's are handled with pxor in SSE2 and above, xorps in SSE1.
- // All one's are handled with pcmpeqd. In AVX, zero's are handled with
- // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd
- // is present, so AllOnes is ignored.
+ // All zero's:
+ // - pxor (SSE2), xorps (SSE1), vpxor (128 AVX), xorp[s|d] (256 AVX)
+ // All one's:
+ // - pcmpeqd (SSE2 and 128 AVX), fallback to constant pools (256 AVX)
if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
- (Op.getValueType().getSizeInBits() != 256 &&
- ISD::isBuildVectorAllOnes(Op.getNode()))) {
- // Canonicalize this to <4 x i32> (SSE) to
+ ISD::isBuildVectorAllOnes(Op.getNode())) {
+    // Canonicalize this to <4 x i32> or <8 x i32> (SSE) to
// 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
// eliminated on x86-32 hosts.
- if (Op.getValueType() == MVT::v4i32)
+ if (Op.getValueType() == MVT::v4i32 ||
+ Op.getValueType() == MVT::v8i32)
return Op;
if (ISD::isBuildVectorAllOnes(Op.getNode()))
@@ -8916,8 +8927,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
}
// Lower SHL with variable shift amount.
- // Cannot lower SHL without SSE4.1 or later.
- if (!Subtarget->hasSSE41()) return SDValue();
+ // Cannot lower SHL without SSE2 or later.
+ if (!Subtarget->hasSSE2()) return SDValue();
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
@@ -9064,13 +9075,66 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
return Sum;
}
+SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDNode* Node = Op.getNode();
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ EVT VT = Node->getValueType(0);
+
+ if (Subtarget->hasSSE2() && VT.isVector()) {
+ unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+ ExtraVT.getScalarType().getSizeInBits();
+ SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
+
+ unsigned SHLIntrinsicsID = 0;
+ unsigned SRAIntrinsicsID = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::v2i64: {
+ SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_q;
+ SRAIntrinsicsID = 0;
+ break;
+ }
+ case MVT::v4i32: {
+ SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
+ SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
+ break;
+ }
+ case MVT::v8i16: {
+ SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w;
+ SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w;
+ break;
+ }
+ }
+
+ SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(SHLIntrinsicsID, MVT::i32),
+ Node->getOperand(0), ShAmt);
+
+ // In case of 1 bit sext, no need to shr
+ if (ExtraVT.getScalarType().getSizeInBits() == 1) return Tmp1;
+
+ if (SRAIntrinsicsID) {
+ Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(SRAIntrinsicsID, MVT::i32),
+ Tmp1, ShAmt);
+ }
+ return Tmp1;
+ }
+
+ return SDValue();
+}
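The lowering above realizes sign_extend_inreg as a left shift followed by an arithmetic right shift by the same amount (pslli/psrai per element); v2i64 gets SRAIntrinsicsID = 0 because SSE2 has no 64-bit arithmetic right shift. A scalar model of the same identity (hypothetical helper, not part of the patch):

#include <cstdint>

// Sign-extend the low FromBits bits of X within a 32-bit lane;
// Diff plays the role of BitsDiff in the code above.
int32_t SignExtendInReg(int32_t X, unsigned FromBits) {
  unsigned Diff = 32 - FromBits;
  return (int32_t)((uint32_t)X << Diff) >> Diff;  // shl, then sra
}
// SignExtendInReg(0x00AB, 8) == -85 (i.e. 0xFFFFFFAB)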
+
+
SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
DebugLoc dl = Op.getDebugLoc();
- if (!Subtarget->hasSSE2()) {
+ // Go ahead and emit the fence on x86-64 even if we asked for no SSE2.
+ // There isn't any reason to disable it if the target processor supports it.
+ if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) {
SDValue Chain = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0,
- Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
SDValue Ops[] = {
DAG.getRegister(X86::ESP, MVT::i32), // Base
DAG.getTargetConstant(1, MVT::i8), // Scale
@@ -9225,6 +9289,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG);
case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
@@ -9323,6 +9388,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
default:
assert(false && "Do not know how to custom type legalize this operation!");
return;
+ case ISD::SIGN_EXTEND_INREG:
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
@@ -9457,7 +9523,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PINSRB: return "X86ISD::PINSRB";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
- case X86ISD::PANDN: return "X86ISD::PANDN";
+ case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
case X86ISD::PSIGND: return "X86ISD::PSIGND";
@@ -11808,10 +11874,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (R.getNode())
return R;
- // Want to form PANDN nodes, in the hopes of then easily combining them with
- // OR and AND nodes to form PBLEND/PSIGN.
+ // Want to form ANDNP nodes:
+ // 1) In the hopes of then easily combining them with OR and AND nodes
+ // to form PBLEND/PSIGN.
+ // 2) To match ANDN packed intrinsics.
EVT VT = N->getValueType(0);
- if (VT != MVT::v2i64)
+ if (VT != MVT::v2i64 && VT != MVT::v4i64)
return SDValue();
SDValue N0 = N->getOperand(0);
@@ -11821,12 +11889,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
// Check LHS for vnot
if (N0.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
- return DAG.getNode(X86ISD::PANDN, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);
// Check RHS for vnot
if (N1.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
- return DAG.getNode(X86ISD::PANDN, DL, VT, N1.getOperand(0), N0);
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);
return SDValue();
}
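Per element, the pattern this combine matches is plain "andn": an AND whose
one operand is an XOR against all-ones. A scalar sketch of the rewrite
(illustrative only):

#include <cstdint>

uint64_t andnp(uint64_t X, uint64_t Y) {
  return ~X & Y;  // ANDNP(X, Y) == (NOT X) AND Y
}
// So (X ^ ~0ull) & Y becomes andnp(X, Y), which is what the
// DAG.getNode(X86ISD::ANDNP, ...) calls above express.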
@@ -11852,10 +11920,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (Subtarget->hasSSSE3()) {
if (VT == MVT::v2i64) {
// Canonicalize pandn to RHS
- if (N0.getOpcode() == X86ISD::PANDN)
+ if (N0.getOpcode() == X86ISD::ANDNP)
std::swap(N0, N1);
// or (and (m, x), (pandn m, y))
- if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::PANDN) {
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
SDValue Mask = N1.getOperand(0);
SDValue X = N1.getOperand(1);
SDValue Y;
@@ -11864,7 +11932,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (N0.getOperand(1) == Mask)
Y = N0.getOperand(0);
- // Check to see if the mask appeared in both the AND and PANDN and
+ // Check to see if the mask appeared in both the AND and ANDNP nodes.
if (!Y.getNode())
return SDValue();
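What the or-combine is hunting for is the bitwise-select idiom, written out
here per element (a sketch, not the DAG code itself):

#include <cstdint>

uint64_t bitwiseSelect(uint64_t M, uint64_t X, uint64_t Y) {
  return (M & X) | (~M & Y);  // or(and(m, x), andnp(m, y))
}

When the mask has the right per-element shape, this form is rewritten into
the PBLEND/PSIGN nodes the comment above mentions.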
@@ -12592,6 +12660,7 @@ X86TargetLowering::getConstraintType(const std::string &Constraint) const {
case 'y':
case 'x':
case 'Y':
+ case 'l':
return C_RegisterClass;
case 'a':
case 'b':
@@ -12889,30 +12958,30 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// in the normal allocation?
case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
if (Subtarget->is64Bit()) {
- if (VT == MVT::i32)
+ if (VT == MVT::i32 || VT == MVT::f32)
return std::make_pair(0U, X86::GR32RegisterClass);
else if (VT == MVT::i16)
return std::make_pair(0U, X86::GR16RegisterClass);
- else if (VT == MVT::i8)
+ else if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, X86::GR8RegisterClass);
- else if (VT == MVT::i64)
+ else if (VT == MVT::i64 || VT == MVT::f64)
return std::make_pair(0U, X86::GR64RegisterClass);
break;
}
// 32-bit fallthrough
case 'Q': // Q_REGS
- if (VT == MVT::i32)
+ if (VT == MVT::i32 || VT == MVT::f32)
return std::make_pair(0U, X86::GR32_ABCDRegisterClass);
else if (VT == MVT::i16)
return std::make_pair(0U, X86::GR16_ABCDRegisterClass);
- else if (VT == MVT::i8)
+ else if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, X86::GR8_ABCD_LRegisterClass);
else if (VT == MVT::i64)
return std::make_pair(0U, X86::GR64_ABCDRegisterClass);
break;
case 'r': // GENERAL_REGS
case 'l': // INDEX_REGS
- if (VT == MVT::i8)
+ if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, X86::GR8RegisterClass);
if (VT == MVT::i16)
return std::make_pair(0U, X86::GR16RegisterClass);
@@ -12920,7 +12989,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return std::make_pair(0U, X86::GR32RegisterClass);
return std::make_pair(0U, X86::GR64RegisterClass);
case 'R': // LEGACY_REGS
- if (VT == MVT::i8)
+ if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, X86::GR8_NOREXRegisterClass);
if (VT == MVT::i16)
return std::make_pair(0U, X86::GR16_NOREXRegisterClass);
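The extra f32/f64/i1 cases let floating-point and boolean values satisfy the
GPR constraint letters instead of failing register-class selection. A
hypothetical user-level example (illustrative; the asm body is immaterial):

float roundTrip(float F) {
  // An f32 bound to "r" now maps to GR32 under the code above.
  asm volatile("movl %0, %0" : "+r"(F));
  return F;
}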
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index d9c883f..b603678 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -169,8 +169,8 @@ namespace llvm {
/// PSHUFB - Shuffle 16 8-bit values within a vector.
PSHUFB,
- /// PANDN - and with not'd value.
- PANDN,
+ /// ANDNP - Bitwise Logical AND NOT of Packed FP values.
+ ANDNP,
/// PSIGNB/W/D - Copy integer sign.
PSIGNB, PSIGNW, PSIGND,
@@ -825,6 +825,7 @@ namespace llvm {
SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
// Utility functions to help LowerVECTOR_SHUFFLE
SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 7daa264..6d89bcc 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -460,6 +460,11 @@ class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ OpSize, Requires<[HasCLMUL]>;
+
+class AVXCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>;
// FMA3 Instruction Templates
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 7c9a9f7..b00109c 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -46,8 +46,8 @@ def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
-def X86pandn : SDNode<"X86ISD::PANDN",
- SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
+def X86andnp : SDNode<"X86ISD::ANDNP",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
def X86psignb : SDNode<"X86ISD::PSIGNB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
@@ -168,11 +168,13 @@ def ssmem : Operand<v4f32> {
let PrintMethod = "printf32mem";
let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
def sdmem : Operand<v2f64> {
let PrintMethod = "printf64mem";
let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
//===----------------------------------------------------------------------===//
@@ -301,6 +303,7 @@ def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
// 256-bit bitconvert pattern fragments
def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
+def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
def vzmovl_v2i64 : PatFrag<(ops node:$src),
(bitconvert (v2i64 (X86vzmovl
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 702331d..55b5835 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -36,7 +36,6 @@
#include <limits>
#define GET_INSTRINFO_CTOR
-#define GET_INSTRINFO_MC_DESC
#include "X86GenInstrInfo.inc"
using namespace llvm;
@@ -301,12 +300,17 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 },
{ X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 },
{ X86::MOVDQArr, X86::MOVDQAmr, 0, 16 },
+ { X86::VMOVAPDYrr, X86::VMOVAPDYmr, 0, 32 },
+ { X86::VMOVAPSYrr, X86::VMOVAPSYmr, 0, 32 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYmr, 0, 32 },
{ X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
{ X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 },
{ X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
{ X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 },
{ X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 },
{ X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 },
+ { X86::VMOVUPDYrr, X86::VMOVUPDYmr, 0, 0 },
+ { X86::VMOVUPSYrr, X86::VMOVUPSYmr, 0, 0 },
{ X86::MUL16r, X86::MUL16m, 1, 0 },
{ X86::MUL32r, X86::MUL32m, 1, 0 },
{ X86::MUL64r, X86::MUL64m, 1, 0 },
@@ -411,10 +415,13 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOV8rr, X86::MOV8rm, 0 },
{ X86::MOVAPDrr, X86::MOVAPDrm, 16 },
{ X86::MOVAPSrr, X86::MOVAPSrm, 16 },
+ { X86::VMOVAPDYrr, X86::VMOVAPDYrm, 32 },
+ { X86::VMOVAPSYrr, X86::VMOVAPSYrm, 32 },
{ X86::MOVDDUPrr, X86::MOVDDUPrm, 0 },
{ X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
{ X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
{ X86::MOVDQArr, X86::MOVDQArm, 16 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYrm, 16 },
{ X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 },
{ X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 },
{ X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
@@ -425,6 +432,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
{ X86::MOVUPDrr, X86::MOVUPDrm, 16 },
{ X86::MOVUPSrr, X86::MOVUPSrm, 0 },
+ { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
+ { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
{ X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 },
{ X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
{ X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 },
@@ -787,6 +796,9 @@ static bool isFrameLoadOpcode(int Opcode) {
case X86::MOVAPSrm:
case X86::MOVAPDrm:
case X86::MOVDQArm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
return true;
@@ -808,6 +820,9 @@ static bool isFrameStoreOpcode(int Opcode) {
case X86::MOVAPSmr:
case X86::MOVAPDmr:
case X86::MOVDQAmr:
+ case X86::VMOVAPSYmr:
+ case X86::VMOVAPDYmr:
+ case X86::VMOVDQAYmr:
case X86::MMX_MOVD64mr:
case X86::MMX_MOVQ64mr:
case X86::MMX_MOVNTQmr:
@@ -926,6 +941,10 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
case X86::MOVUPSrm:
case X86::MOVAPDrm:
case X86::MOVDQArm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
case X86::FsMOVAPSrm:
@@ -1975,6 +1994,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = X86::MOV8rr;
} else if (X86::VR128RegClass.contains(DestReg, SrcReg))
Opc = X86::MOVAPSrr;
+ else if (X86::VR256RegClass.contains(DestReg, SrcReg))
+ Opc = X86::VMOVAPSYrr;
else if (X86::VR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MMX_MOVQ64rr;
else
@@ -2064,6 +2085,13 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
return load ? X86::MOVAPSrm : X86::MOVAPSmr;
else
return load ? X86::MOVUPSrm : X86::MOVUPSmr;
+ case 32:
+ assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
+ // If the stack is realigned we can use aligned stores.
+ if (isStackAligned)
+ return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr;
+ else
+ return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr;
}
}
@@ -2853,6 +2881,11 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::VMOVDQUYrm:
break;
}
switch (Opc2) {
@@ -2875,6 +2908,11 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::VMOVDQUYrm:
break;
}
@@ -3053,6 +3091,13 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI },
{ X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
{ X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
+ // AVX 256-bit support
+ { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
+ { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
+ { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
+ { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
+ { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
+ { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr },
};
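Each row of ReplaceableInstrs lists the PS/PD/packed-int encodings of the same
operation, so the execution-domain fixup pass can swap an instruction into
whichever domain avoids a bypass delay. A minimal sketch of that lookup
(names and opcode values here are illustrative, not the pass itself):

static const unsigned Rows[][3] = {
  { 101, 102, 103 },  // stand-ins for e.g. VMOVAPSYrr/VMOVAPDYrr/VMOVDQAYrr
};

unsigned equivalentOpcode(unsigned Opc, unsigned WantDomain /* 0..2 */) {
  for (const auto &Row : Rows)
    for (unsigned D = 0; D != 3; ++D)
      if (Row[D] == Opc)
        return Row[WantDomain];  // same operation, different domain
  return Opc;                    // not replaceable
}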
// FIXME: Some shuffle and unpack instructions have equivalents in different
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 8cab808..7eb07b0 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -251,6 +251,7 @@ class X86MemOperand<string printMethod> : Operand<iPTR> {
let ParserMatchClass = X86MemAsmOperand;
}
+let OperandType = "OPERAND_MEMORY" in {
def opaque32mem : X86MemOperand<"printopaquemem">;
def opaque48mem : X86MemOperand<"printopaquemem">;
def opaque80mem : X86MemOperand<"printopaquemem">;
@@ -267,6 +268,7 @@ def f64mem : X86MemOperand<"printf64mem">;
def f80mem : X86MemOperand<"printf80mem">;
def f128mem : X86MemOperand<"printf128mem">;
def f256mem : X86MemOperand<"printf256mem">;
+}
// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
// plain GR64, so that it doesn't potentially require a REX prefix.
@@ -274,6 +276,7 @@ def i8mem_NOREX : Operand<i64> {
let PrintMethod = "printi8mem";
let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm);
let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
// GPRs available for tailcall.
@@ -287,6 +290,7 @@ def i32mem_TC : Operand<i32> {
let PrintMethod = "printi32mem";
let MIOperandInfo = (ops GR32_TC, i8imm, GR32_TC, i32imm, i8imm);
let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
// Special i64mem for addresses of load folding tail calls. These are not
@@ -297,9 +301,11 @@ def i64mem_TC : Operand<i64> {
let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
ptr_rc_tailcall, i32imm, i8imm);
let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
-let ParserMatchClass = X86AbsMemAsmOperand,
+let OperandType = "OPERAND_PCREL",
+ ParserMatchClass = X86AbsMemAsmOperand,
PrintMethod = "print_pcrel_imm" in {
def i32imm_pcrel : Operand<i32>;
def i16imm_pcrel : Operand<i16>;
@@ -317,6 +323,7 @@ def brtarget8 : Operand<OtherVT>;
def SSECC : Operand<i8> {
let PrintMethod = "printSSECC";
+ let OperandType = "OPERAND_IMMEDIATE";
}
class ImmSExtAsmOperandClass : AsmOperandClass {
@@ -363,15 +370,18 @@ def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
// 16-bits but only 8 bits are significant.
def i16i8imm : Operand<i16> {
let ParserMatchClass = ImmSExti16i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
}
// 32-bits but only 8 bits are significant.
def i32i8imm : Operand<i32> {
let ParserMatchClass = ImmSExti32i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
}
// 64-bits but only 32 bits are significant.
def i64i32imm : Operand<i64> {
let ParserMatchClass = ImmSExti64i32AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
}
// 64-bits but only 32 bits are significant, and those bits are treated as being
@@ -438,8 +448,10 @@ def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
def FPStackf32 : Predicate<"!Subtarget->hasXMM()">;
def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">;
-def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate;
-def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate;
+def In32BitMode : Predicate<"!Subtarget->is64Bit()">,
+ AssemblerPredicate<"!Mode64Bit">;
+def In64BitMode : Predicate<"Subtarget->is64Bit()">,
+ AssemblerPredicate<"Mode64Bit">;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
@@ -669,7 +681,7 @@ def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
}
let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
-def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
+def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
"push{q}\t$imm", []>;
def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
"push{q}\t$imm", []>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 0bfc5e7..fe11d77 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -512,6 +512,26 @@ defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
VEX_4V, VEX_W;
+let Predicates = [HasAVX] in {
+ def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), addr:$src)>;
+
+ def : Pat<(f32 (sint_to_fp GR32:$src)),
+ (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
+ def : Pat<(f32 (sint_to_fp GR64:$src)),
+ (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>;
+ def : Pat<(f64 (sint_to_fp GR32:$src)),
+ (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
+ def : Pat<(f64 (sint_to_fp GR64:$src)),
+ (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>;
+}
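The (IMPLICIT_DEF) in the patterns above exists because the VEX scalar
converts take an extra source whose upper lanes pass through to the
destination; a plain sint_to_fp doesn't care what those lanes hold, so an
undef register is supplied. An illustrative model of the semantics (a sketch,
not an API):

struct V4F32 { float L[4]; };

V4F32 vcvtsi2ss(V4F32 Src1, int X) {
  V4F32 D = Src1;      // lanes 1..3 pass through from the first source
  D.L[0] = (float)X;   // lane 0 receives the converted value
  return D;
}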
+
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}">, XS;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
@@ -1473,83 +1493,68 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
- SDNode OpNode, int HasPat = 0,
- list<list<dag>> Pattern = []> {
+ SDNode OpNode> {
let Pattern = []<dag> in {
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
- !if(HasPat, Pattern[0], // rr
- [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
- VR128:$src2)))]),
- !if(HasPat, Pattern[2], // rm
- [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))]), 0>,
- VEX_4V;
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>, VEX_4V;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
- !if(HasPat, Pattern[1], // rr
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64
- VR128:$src2))))]),
- !if(HasPat, Pattern[3], // rm
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))]), 0>,
- OpSize, VEX_4V;
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>,
+ OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
- !if(HasPat, Pattern[0], // rr
- [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
- VR128:$src2)))]),
- !if(HasPat, Pattern[2], // rm
- [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))])>, TB;
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>, TB;
defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
- !if(HasPat, Pattern[1], // rr
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64
- VR128:$src2))))]),
- !if(HasPat, Pattern[3], // rm
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))])>,
- TB, OpSize;
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>, TB, OpSize;
}
}
/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
///
-multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> {
+multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
- !strconcat(OpcodeStr, "ps"), f256mem, [], [], 0>, VEX_4V;
+ !strconcat(OpcodeStr, "ps"), f256mem,
+ [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>, VEX_4V;
defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
- !strconcat(OpcodeStr, "pd"), f256mem, [], [], 0>, OpSize, VEX_4V;
+ !strconcat(OpcodeStr, "pd"), f256mem,
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (bc_v4i64 (v4f64 VR256:$src2))))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>,
+ OpSize, VEX_4V;
}
// AVX 256-bit packed logical ops forms
-defm VAND : sse12_fp_packed_logical_y<0x54, "and">;
-defm VOR : sse12_fp_packed_logical_y<0x56, "or">;
-defm VXOR : sse12_fp_packed_logical_y<0x57, "xor">;
-let isCommutable = 0 in
- defm VANDN : sse12_fp_packed_logical_y<0x55, "andn">;
+defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>;
+defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>;
+defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>;
+defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>;
defm AND : sse12_fp_packed_logical<0x54, "and", and>;
defm OR : sse12_fp_packed_logical<0x56, "or", or>;
defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
let isCommutable = 0 in
- defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [
- // single r+r
- [(set VR128:$dst, (X86pandn VR128:$src1, VR128:$src2))],
- // double r+r
- [],
- // single r+m
- [(set VR128:$dst, (X86pandn VR128:$src1, (memopv2i64 addr:$src2)))],
- // double r+m
- []]>;
+ defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
@@ -2037,7 +2042,10 @@ def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
}
// The same as done above but for AVX. The 128-bit versions are the
-// same, but re-encoded. The 256-bit does not support PI version.
+// same, but re-encoded. The 256-bit versions do not support a PI form, and
+// don't need one: on Sandy Bridge the register is zeroed at the rename stage
+// without using any execution unit, so SET0PSY and SET0PDY can be used for
+// vector integer instructions without penalty.
// FIXME: Change encoding to pseudo! This is blocked right now by the x86
// JIT implementation, which does not expand the instructions below like
// X86MCInstLower does.
@@ -2052,8 +2060,8 @@ def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
let ExeDomain = SSEPackedInt in
-def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllZerosV))]>;
}
def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
@@ -3660,6 +3668,19 @@ let Predicates = [HasXMMInt] in {
let Predicates = [HasAVX] in {
def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
}
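These Pats record that 256-bit bitconverts are free: they only reinterpret
the register and never touch the bits. The scalar analogue, as a sketch:

#include <cstdint>
#include <cstring>

uint32_t bitsOfFloat(float F) {
  uint32_t U;
  std::memcpy(&U, &F, sizeof U);  // reinterpret the bits, no value change
  return U;
}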
// Move scalar to XMM zero-extended
@@ -3842,6 +3863,8 @@ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
(CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
// Use movaps / movups for SSE integer load / store (one byte shorter).
+// The instructions selected below are then converted to MOVDQA/MOVDQU
+// during the SSE domain pass.
let Predicates = [HasSSE1] in {
def : Pat<(alignedloadv4i32 addr:$src),
(MOVAPSrm addr:$src)>;
@@ -3870,8 +3893,9 @@ let Predicates = [HasSSE1] in {
(MOVUPSmr addr:$dst, VR128:$src)>;
}
-// Use vmovaps/vmovups for AVX 128-bit integer load/store (one byte shorter).
+// Use vmovaps/vmovups for AVX integer load/store.
let Predicates = [HasAVX] in {
+ // 128-bit load/store
def : Pat<(alignedloadv4i32 addr:$src),
(VMOVAPSrm addr:$src)>;
def : Pat<(loadv4i32 addr:$src),
@@ -3897,6 +3921,24 @@ let Predicates = [HasAVX] in {
(VMOVUPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
(VMOVUPSmr addr:$dst, VR128:$src)>;
+
+ // 256-bit load/store
+ def : Pat<(alignedloadv4i64 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(loadv4i64 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(alignedloadv8i32 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(loadv8i32 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v4i64 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v8i32 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
}
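The preference for (v)movaps/(v)movups on integer vectors comes down to
encoding size, as the comment before the SSE1 block notes: movaps carries no
0x66 prefix, so it is one byte shorter than movdqa. Illustrative opcode bytes
(ModRM and addressing bytes omitted):

const unsigned char MovapsLoad[] = { 0x0F, 0x28 };        // movaps: 0F 28 /r
const unsigned char MovdqaLoad[] = { 0x66, 0x0F, 0x6F };  // movdqa: 66 0F 6F /r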
//===----------------------------------------------------------------------===//
@@ -5195,33 +5237,52 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
// CLMUL Instructions
//===----------------------------------------------------------------------===//
-// Only the AVX version of CLMUL instructions are described here.
-
// Carry-less Multiplication instructions
-def VPCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+let Constraints = "$src1 = $dst" in {
+def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ []>;
+
+def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ []>;
+}
+
+// AVX carry-less multiplication instructions
+def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>;
-def VPCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>;
-// Assembler Only
-multiclass avx_vpclmul<string asm> {
- def rr : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>;
-
- def rm : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>;
-}
-defm VPCLMULHQHQDQ : avx_vpclmul<"vpclmulhqhqdq">;
-defm VPCLMULHQLQDQ : avx_vpclmul<"vpclmulhqlqdq">;
-defm VPCLMULLQHQDQ : avx_vpclmul<"vpclmullqhqdq">;
-defm VPCLMULLQLQDQ : avx_vpclmul<"vpclmullqlqdq">;
+
+multiclass pclmul_alias<string asm, int immop> {
+ def : InstAlias<!strconcat("pclmul", asm,
+ "dq {$src, $dst|$dst, $src}"),
+ (PCLMULQDQrr VR128:$dst, VR128:$src, immop)>;
+
+ def : InstAlias<!strconcat("pclmul", asm,
+ "dq {$src, $dst|$dst, $src}"),
+ (PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>;
+
+ def : InstAlias<!strconcat("vpclmul", asm,
+ "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>;
+
+ def : InstAlias<!strconcat("vpclmul", asm,
+ "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop)>;
+}
+defm : pclmul_alias<"hqhq", 0x11>;
+defm : pclmul_alias<"hqlq", 0x01>;
+defm : pclmul_alias<"lqhq", 0x10>;
+defm : pclmul_alias<"lqlq", 0x00>;
//===----------------------------------------------------------------------===//
// AVX Instructions
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 04149e7..ce8ef49 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -18,26 +18,32 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
namespace {
class X86MCCodeEmitter : public MCCodeEmitter {
X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
- const TargetMachine &TM;
- const TargetInstrInfo &TII;
+ const MCInstrInfo &MCII;
+ const MCSubtargetInfo &STI;
MCContext &Ctx;
- bool Is64BitMode;
public:
- X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit)
- : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) {
- Is64BitMode = is64Bit;
+ X86MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii), STI(sti), Ctx(ctx) {
}
~X86MCCodeEmitter() {}
+ bool is64BitMode() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+ }
+
static unsigned GetX86RegNum(const MCOperand &MO) {
return X86RegisterInfo::getX86RegNum(MO.getReg());
}
@@ -126,16 +132,10 @@ public:
} // end anonymous namespace
-MCCodeEmitter *llvm::createX86_32MCCodeEmitter(const Target &,
- TargetMachine &TM,
- MCContext &Ctx) {
- return new X86MCCodeEmitter(TM, Ctx, false);
-}
-
-MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &,
- TargetMachine &TM,
- MCContext &Ctx) {
- return new X86MCCodeEmitter(TM, Ctx, true);
+MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new X86MCCodeEmitter(MCII, STI, Ctx);
}
/// isDisp8 - Return true if this signed displacement fits in a 8-bit
@@ -245,7 +245,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
// Handle %rip relative addressing.
if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode
- assert(Is64BitMode && "Rip-relative addressing requires 64-bit mode");
+ assert(is64BitMode() && "Rip-relative addressing requires 64-bit mode");
assert(IndexReg.getReg() == 0 && "Invalid rip-relative address");
EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
@@ -284,7 +284,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
BaseRegNo != N86::ESP &&
// If there is no base register and we're in 64-bit mode, we need a SIB
// byte to emit an addr that is just 'disp32' (the non-RIP relative form).
- (!Is64BitMode || BaseReg != 0)) {
+ (!is64BitMode() || BaseReg != 0)) {
if (BaseReg == 0) { // [disp32] in X86-32 mode
EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
@@ -729,7 +729,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// Emit the address size opcode prefix as needed.
if ((TSFlags & X86II::AdSize) ||
- (MemOperand != -1 && Is64BitMode && Is32BitMemOperand(MI, MemOperand)))
+ (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand)))
EmitByte(0x67, CurByte, OS);
// Emit the operand size opcode prefix as needed.
@@ -772,7 +772,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// Handle REX prefix.
// FIXME: Can this come before F2 etc to simplify emission?
- if (Is64BitMode) {
+ if (is64BitMode()) {
if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc))
EmitByte(0x40 | REX, CurByte, OS);
}
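The byte built here is the REX prefix: a fixed 0100 high nibble ORed with the
W/R/X/B extension bits, which is why the code emits 0x40 | REX. As a sketch:

unsigned char rexByte(bool W, bool R, bool X, bool B) {
  // 0100WRXB: W = 64-bit operand size; R, X, B extend reg/index/base.
  return (unsigned char)(0x40 | (W << 3) | (R << 2) | (X << 1) | B);
}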
@@ -803,7 +803,7 @@ void X86MCCodeEmitter::
EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
unsigned Opcode = MI.getOpcode();
- const MCInstrDesc &Desc = TII.get(Opcode);
+ const MCInstrDesc &Desc = MCII.get(Opcode);
uint64_t TSFlags = Desc.TSFlags;
// Pseudo instructions don't get encoded.
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 793156f..e385335 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -16,8 +16,8 @@
#include "X86MCInstLower.h"
#include "X86AsmPrinter.h"
#include "X86COFFMachineModuleInfo.h"
-#include "X86MCAsmInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index d32b822..f2faf59 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -40,7 +40,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/CommandLine.h"
-#define GET_REGINFO_MC_DESC
#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"
@@ -107,8 +106,8 @@ int X86RegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
/// getCompactUnwindRegNum - This function maps the register to the number for
/// compact unwind encoding. Return -1 if the register isn't valid.
-int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum) const {
- switch (RegNum) {
+int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const {
+ switch (getLLVMRegNum(RegNum, isEH)) {
case X86::EBX: case X86::RBX: return 1;
case X86::ECX: case X86::R12: return 2;
case X86::EDX: case X86::R13: return 3;
@@ -730,7 +729,10 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (MI.getOperand(i+3).isImm()) {
// Offset is a 32-bit integer.
- int Offset = FIOffset + (int)(MI.getOperand(i + 3).getImm());
+ int Imm = (int)(MI.getOperand(i + 3).getImm());
+ int Offset = FIOffset + Imm;
+ assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
+ "Requesting 64-bit offset in 32-bit immediate!");
MI.getOperand(i + 3).ChangeToImmediate(Offset);
} else {
// Offset is symbolic. This is extremely rare.
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index a09c7ee..a12eb12 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -83,7 +83,7 @@ public:
/// getCompactUnwindRegNum - This function maps the register to the number for
/// compact unwind encoding. Return -1 if the register isn't valid.
- int getCompactUnwindRegNum(unsigned RegNum) const;
+ int getCompactUnwindRegNum(unsigned RegNum, bool isEH) const;
/// Code Generation virtual methods...
///
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index a1e6d7b..5e6c659 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -21,9 +21,8 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/SmallVector.h"
-#define GET_SUBTARGETINFO_CTOR
-#define GET_SUBTARGETINFO_MC_DESC
#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
#include "X86GenSubtargetInfo.inc"
using namespace llvm;
@@ -158,7 +157,7 @@ const char *X86Subtarget::getBZeroEntry() const {
/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
/// to immediate address.
bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
- if (Is64Bit)
+ if (In64BitMode)
return false;
return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
}
@@ -174,73 +173,6 @@ unsigned X86Subtarget::getSpecialAddressLatency() const {
return 200;
}
-/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
-/// specified arguments. If we can't run cpuid on the host, return true.
-static bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
- unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
- #if defined(__GNUC__)
- // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
- asm ("movq\t%%rbx, %%rsi\n\t"
- "cpuid\n\t"
- "xchgq\t%%rbx, %%rsi\n\t"
- : "=a" (*rEAX),
- "=S" (*rEBX),
- "=c" (*rECX),
- "=d" (*rEDX)
- : "a" (value));
- return false;
- #elif defined(_MSC_VER)
- int registers[4];
- __cpuid(registers, value);
- *rEAX = registers[0];
- *rEBX = registers[1];
- *rECX = registers[2];
- *rEDX = registers[3];
- return false;
- #endif
-#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
- #if defined(__GNUC__)
- asm ("movl\t%%ebx, %%esi\n\t"
- "cpuid\n\t"
- "xchgl\t%%ebx, %%esi\n\t"
- : "=a" (*rEAX),
- "=S" (*rEBX),
- "=c" (*rECX),
- "=d" (*rEDX)
- : "a" (value));
- return false;
- #elif defined(_MSC_VER)
- __asm {
- mov eax,value
- cpuid
- mov esi,rEAX
- mov dword ptr [esi],eax
- mov esi,rEBX
- mov dword ptr [esi],ebx
- mov esi,rECX
- mov dword ptr [esi],ecx
- mov esi,rEDX
- mov dword ptr [esi],edx
- }
- return false;
- #endif
-#endif
- return true;
-}
-
-static void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
- Family = (EAX >> 8) & 0xf; // Bits 8 - 11
- Model = (EAX >> 4) & 0xf; // Bits 4 - 7
- if (Family == 6 || Family == 0xf) {
- if (Family == 0xf)
- // Examine extended family ID if family ID is F.
- Family += (EAX >> 20) & 0xff; // Bits 20 - 27
- // Examine extended model ID if family ID is 6 or F.
- Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
- }
-}
-
void X86Subtarget::AutoDetectSubtargetFeatures() {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
union {
@@ -248,51 +180,65 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
char c[12];
} text;
- if (GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
+ if (X86_MC::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
return;
- GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
+ X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
- if ((EDX >> 15) & 1) HasCMov = true;
- if ((EDX >> 23) & 1) X86SSELevel = MMX;
- if ((EDX >> 25) & 1) X86SSELevel = SSE1;
- if ((EDX >> 26) & 1) X86SSELevel = SSE2;
- if (ECX & 0x1) X86SSELevel = SSE3;
- if ((ECX >> 9) & 1) X86SSELevel = SSSE3;
- if ((ECX >> 19) & 1) X86SSELevel = SSE41;
- if ((ECX >> 20) & 1) X86SSELevel = SSE42;
+ if ((EDX >> 15) & 1) { HasCMov = true;      ToggleFeature(X86::FeatureCMOV); }
+ if ((EDX >> 23) & 1) { X86SSELevel = MMX;   ToggleFeature(X86::FeatureMMX); }
+ if ((EDX >> 25) & 1) { X86SSELevel = SSE1;  ToggleFeature(X86::FeatureSSE1); }
+ if ((EDX >> 26) & 1) { X86SSELevel = SSE2;  ToggleFeature(X86::FeatureSSE2); }
+ if (ECX & 0x1)       { X86SSELevel = SSE3;  ToggleFeature(X86::FeatureSSE3); }
+ if ((ECX >> 9) & 1)  { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3); }
+ if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41); }
+ if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42); }
// FIXME: AVX codegen support is not ready.
- //if ((ECX >> 28) & 1) { HasAVX = true; X86SSELevel = NoMMXSSE; }
+ //if ((ECX >> 28) & 1) { HasAVX = true; ToggleFeature(X86::FeatureAVX); }
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
- HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);
- HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
- HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1);
- HasAES = IsIntel && ((ECX >> 25) & 0x1);
+ HasCLMUL  = IsIntel && ((ECX >>  1) & 0x1);
+ if (HasCLMUL)  ToggleFeature(X86::FeatureCLMUL);
+ HasFMA3   = IsIntel && ((ECX >> 12) & 0x1);
+ if (HasFMA3)   ToggleFeature(X86::FeatureFMA3);
+ HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1);
+ if (HasPOPCNT) ToggleFeature(X86::FeaturePOPCNT);
+ HasAES    = IsIntel && ((ECX >> 25) & 0x1);
+ if (HasAES)    ToggleFeature(X86::FeatureAES);
if (IsIntel || IsAMD) {
// Determine if bit test memory instructions are slow.
unsigned Family = 0;
unsigned Model = 0;
- DetectFamilyModel(EAX, Family, Model);
- IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
+ X86_MC::DetectFamilyModel(EAX, Family, Model);
+ if (IsAMD || (Family == 6 && Model >= 13)) {
+ IsBTMemSlow = true;
+ ToggleFeature(X86::FeatureSlowBTMem);
+ }
// If it's Nehalem, unaligned memory access is fast.
- if (Family == 15 && Model == 26)
+ if (Family == 15 && Model == 26) {
IsUAMemFast = true;
+ ToggleFeature(X86::FeatureFastUAMem);
+ }
- GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
- HasX86_64 = (EDX >> 29) & 0x1;
- HasSSE4A = IsAMD && ((ECX >> 6) & 0x1);
- HasFMA4 = IsAMD && ((ECX >> 16) & 0x1);
+ X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ if ((EDX >> 29) & 0x1) {
+ HasX86_64 = true;
+ ToggleFeature(X86::Feature64Bit);
+ }
+ if (IsAMD && ((ECX >> 6) & 0x1)) {
+ HasSSE4A = true;
+ ToggleFeature(X86::FeatureSSE4A);
+ }
+ if (IsAMD && ((ECX >> 16) & 0x1)) {
+ HasFMA4 = true;
+ ToggleFeature(X86::FeatureFMA4);
+ }
}
}
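The family/model decode now reached through X86_MC::DetectFamilyModel uses
the same bit layout as the local helper deleted above; for reference:

void detectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
  Family = (EAX >> 8) & 0xf;            // bits 8..11
  Model  = (EAX >> 4) & 0xf;            // bits 4..7
  if (Family == 6 || Family == 0xf) {
    if (Family == 0xf)
      Family += (EAX >> 20) & 0xff;     // extended family, bits 20..27
    Model += ((EAX >> 16) & 0xf) << 4;  // extended model, bits 16..19
  }
}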
X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
const std::string &FS,
- bool is64Bit, unsigned StackAlignOverride)
- : X86GenSubtargetInfo()
+ unsigned StackAlignOverride, bool is64Bit)
+ : X86GenSubtargetInfo(TT, CPU, FS)
, PICStyle(PICStyles::None)
, X86SSELevel(NoMMXSSE)
, X863DNowLevel(NoThreeDNow)
@@ -312,40 +258,59 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
// FIXME: this is a known good value for Yonah. How about others?
, MaxInlineSizeThreshold(128)
, TargetTriple(TT)
- , Is64Bit(is64Bit) {
-
+ , In64BitMode(is64Bit) {
// Determine default and user specified characteristics
- if (!CPU.empty() || !FS.empty()) {
- // If feature string is not empty, parse features string.
+ if (!FS.empty() || !CPU.empty()) {
std::string CPUName = CPU;
- if (CPUName.empty())
+ if (CPUName.empty()) {
+#if defined (__x86_64__) || defined(__i386__)
CPUName = sys::getHostCPUName();
- ParseSubtargetFeatures(FS, CPUName);
- // All X86-64 CPUs also have SSE2, however user might request no SSE via
- // -mattr, so don't force SSELevel here.
- if (HasAVX)
- X86SSELevel = NoMMXSSE;
+#else
+ CPUName = "generic";
+#endif
+ }
+
+ // Make sure 64-bit features are available in 64-bit mode, while still
+ // allowing SSE2 to be turned off explicitly.
+ std::string FullFS = FS;
+ if (In64BitMode) {
+ if (!FullFS.empty())
+ FullFS = "+64bit,+sse2," + FullFS;
+ else
+ FullFS = "+64bit,+sse2";
+ }
+
+ // If feature string is not empty, parse features string.
+ ParseSubtargetFeatures(CPUName, FullFS);
} else {
// Otherwise, use CPUID to auto-detect feature set.
AutoDetectSubtargetFeatures();
- // Make sure SSE2 is enabled; it is available on all X86-64 CPUs.
- if (Is64Bit && !HasAVX && X86SSELevel < SSE2)
- X86SSELevel = SSE2;
- }
- // If requesting codegen for X86-64, make sure that 64-bit features
- // are enabled.
- if (Is64Bit) {
- HasX86_64 = true;
+ // Make sure 64-bit features are available in 64-bit mode.
+ if (In64BitMode) {
+ HasX86_64 = true; ToggleFeature(X86::Feature64Bit);
+ HasCMov = true; ToggleFeature(X86::FeatureCMOV);
- // All 64-bit cpus have cmov support.
- HasCMov = true;
+ if (!HasAVX && X86SSELevel < SSE2) {
+ X86SSELevel = SSE2;
+ ToggleFeature(X86::FeatureSSE1);
+ ToggleFeature(X86::FeatureSSE2);
+ }
+ }
}
+
+ // It's important to keep the MCSubtargetInfo feature bits in sync with the
+ // target data structure, which is shared with the MC code emitter etc.
+ if (In64BitMode)
+ ToggleFeature(X86::Mode64Bit);
+
+ if (HasAVX)
+ X86SSELevel = NoMMXSSE;
DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
<< ", 3DNowLevel " << X863DNowLevel
<< ", 64bit " << HasX86_64 << "\n");
- assert((!Is64Bit || HasX86_64) &&
+ assert((!In64BitMode || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
// Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
@@ -353,6 +318,6 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
if (StackAlignOverride)
stackAlignment = StackAlignOverride;
else if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() ||
- isTargetSolaris() || Is64Bit)
+ isTargetSolaris() || In64BitMode)
stackAlignment = 16;
}
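The "+64bit,+sse2" prepending works because feature strings are applied left
to right, so a user-supplied "-sse2" lands after the implied defaults and
still wins. A sketch of the composition (assuming that ordering rule):

#include <string>

std::string composeFS(const std::string &FS, bool In64BitMode) {
  if (!In64BitMode)
    return FS;
  // Prepend the implied 64-bit features; explicit user toggles come later.
  return FS.empty() ? "+64bit,+sse2" : "+64bit,+sse2," + FS;
}
// composeFS("-sse2", true) == "+64bit,+sse2,-sse2" -> SSE2 ends up disabled.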
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index d49b871..6d22027 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -24,6 +24,7 @@
namespace llvm {
class GlobalValue;
+class StringRef;
class TargetMachine;
/// PICStyles - The X86 backend supports a number of different styles of PIC.
@@ -111,9 +112,8 @@ protected:
Triple TargetTriple;
private:
- /// Is64Bit - True if the processor supports 64-bit instructions and
- /// pointer size is 64 bit.
- bool Is64Bit;
+ /// In64BitMode - True if compiling for 64-bit, false for 32-bit.
+ bool In64BitMode;
public:
@@ -121,8 +121,8 @@ public:
/// of the specified triple.
///
X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit,
- unsigned StackAlignOverride);
+ const std::string &FS,
+ unsigned StackAlignOverride, bool is64Bit);
/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
@@ -135,13 +135,13 @@ public:
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
/// AutoDetectSubtargetFeatures - Auto-detect CPU features using CPUID
/// instruction.
void AutoDetectSubtargetFeatures();
- bool is64Bit() const { return Is64Bit; }
+ bool is64Bit() const { return In64BitMode; }
PICStyles::Style getPICStyle() const { return PICStyle; }
void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
@@ -199,7 +199,7 @@ public:
}
bool isTargetWin64() const {
- return Is64Bit && (isTargetMingw() || isTargetWindows());
+ return In64BitMode && (isTargetMingw() || isTargetWindows());
}
bool isTargetEnvMacho() const {
@@ -207,7 +207,7 @@ public:
}
bool isTargetWin32() const {
- return !Is64Bit && (isTargetMingw() || isTargetWindows());
+ return !In64BitMode && (isTargetMingw() || isTargetWindows());
}
bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 1b6fa30..9cab0e0 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "X86MCAsmInfo.h"
#include "X86TargetMachine.h"
#include "X86.h"
#include "llvm/PassManager.h"
@@ -24,22 +23,6 @@
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
- Triple TheTriple(TT);
-
- if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
- if (TheTriple.getArch() == Triple::x86_64)
- return new X86_64MCAsmInfoDarwin(TheTriple);
- else
- return new X86MCAsmInfoDarwin(TheTriple);
- }
-
- if (TheTriple.isOSWindows())
- return new X86MCAsmInfoCOFF(TheTriple);
-
- return new X86ELFMCAsmInfo(TheTriple);
-}
-
static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
MCContext &Ctx, TargetAsmBackend &TAB,
raw_ostream &_OS,
@@ -62,15 +45,11 @@ extern "C" void LLVMInitializeX86Target() {
RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);
- // Register the target asm info.
- RegisterAsmInfoFn A(TheX86_32Target, createMCAsmInfo);
- RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo);
-
// Register the code emitter.
TargetRegistry::RegisterCodeEmitter(TheX86_32Target,
- createX86_32MCCodeEmitter);
+ createX86MCCodeEmitter);
TargetRegistry::RegisterCodeEmitter(TheX86_64Target,
- createX86_64MCCodeEmitter);
+ createX86MCCodeEmitter);
// Register the asm backend.
TargetRegistry::RegisterAsmBackend(TheX86_32Target,
@@ -119,8 +98,8 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
const std::string &CPU,
const std::string &FS, bool is64Bit)
- : LLVMTargetMachine(T, TT),
- Subtarget(TT, CPU, FS, is64Bit, StackAlignmentOverride),
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit),
FrameLowering(*this, Subtarget),
ELFWriterInfo(is64Bit, true) {
DefRelocModel = getRelocationModel();