aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/X86
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86')
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp12
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.h2
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp9
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.h3
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp4
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.h1
-rw-r--r--lib/Target/X86/README-X86-64.txt47
-rw-r--r--lib/Target/X86/README.txt108
-rw-r--r--lib/Target/X86/X86.td27
-rw-r--r--lib/Target/X86/X86AsmBackend.cpp51
-rw-r--r--lib/Target/X86/X86FastISel.cpp928
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp2
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp33
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp244
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp383
-rw-r--r--lib/Target/X86/X86ISelLowering.h31
-rw-r--r--lib/Target/X86/X86Instr3DNow.td109
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td2
-rw-r--r--lib/Target/X86/X86InstrCompiler.td225
-rw-r--r--lib/Target/X86/X86InstrExtension.td39
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td3
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp110
-rw-r--r--lib/Target/X86/X86InstrInfo.h3
-rw-r--r--lib/Target/X86/X86InstrInfo.td82
-rw-r--r--lib/Target/X86/X86InstrMMX.td2
-rw-r--r--lib/Target/X86/X86InstrSSE.td915
-rw-r--r--lib/Target/X86/X86InstrSystem.td7
-rw-r--r--lib/Target/X86/X86MCAsmInfo.cpp21
-rw-r--r--lib/Target/X86/X86MCAsmInfo.h8
-rw-r--r--lib/Target/X86/X86MCCodeEmitter.cpp5
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp4
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp138
-rw-r--r--lib/Target/X86/X86RegisterInfo.h7
-rw-r--r--lib/Target/X86/X86RegisterInfo.td606
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp2
-rw-r--r--lib/Target/X86/X86Subtarget.cpp4
-rw-r--r--lib/Target/X86/X86Subtarget.h19
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp40
-rw-r--r--lib/Target/X86/X86TargetObjectFile.cpp23
-rw-r--r--lib/Target/X86/X86TargetObjectFile.h10
40 files changed, 2363 insertions, 1906 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index e0989b0..c352bfc 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -928,6 +928,18 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
Operands.erase(Operands.begin() + 1);
}
}
+
+ // Transforms "int $3" into "int3" as a size optimization. We can't write an
+ // instalias with an immediate operand yet.
+ if (Name == "int" && Operands.size() == 2) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
+ delete Operands[1];
+ Operands.erase(Operands.begin() + 1);
+ static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
+ }
+ }
return false;
}
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index d4a88d7..a9c90f8 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -485,7 +485,7 @@ struct InternalInstruction {
consumed___ indicates that the byte was already consumed and does not
need to be consumed again */
- /* The VEX.vvvv field, which contains a thrid register operand for some AVX
+ /* The VEX.vvvv field, which contains a third register operand for some AVX
instructions */
Reg vvvv;
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index d006eca..68247d2 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -41,8 +41,15 @@ X86ATTInstPrinter::X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
&TM.getSubtarget<X86Subtarget>()));
}
+void X86ATTInstPrinter::printRegName(raw_ostream &OS,
+ unsigned RegNo) const {
+ OS << '%' << getRegisterName(RegNo);
+}
+
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
- printInstruction(MI, OS);
+ // Try to print any aliases first.
+ if (!printAliasInstr(MI, OS))
+ printInstruction(MI, OS);
// If verbose assembly is enabled, we can print some informative comments.
if (CommentStream)
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index f24674f..5f939b6 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -26,11 +26,14 @@ class X86ATTInstPrinter : public MCInstPrinter {
public:
X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI);
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &OS);
virtual StringRef getOpcodeName(unsigned Opcode) const;
// Methods used to print the alias of an instruction.
unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const;
+ // Autogenerated by tblgen, returns true if we successfully printed an
+ // alias.
bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
// Autogenerated by tblgen.
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 47253eb..5f581ba 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -29,6 +29,10 @@ using namespace llvm;
#define GET_INSTRUCTION_NAME
#include "X86GenAsmWriter1.inc"
+void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
printInstruction(MI, OS);
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index ca99dc0..c8030c3 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -27,6 +27,7 @@ public:
X86IntelInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
: MCInstPrinter(MAI) {}
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &OS);
virtual StringRef getOpcodeName(unsigned Opcode) const;
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt
index e21d69a..bcfdf0b 100644
--- a/lib/Target/X86/README-X86-64.txt
+++ b/lib/Target/X86/README-X86-64.txt
@@ -36,7 +36,7 @@ _conv:
cmovb %rcx, %rax
ret
-Seems like the jb branch has high likelyhood of being taken. It would have
+Seems like the jb branch has high likelihood of being taken. It would have
saved a few instructions.
//===---------------------------------------------------------------------===//
@@ -124,51 +124,6 @@ if we have whole-function selectiondags.
//===---------------------------------------------------------------------===//
-Take the following C code
-(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43640):
-
-struct u1
-{
- float x;
- float y;
-};
-
-float foo(struct u1 u)
-{
- return u.x + u.y;
-}
-
-Optimizes to the following IR:
-define float @foo(double %u.0) nounwind readnone {
-entry:
- %tmp8 = bitcast double %u.0 to i64 ; <i64> [#uses=2]
- %tmp6 = trunc i64 %tmp8 to i32 ; <i32> [#uses=1]
- %tmp7 = bitcast i32 %tmp6 to float ; <float> [#uses=1]
- %tmp2 = lshr i64 %tmp8, 32 ; <i64> [#uses=1]
- %tmp3 = trunc i64 %tmp2 to i32 ; <i32> [#uses=1]
- %tmp4 = bitcast i32 %tmp3 to float ; <float> [#uses=1]
- %0 = fadd float %tmp7, %tmp4 ; <float> [#uses=1]
- ret float %0
-}
-
-And current llvm-gcc/clang output:
- movd %xmm0, %rax
- movd %eax, %xmm1
- shrq $32, %rax
- movd %eax, %xmm0
- addss %xmm1, %xmm0
- ret
-
-We really shouldn't move the floats to RAX, only to immediately move them
-straight back to the XMM registers.
-
-There really isn't any good way to handle this purely in IR optimizers; it
-could possibly be handled by changing the output of the fronted, though. It
-would also be feasible to add a x86-specific DAGCombine to optimize the
-bitcast+trunc+(lshr+)bitcast combination.
-
-//===---------------------------------------------------------------------===//
-
Take the following code
(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34653):
extern unsigned long table[];
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 1ac2305..560947a 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -7,14 +7,6 @@ copy (3-addr bswap + memory support?) This is available on Atom processors.
//===---------------------------------------------------------------------===//
-CodeGen/X86/lea-3.ll:test3 should be a single LEA, not a shift/move. The X86
-backend knows how to three-addressify this shift, but it appears the register
-allocator isn't even asking it to do so in this case. We should investigate
-why this isn't happening, it could have significant impact on other important
-cases for X86 as well.
-
-//===---------------------------------------------------------------------===//
-
This should be one DIV/IDIV instruction, not a libcall:
unsigned test(unsigned long long X, unsigned Y) {
@@ -1572,7 +1564,7 @@ Implement processor-specific optimizations for parity with GCC on these
processors. GCC does two optimizations:
1. ix86_pad_returns inserts a noop before ret instructions if immediately
- preceeded by a conditional branch or is the target of a jump.
+ preceded by a conditional branch or is the target of a jump.
2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of
code contains more than 3 branches.
@@ -1736,26 +1728,6 @@ are functionally identical.
//===---------------------------------------------------------------------===//
Take the following C code:
-int x(int y) { return (y & 63) << 14; }
-
-Code produced by gcc:
- andl $63, %edi
- sall $14, %edi
- movl %edi, %eax
- ret
-
-Code produced by clang:
- shll $14, %edi
- movl %edi, %eax
- andl $1032192, %eax
- ret
-
-The code produced by gcc is 3 bytes shorter. This sort of construct often
-shows up with bitfields.
-
-//===---------------------------------------------------------------------===//
-
-Take the following C code:
int f(int a, int b) { return (unsigned char)a == (unsigned char)b; }
We generate the following IR with clang:
@@ -2016,3 +1988,81 @@ We currently generate:
We could save an instruction here by commuting the addss.
//===---------------------------------------------------------------------===//
+
+This (from PR9661):
+
+float clamp_float(float a) {
+ if (a > 1.0f)
+ return 1.0f;
+ else if (a < 0.0f)
+ return 0.0f;
+ else
+ return a;
+}
+
+Could compile to:
+
+clamp_float: # @clamp_float
+ movss .LCPI0_0(%rip), %xmm1
+ minss %xmm1, %xmm0
+ pxor %xmm1, %xmm1
+ maxss %xmm1, %xmm0
+ ret
+
+with -ffast-math.
+
+//===---------------------------------------------------------------------===//
+
+This function (from PR9803):
+
+int clamp2(int a) {
+ if (a > 5)
+ a = 5;
+ if (a < 0)
+ return 0;
+ return a;
+}
+
+Compiles to:
+
+_clamp2: ## @clamp2
+ pushq %rbp
+ movq %rsp, %rbp
+ cmpl $5, %edi
+ movl $5, %ecx
+ cmovlel %edi, %ecx
+ testl %ecx, %ecx
+ movl $0, %eax
+ cmovnsl %ecx, %eax
+ popq %rbp
+ ret
+
+The move of 0 could be scheduled above the test to make it is xor reg,reg.
+
+//===---------------------------------------------------------------------===//
+
+GCC PR48986. We currently compile this:
+
+void bar(void);
+void yyy(int* p) {
+ if (__sync_fetch_and_add(p, -1) == 1)
+ bar();
+}
+
+into:
+ movl $-1, %eax
+ lock
+ xaddl %eax, (%rdi)
+ cmpl $1, %eax
+ je LBB0_2
+
+Instead we could generate:
+
+ lock
+ dec %rdi
+ je LBB0_2
+
+The trick is to match "fetch_and_add(X, -C) == C".
+
+//===---------------------------------------------------------------------===//
+
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index efb6c8c..7bb9676 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -1,13 +1,13 @@
//===- X86.td - Target definition file for the Intel X86 ---*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
-// This is a target description file for the Intel i386 architecture, refered to
+// This is a target description file for the Intel i386 architecture, referred to
// here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//
@@ -32,7 +32,7 @@ def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
"Enable SSE instructions",
// SSE codegen depends on cmovs, and all
- // SSE1+ processors support them.
+ // SSE1+ processors support them.
[FeatureMMX, FeatureCMOV]>;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
"Enable SSE2 instructions",
@@ -50,7 +50,8 @@ def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
[FeatureSSE41, FeaturePOPCNT]>;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
- "Enable 3DNow! instructions">;
+ "Enable 3DNow! instructions",
+ [FeatureMMX]>;
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
"Enable 3DNow! Athlon instructions",
[Feature3DNow]>;
@@ -100,8 +101,10 @@ def : Proc<"i686", []>;
def : Proc<"pentiumpro", [FeatureCMOV]>;
def : Proc<"pentium2", [FeatureMMX, FeatureCMOV]>;
def : Proc<"pentium3", [FeatureSSE1]>;
+def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>;
def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSSE2]>;
+def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
@@ -121,14 +124,14 @@ def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset.
// FIXME: Disabling AVX for now since it's not ready.
-def : Proc<"sandybridge", [FeatureSSE42, Feature64Bit,
+def : Proc<"corei7-avx", [FeatureSSE42, Feature64Bit,
FeatureAES, FeatureCLMUL]>;
def : Proc<"k6", [FeatureMMX]>;
-def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
-def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>;
-def : Proc<"athlon", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
-def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"k6-2", [Feature3DNow]>;
+def : Proc<"k6-3", [Feature3DNow]>;
+def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
@@ -156,8 +159,8 @@ def : Proc<"shanghai", [Feature3DNowA, Feature64Bit, FeatureSSE4A,
Feature3DNowA]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
-def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>;
-def : Proc<"c3", [FeatureMMX, Feature3DNow]>;
+def : Proc<"winchip2", [Feature3DNow]>;
+def : Proc<"c3", [Feature3DNow]>;
def : Proc<"c3-2", [FeatureSSE1]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index a7581eb..4d7d96d 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -21,6 +21,7 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -28,6 +29,13 @@
#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
+// Option to allow disabling arithmetic relaxation to workaround PR9807, which
+// is useful when running bitwise comparison experiments on Darwin. We should be
+// able to remove this once PR9807 is resolved.
+static cl::opt<bool>
+MCDisableArithRelaxation("mc-x86-disable-arith-relaxation",
+ cl::desc("Disable relaxation of arithmetic instruction for X86"));
+
static unsigned getFixupKindLog2Size(unsigned Kind) {
switch (Kind) {
default: assert(0 && "invalid fixup kind!");
@@ -201,6 +209,9 @@ bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode())
return true;
+ if (MCDisableArithRelaxation)
+ return false;
+
// Check if this instruction is ever relaxable.
if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode())
return false;
@@ -414,34 +425,26 @@ public:
TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
const std::string &TT) {
- switch (Triple(TT).getOS()) {
- case Triple::Darwin:
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
return new DarwinX86_32AsmBackend(T);
- case Triple::MinGW32:
- case Triple::Cygwin:
- case Triple::Win32:
- if (Triple(TT).getEnvironment() == Triple::MachO)
- return new DarwinX86_32AsmBackend(T);
- else
- return new WindowsX86AsmBackend(T, false);
- default:
- return new ELFX86_32AsmBackend(T, Triple(TT).getOS());
- }
+
+ if (TheTriple.isOSWindows())
+ return new WindowsX86AsmBackend(T, false);
+
+ return new ELFX86_32AsmBackend(T, TheTriple.getOS());
}
TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
const std::string &TT) {
- switch (Triple(TT).getOS()) {
- case Triple::Darwin:
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
return new DarwinX86_64AsmBackend(T);
- case Triple::MinGW32:
- case Triple::Cygwin:
- case Triple::Win32:
- if (Triple(TT).getEnvironment() == Triple::MachO)
- return new DarwinX86_64AsmBackend(T);
- else
- return new WindowsX86AsmBackend(T, true);
- default:
- return new ELFX86_64AsmBackend(T, Triple(TT).getOS());
- }
+
+ if (TheTriple.isOSWindows())
+ return new WindowsX86AsmBackend(T, true);
+
+ return new ELFX86_64AsmBackend(T, TheTriple.getOS());
}
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 8874486..f1b9972 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -23,6 +23,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -77,10 +78,8 @@ private:
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
- bool X86FastEmitStore(EVT VT, const Value *Val,
- const X86AddressMode &AM);
- bool X86FastEmitStore(EVT VT, unsigned Val,
- const X86AddressMode &AM);
+ bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
+ bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
unsigned &ResultReg);
@@ -109,11 +108,11 @@ private:
bool X86SelectFPExt(const Instruction *I);
bool X86SelectFPTrunc(const Instruction *I);
- bool X86SelectExtractValue(const Instruction *I);
-
bool X86VisitIntrinsicCall(const IntrinsicInst &I);
bool X86SelectCall(const Instruction *I);
+ bool DoSelectCall(const Instruction *I, const char *MemIntName);
+
const X86InstrInfo *getInstrInfo() const {
return getTargetMachine()->getInstrInfo();
}
@@ -125,6 +124,8 @@ private:
unsigned TargetMaterializeAlloca(const AllocaInst *C);
+ unsigned TargetMaterializeFloatZero(const ConstantFP *CF);
+
/// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
/// computed in an SSE register, not on the X87 floating point stack.
bool isScalarFPTypeInSSEReg(EVT VT) const {
@@ -133,6 +134,11 @@ private:
}
bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
+
+ bool IsMemcpySmall(uint64_t Len);
+
+ bool TryEmitSmallMemcpy(X86AddressMode DestAM,
+ X86AddressMode SrcAM, uint64_t Len);
};
} // end anonymous namespace.
@@ -224,8 +230,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
/// and a displacement offset, or a GlobalAddress,
/// i.e. V. Return true if it is possible.
bool
-X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
- const X86AddressMode &AM) {
+X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
@@ -395,43 +400,45 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
const Value *Op = *i;
if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
const StructLayout *SL = TD.getStructLayout(STy);
- unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
- Disp += SL->getElementOffset(Idx);
- } else {
- uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
- for (;;) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
- // Constant-offset addressing.
- Disp += CI->getSExtValue() * S;
- break;
- }
- if (isa<AddOperator>(Op) &&
- (!isa<Instruction>(Op) ||
- FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
- == FuncInfo.MBB) &&
- isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
- // An add (in the same block) with a constant operand. Fold the
- // constant.
- ConstantInt *CI =
- cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
- Disp += CI->getSExtValue() * S;
- // Iterate on the other operand.
- Op = cast<AddOperator>(Op)->getOperand(0);
- continue;
- }
- if (IndexReg == 0 &&
- (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
- (S == 1 || S == 2 || S == 4 || S == 8)) {
- // Scaled-index addressing.
- Scale = S;
- IndexReg = getRegForGEPIndex(Op).first;
- if (IndexReg == 0)
- return false;
- break;
- }
- // Unsupported.
- goto unsupported_gep;
+ Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
+ continue;
+ }
+
+ // A array/variable index is always of the form i*S where S is the
+ // constant scale size. See if we can push the scale into immediates.
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ Disp += CI->getSExtValue() * S;
+ break;
+ }
+ if (isa<AddOperator>(Op) &&
+ (!isa<Instruction>(Op) ||
+ FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
+ == FuncInfo.MBB) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add (in the same block) with a constant operand. Fold the
+ // constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ Disp += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ if (IndexReg == 0 &&
+ (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
+ (S == 1 || S == 2 || S == 4 || S == 8)) {
+ // Scaled-index addressing.
+ Scale = S;
+ IndexReg = getRegForGEPIndex(Op).first;
+ if (IndexReg == 0)
+ return false;
+ break;
}
+ // Unsupported.
+ goto unsupported_gep;
}
}
// Check for displacement overflow.
@@ -445,7 +452,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
if (X86SelectAddress(U->getOperand(0), AM))
return true;
- // If we couldn't merge the sub value into this addr mode, revert back to
+ // If we couldn't merge the gep value into this addr mode, revert back to
// our address and just match the value instead of completely failing.
AM = SavedAM;
break;
@@ -457,91 +464,91 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// Handle constant address.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- // Can't handle alternate code models yet.
+ // Can't handle alternate code models or TLS yet.
if (TM.getCodeModel() != CodeModel::Small)
return false;
- // RIP-relative addresses can't have additional register operands.
- if (Subtarget->isPICStyleRIPRel() &&
- (AM.Base.Reg != 0 || AM.IndexReg != 0))
- return false;
-
- // Can't handle TLS yet.
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
if (GVar->isThreadLocal())
return false;
- // Okay, we've committed to selecting this global. Set up the basic address.
- AM.GV = GV;
-
- // Allow the subtarget to classify the global.
- unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
-
- // If this reference is relative to the pic base, set it now.
- if (isGlobalRelativeToPICBase(GVFlags)) {
- // FIXME: How do we know Base.Reg is free??
- AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
- }
+ // RIP-relative addresses can't have additional register operands, so if
+ // we've already folded stuff into the addressing mode, just force the
+ // global value into its own register, which we can use as the basereg.
+ if (!Subtarget->isPICStyleRIPRel() ||
+ (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
+ // Okay, we've committed to selecting this global. Set up the address.
+ AM.GV = GV;
+
+ // Allow the subtarget to classify the global.
+ unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
+
+ // If this reference is relative to the pic base, set it now.
+ if (isGlobalRelativeToPICBase(GVFlags)) {
+ // FIXME: How do we know Base.Reg is free??
+ AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ }
- // Unless the ABI requires an extra load, return a direct reference to
- // the global.
- if (!isGlobalStubReference(GVFlags)) {
- if (Subtarget->isPICStyleRIPRel()) {
- // Use rip-relative addressing if we can. Above we verified that the
- // base and index registers are unused.
- assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
- AM.Base.Reg = X86::RIP;
+ // Unless the ABI requires an extra load, return a direct reference to
+ // the global.
+ if (!isGlobalStubReference(GVFlags)) {
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
+ }
+ AM.GVOpFlags = GVFlags;
+ return true;
}
- AM.GVOpFlags = GVFlags;
- return true;
- }
- // Ok, we need to do a load from a stub. If we've already loaded from this
- // stub, reuse the loaded pointer, otherwise emit the load now.
- DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
- unsigned LoadReg;
- if (I != LocalValueMap.end() && I->second != 0) {
- LoadReg = I->second;
- } else {
- // Issue load from stub.
- unsigned Opc = 0;
- const TargetRegisterClass *RC = NULL;
- X86AddressMode StubAM;
- StubAM.Base.Reg = AM.Base.Reg;
- StubAM.GV = GV;
- StubAM.GVOpFlags = GVFlags;
-
- // Prepare for inserting code in the local-value area.
- SavePoint SaveInsertPt = enterLocalValueArea();
-
- if (TLI.getPointerTy() == MVT::i64) {
- Opc = X86::MOV64rm;
- RC = X86::GR64RegisterClass;
-
- if (Subtarget->isPICStyleRIPRel())
- StubAM.Base.Reg = X86::RIP;
+ // Ok, we need to do a load from a stub. If we've already loaded from
+ // this stub, reuse the loaded pointer, otherwise emit the load now.
+ DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
+ unsigned LoadReg;
+ if (I != LocalValueMap.end() && I->second != 0) {
+ LoadReg = I->second;
} else {
- Opc = X86::MOV32rm;
- RC = X86::GR32RegisterClass;
- }
+ // Issue load from stub.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ X86AddressMode StubAM;
+ StubAM.Base.Reg = AM.Base.Reg;
+ StubAM.GV = GV;
+ StubAM.GVOpFlags = GVFlags;
+
+ // Prepare for inserting code in the local-value area.
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ if (TLI.getPointerTy() == MVT::i64) {
+ Opc = X86::MOV64rm;
+ RC = X86::GR64RegisterClass;
+
+ if (Subtarget->isPICStyleRIPRel())
+ StubAM.Base.Reg = X86::RIP;
+ } else {
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
+ }
- LoadReg = createResultReg(RC);
- MachineInstrBuilder LoadMI =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
- addFullAddress(LoadMI, StubAM);
+ LoadReg = createResultReg(RC);
+ MachineInstrBuilder LoadMI =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
+ addFullAddress(LoadMI, StubAM);
- // Ok, back to normal mode.
- leaveLocalValueArea(SaveInsertPt);
+ // Ok, back to normal mode.
+ leaveLocalValueArea(SaveInsertPt);
- // Prevent loading GV stub multiple times in same MBB.
- LocalValueMap[V] = LoadReg;
- }
+ // Prevent loading GV stub multiple times in same MBB.
+ LocalValueMap[V] = LoadReg;
+ }
- // Now construct the final address. Note that the Disp, Scale,
- // and Index values may already be set here.
- AM.Base.Reg = LoadReg;
- AM.GV = 0;
- return true;
+ // Now construct the final address. Note that the Disp, Scale,
+ // and Index values may already be set here.
+ AM.Base.Reg = LoadReg;
+ AM.GV = 0;
+ return true;
+ }
}
// If all else fails, try to materialize the value in a register.
@@ -699,7 +706,8 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
+ I->getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
const Value *RV = Ret->getOperand(0);
@@ -719,18 +727,38 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// Only handle register returns for now.
if (!VA.isRegLoc())
return false;
- // TODO: For now, don't try to handle cases where getLocInfo()
- // says Full but the types don't match.
- if (TLI.getValueType(RV->getType()) != VA.getValVT())
- return false;
// The calling-convention tables for x87 returns don't tell
// the whole story.
if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
return false;
- // Make the copy.
unsigned SrcReg = Reg + VA.getValNo();
+ EVT SrcVT = TLI.getValueType(RV->getType());
+ EVT DstVT = VA.getValVT();
+ // Special handling for extended integers.
+ if (SrcVT != DstVT) {
+ if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
+ return false;
+
+ if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
+ return false;
+
+ assert(DstVT == MVT::i32 && "X86 should always ext to i32");
+
+ if (SrcVT == MVT::i1) {
+ if (Outs[0].Flags.isSExt())
+ return false;
+ SrcReg = FastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
+ SrcVT = MVT::i8;
+ }
+ unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
+ ISD::SIGN_EXTEND;
+ SrcReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
+ SrcReg, /*TODO: Kill=*/false);
+ }
+
+ // Make the copy.
unsigned DstReg = VA.getLocReg();
const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
// Avoid a cross-class copy. This is very unlikely.
@@ -862,12 +890,9 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
unsigned NEReg = createResultReg(&X86::GR8RegClass);
unsigned PReg = createResultReg(&X86::GR8RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(X86::SETNEr), NEReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(X86::SETPr), PReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(X86::OR8rr), ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETNEr), NEReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETPr), PReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::OR8rr),ResultReg)
.addReg(PReg).addReg(NEReg);
UpdateValueMap(I, ResultReg);
return true;
@@ -914,18 +939,31 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
bool X86FastISel::X86SelectZExt(const Instruction *I) {
// Handle zero-extension from i1 to i8, which is common.
- if (I->getType()->isIntegerTy(8) &&
- I->getOperand(0)->getType()->isIntegerTy(1)) {
- unsigned ResultReg = getRegForValue(I->getOperand(0));
- if (ResultReg == 0) return false;
- // Set the high bits to zero.
- ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
- if (ResultReg == 0) return false;
- UpdateValueMap(I, ResultReg);
- return true;
+ if (!I->getOperand(0)->getType()->isIntegerTy(1))
+ return false;
+
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (!TLI.isTypeLegal(DstVT))
+ return false;
+
+ unsigned ResultReg = getRegForValue(I->getOperand(0));
+ if (ResultReg == 0)
+ return false;
+
+ // Set the high bits to zero.
+ ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
+ if (ResultReg == 0)
+ return false;
+
+ if (DstVT != MVT::i8) {
+ ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
+ ResultReg, /*Kill=*/true);
+ if (ResultReg == 0)
+ return false;
}
- return false;
+ UpdateValueMap(I, ResultReg);
+ return true;
}
@@ -1008,71 +1046,49 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
FuncInfo.MBB->addSuccessor(TrueMBB);
return true;
}
- } else if (ExtractValueInst *EI =
- dyn_cast<ExtractValueInst>(BI->getCondition())) {
- // Check to see if the branch instruction is from an "arithmetic with
- // overflow" intrinsic. The main way these intrinsics are used is:
- //
- // %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
- // %sum = extractvalue { i32, i1 } %t, 0
- // %obit = extractvalue { i32, i1 } %t, 1
- // br i1 %obit, label %overflow, label %normal
- //
- // The %sum and %obit are converted in an ADD and a SETO/SETB before
- // reaching the branch. Therefore, we search backwards through the MBB
- // looking for the SETO/SETB instruction. If an instruction modifies the
- // EFLAGS register before we reach the SETO/SETB instruction, then we can't
- // convert the branch into a JO/JB instruction.
- if (const IntrinsicInst *CI =
- dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){
- if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
- CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
- const MachineInstr *SetMI = 0;
- unsigned Reg = getRegForValue(EI);
-
- for (MachineBasicBlock::const_reverse_iterator
- RI = FuncInfo.MBB->rbegin(), RE = FuncInfo.MBB->rend();
- RI != RE; ++RI) {
- const MachineInstr &MI = *RI;
-
- if (MI.definesRegister(Reg)) {
- if (MI.isCopy()) {
- Reg = MI.getOperand(1).getReg();
- continue;
- }
-
- SetMI = &MI;
- break;
- }
-
- const TargetInstrDesc &TID = MI.getDesc();
- if (TID.hasImplicitDefOfPhysReg(X86::EFLAGS) ||
- MI.hasUnmodeledSideEffects())
- break;
- }
+ } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
+ // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
+ // typically happen for _Bool and C++ bools.
+ MVT SourceVT;
+ if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
+ isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
+ unsigned TestOpc = 0;
+ switch (SourceVT.SimpleTy) {
+ default: break;
+ case MVT::i8: TestOpc = X86::TEST8ri; break;
+ case MVT::i16: TestOpc = X86::TEST16ri; break;
+ case MVT::i32: TestOpc = X86::TEST32ri; break;
+ case MVT::i64: TestOpc = X86::TEST64ri32; break;
+ }
+ if (TestOpc) {
+ unsigned OpReg = getRegForValue(TI->getOperand(0));
+ if (OpReg == 0) return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TestOpc))
+ .addReg(OpReg).addImm(1);
- if (SetMI) {
- unsigned OpCode = SetMI->getOpcode();
-
- if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(OpCode == X86::SETOr ? X86::JO_4 : X86::JB_4))
- .addMBB(TrueMBB);
- FastEmitBranch(FalseMBB, DL);
- FuncInfo.MBB->addSuccessor(TrueMBB);
- return true;
- }
+ unsigned JmpOpc = X86::JNE_4;
+ if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
+ std::swap(TrueMBB, FalseMBB);
+ JmpOpc = X86::JE_4;
}
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(JmpOpc))
+ .addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
+ return true;
}
}
}
// Otherwise do a clumsy setcc and re-test it.
+ // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
+ // in an explicit cast, so make sure to handle that correctly.
unsigned OpReg = getRegForValue(BI->getCondition());
if (OpReg == 0) return false;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
- .addReg(OpReg).addReg(OpReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8ri))
+ .addReg(OpReg).addImm(1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4))
.addMBB(TrueMBB);
FastEmitBranch(FalseMBB, DL);
@@ -1081,42 +1097,42 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
}
bool X86FastISel::X86SelectShift(const Instruction *I) {
- unsigned CReg = 0, OpReg = 0, OpImm = 0;
+ unsigned CReg = 0, OpReg = 0;
const TargetRegisterClass *RC = NULL;
if (I->getType()->isIntegerTy(8)) {
CReg = X86::CL;
RC = &X86::GR8RegClass;
switch (I->getOpcode()) {
- case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break;
- case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break;
- case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
+ case Instruction::LShr: OpReg = X86::SHR8rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR8rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL8rCL; break;
default: return false;
}
} else if (I->getType()->isIntegerTy(16)) {
CReg = X86::CX;
RC = &X86::GR16RegClass;
switch (I->getOpcode()) {
- case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break;
- case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break;
- case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
+ case Instruction::LShr: OpReg = X86::SHR16rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR16rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL16rCL; break;
default: return false;
}
} else if (I->getType()->isIntegerTy(32)) {
CReg = X86::ECX;
RC = &X86::GR32RegClass;
switch (I->getOpcode()) {
- case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break;
- case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break;
- case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
+ case Instruction::LShr: OpReg = X86::SHR32rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR32rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL32rCL; break;
default: return false;
}
} else if (I->getType()->isIntegerTy(64)) {
CReg = X86::RCX;
RC = &X86::GR64RegClass;
switch (I->getOpcode()) {
- case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break;
- case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break;
- case Instruction::Shl: OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break;
+ case Instruction::LShr: OpReg = X86::SHR64rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR64rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL64rCL; break;
default: return false;
}
} else {
@@ -1130,15 +1146,6 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
unsigned Op0Reg = getRegForValue(I->getOperand(0));
if (Op0Reg == 0) return false;
- // Fold immediate in shl(x,3).
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm),
- ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
- UpdateValueMap(I, ResultReg);
- return true;
- }
-
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
@@ -1238,18 +1245,13 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
}
bool X86FastISel::X86SelectTrunc(const Instruction *I) {
- if (Subtarget->is64Bit())
- // All other cases should be handled by the tblgen generated code.
- return false;
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
- // This code only handles truncation to byte right now.
+ // This code only handles truncation to byte.
if (DstVT != MVT::i8 && DstVT != MVT::i1)
- // All other cases should be handled by the tblgen generated code.
return false;
- if (SrcVT != MVT::i16 && SrcVT != MVT::i32)
- // All other cases should be handled by the tblgen generated code.
+ if (!TLI.isTypeLegal(SrcVT))
return false;
unsigned InputReg = getRegForValue(I->getOperand(0));
@@ -1257,16 +1259,26 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
// Unhandled operand. Halt "fast" selection and bail.
return false;
- // First issue a copy to GR16_ABCD or GR32_ABCD.
- const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
- ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
- unsigned CopyReg = createResultReg(CopyRC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- CopyReg).addReg(InputReg);
+ if (SrcVT == MVT::i8) {
+ // Truncate from i8 to i1; no code needed.
+ UpdateValueMap(I, InputReg);
+ return true;
+ }
- // Then issue an extract_subreg.
+ if (!Subtarget->is64Bit()) {
+ // If we're on x86-32; we can't extract an i8 from a general register.
+ // First issue a copy to GR16_ABCD or GR32_ABCD.
+ const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
+ ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
+ unsigned CopyReg = createResultReg(CopyRC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CopyReg).addReg(InputReg);
+ InputReg = CopyReg;
+ }
+
+ // Issue an extract_subreg.
unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
- CopyReg, /*Kill=*/true,
+ InputReg, /*Kill=*/true,
X86::sub_8bit);
if (!ResultReg)
return false;
@@ -1275,35 +1287,92 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
return true;
}
-bool X86FastISel::X86SelectExtractValue(const Instruction *I) {
- const ExtractValueInst *EI = cast<ExtractValueInst>(I);
- const Value *Agg = EI->getAggregateOperand();
+bool X86FastISel::IsMemcpySmall(uint64_t Len) {
+ return Len <= (Subtarget->is64Bit() ? 32 : 16);
+}
- if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) {
- switch (CI->getIntrinsicID()) {
- default: break;
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow: {
- // Cheat a little. We know that the registers for "add" and "seto" are
- // allocated sequentially. However, we only keep track of the register
- // for "add" in the value map. Use extractvalue's index to get the
- // correct register for "seto".
- unsigned OpReg = getRegForValue(Agg);
- if (OpReg == 0)
- return false;
- UpdateValueMap(I, OpReg + *EI->idx_begin());
- return true;
- }
+bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
+ X86AddressMode SrcAM, uint64_t Len) {
+
+ // Make sure we don't bloat code by inlining very large memcpy's.
+ if (!IsMemcpySmall(Len))
+ return false;
+
+ bool i64Legal = Subtarget->is64Bit();
+
+ // We don't care about alignment here since we just emit integer accesses.
+ while (Len) {
+ MVT VT;
+ if (Len >= 8 && i64Legal)
+ VT = MVT::i64;
+ else if (Len >= 4)
+ VT = MVT::i32;
+ else if (Len >= 2)
+ VT = MVT::i16;
+ else {
+ assert(Len == 1);
+ VT = MVT::i8;
}
+
+ unsigned Reg;
+ bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
+ RV &= X86FastEmitStore(VT, Reg, DestAM);
+ assert(RV && "Failed to emit load or store??");
+
+ unsigned Size = VT.getSizeInBits()/8;
+ Len -= Size;
+ DestAM.Disp += Size;
+ SrcAM.Disp += Size;
}
- return false;
+ return true;
}
bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// FIXME: Handle more intrinsics.
switch (I.getIntrinsicID()) {
default: return false;
+ case Intrinsic::memcpy: {
+ const MemCpyInst &MCI = cast<MemCpyInst>(I);
+ // Don't handle volatile or variable length memcpys.
+ if (MCI.isVolatile())
+ return false;
+
+ if (isa<ConstantInt>(MCI.getLength())) {
+ // Small memcpy's are common enough that we want to do them
+ // without a call if possible.
+ uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
+ if (IsMemcpySmall(Len)) {
+ X86AddressMode DestAM, SrcAM;
+ if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
+ !X86SelectAddress(MCI.getRawSource(), SrcAM))
+ return false;
+ TryEmitSmallMemcpy(DestAM, SrcAM, Len);
+ return true;
+ }
+ }
+
+ unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
+ if (!MCI.getLength()->getType()->isIntegerTy(SizeWidth))
+ return false;
+
+ if (MCI.getSourceAddressSpace() > 255 || MCI.getDestAddressSpace() > 255)
+ return false;
+
+ return DoSelectCall(&I, "memcpy");
+ }
+ case Intrinsic::memset: {
+ const MemSetInst &MSI = cast<MemSetInst>(I);
+
+ unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
+ if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth))
+ return false;
+
+ if (MSI.getDestAddressSpace() > 255)
+ return false;
+
+ return DoSelectCall(&I, "memset");
+ }
case Intrinsic::stackprotector: {
// Emit code inline code to store the stack guard onto the stack.
EVT PtrTy = TLI.getPointerTy();
@@ -1314,33 +1383,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// Grab the frame index.
X86AddressMode AM;
if (!X86SelectAddress(Slot, AM)) return false;
-
if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
-
- return true;
- }
- case Intrinsic::objectsize: {
- ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
- const Type *Ty = I.getCalledFunction()->getReturnType();
-
- assert(CI && "Non-constant type in Intrinsic::objectsize?");
-
- MVT VT;
- if (!isTypeLegal(Ty, VT))
- return false;
-
- unsigned OpC = 0;
- if (VT == MVT::i32)
- OpC = X86::MOV32ri;
- else if (VT == MVT::i64)
- OpC = X86::MOV64ri;
- else
- return false;
-
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg).
- addImm(CI->isZero() ? -1ULL : 0);
- UpdateValueMap(&I, ResultReg);
return true;
}
case Intrinsic::dbg_declare: {
@@ -1362,11 +1405,10 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
}
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow: {
+ // FIXME: Should fold immediates.
+
// Replace "add with overflow" intrinsics with an "add" instruction followed
- // by a seto/setc instruction. Later on, when the "extractvalue"
- // instructions are encountered, we use the fact that two registers were
- // created sequentially to get the correct registers for the "sum" and the
- // "overflow bit".
+ // by a seto/setc instruction.
const Function *Callee = I.getCalledFunction();
const Type *RetTy =
cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
@@ -1392,27 +1434,18 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
else
return false;
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ // The call to CreateRegs builds two sequential registers, to store the
+ // both the the returned values.
+ unsigned ResultReg = FuncInfo.CreateRegs(I.getType());
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
.addReg(Reg1).addReg(Reg2);
- unsigned DestReg1 = UpdateValueMap(&I, ResultReg);
-
- // If the add with overflow is an intra-block value then we just want to
- // create temporaries for it like normal. If it is a cross-block value then
- // UpdateValueMap will return the cross-block register used. Since we
- // *really* want the value to be live in the register pair known by
- // UpdateValueMap, we have to use DestReg1+1 as the destination register in
- // the cross block case. In the non-cross-block case, we should just make
- // another register for the value.
- if (DestReg1 != ResultReg)
- ResultReg = DestReg1+1;
- else
- ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));
unsigned Opc = X86::SETBr;
if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
Opc = X86::SETOr;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg+1);
+
+ UpdateValueMap(&I, ResultReg, 2);
return true;
}
}
@@ -1430,11 +1463,18 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
return X86VisitIntrinsicCall(*II);
+ return DoSelectCall(I, 0);
+}
+
+// Select either a call, or an llvm.memcpy/memmove/memset intrinsic
+bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
// Handle only C and fastcc calling conventions for now.
ImmutableCallSite CS(CI);
CallingConv::ID CC = CS.getCallingConv();
- if (CC != CallingConv::C &&
- CC != CallingConv::Fast &&
+ if (CC != CallingConv::C && CC != CallingConv::Fast &&
CC != CallingConv::X86_FastCall)
return false;
@@ -1443,22 +1483,28 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
return false;
- // Let SDISel handle vararg functions.
const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
- if (FTy->isVarArg())
+ bool isVarArg = FTy->isVarArg();
+
+ // Don't know how to handle Win64 varargs yet. Nothing special needed for
+ // x86-32. Special handling for x86-64 is implemented.
+ if (isVarArg && Subtarget->isTargetWin64())
return false;
// Fast-isel doesn't know about callee-pop yet.
- if (Subtarget->IsCalleePop(FTy->isVarArg(), CC))
+ if (Subtarget->IsCalleePop(isVarArg, CC))
return false;
- // Handle *simple* calls for now.
- const Type *RetTy = CS.getType();
- MVT RetVT;
- if (RetTy->isVoidTy())
- RetVT = MVT::isVoid;
- else if (!isTypeLegal(RetTy, RetVT, true))
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ SmallVector<uint64_t, 4> Offsets;
+ GetReturnInfo(I->getType(), CS.getAttributes().getRetAttributes(),
+ Outs, TLI, &Offsets);
+ bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
+ *FuncInfo.MF, FTy->isVarArg(),
+ Outs, FTy->getContext());
+ if (!CanLowerReturn)
return false;
// Materialize callee address in a register. FIXME: GV address can be
@@ -1475,13 +1521,6 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
} else
return false;
- // Allow calls which produce i1 results.
- bool AndToI1 = false;
- if (RetVT == MVT::i1) {
- RetVT = MVT::i8;
- AndToI1 = true;
- }
-
// Deal with call operands first.
SmallVector<const Value *, 8> ArgVals;
SmallVector<unsigned, 8> Args;
@@ -1493,9 +1532,11 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
ArgFlags.reserve(CS.arg_size());
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
- unsigned Arg = getRegForValue(*i);
- if (Arg == 0)
- return false;
+ // If we're lowering a mem intrinsic instead of a regular call, skip the
+ // last two arguments, which should not passed to the underlying functions.
+ if (MemIntName && e-i <= 2)
+ break;
+ Value *ArgVal = *i;
ISD::ArgFlagsTy Flags;
unsigned AttrInd = i - CS.arg_begin() + 1;
if (CS.paramHasAttr(AttrInd, Attribute::SExt))
@@ -1503,34 +1544,83 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
Flags.setZExt();
- // FIXME: Only handle *easy* calls for now.
- if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
- CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
- CS.paramHasAttr(AttrInd, Attribute::Nest) ||
- CS.paramHasAttr(AttrInd, Attribute::ByVal))
- return false;
+ if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
+ const PointerType *Ty = cast<PointerType>(ArgVal->getType());
+ const Type *ElementTy = Ty->getElementType();
+ unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
+ unsigned FrameAlign = CS.getParamAlignment(AttrInd);
+ if (!FrameAlign)
+ FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+ Flags.setByVal();
+ Flags.setByValSize(FrameSize);
+ Flags.setByValAlign(FrameAlign);
+ if (!IsMemcpySmall(FrameSize))
+ return false;
+ }
+
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg))
+ Flags.setInReg();
+ if (CS.paramHasAttr(AttrInd, Attribute::Nest))
+ Flags.setNest();
+
+ // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
+ // instruction. This is safe because it is common to all fastisel supported
+ // calling conventions on x86.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ArgVal)) {
+ if (CI->getBitWidth() == 1 || CI->getBitWidth() == 8 ||
+ CI->getBitWidth() == 16) {
+ if (Flags.isSExt())
+ ArgVal = ConstantExpr::getSExt(CI,Type::getInt32Ty(CI->getContext()));
+ else
+ ArgVal = ConstantExpr::getZExt(CI,Type::getInt32Ty(CI->getContext()));
+ }
+ }
+
+ unsigned ArgReg;
- const Type *ArgTy = (*i)->getType();
+ // Passing bools around ends up doing a trunc to i1 and passing it.
+ // Codegen this as an argument + "and 1".
+ if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) &&
+ cast<TruncInst>(ArgVal)->getParent() == I->getParent() &&
+ ArgVal->hasOneUse()) {
+ ArgVal = cast<TruncInst>(ArgVal)->getOperand(0);
+ ArgReg = getRegForValue(ArgVal);
+ if (ArgReg == 0) return false;
+
+ MVT ArgVT;
+ if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false;
+
+ ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg,
+ ArgVal->hasOneUse(), 1);
+ } else {
+ ArgReg = getRegForValue(ArgVal);
+ }
+
+ if (ArgReg == 0) return false;
+
+ const Type *ArgTy = ArgVal->getType();
MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT))
return false;
+ if (ArgVT == MVT::x86mmx)
+ return false;
unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
Flags.setOrigAlign(OriginalAlignment);
- Args.push_back(Arg);
- ArgVals.push_back(*i);
+ Args.push_back(ArgReg);
+ ArgVals.push_back(ArgVal);
ArgVTs.push_back(ArgVT);
ArgFlags.push_back(Flags);
}
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext());
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs,
+ I->getParent()->getContext());
// Allocate shadow area for Win64
- if (Subtarget->isTargetWin64()) {
+ if (Subtarget->isTargetWin64())
CCInfo.AllocateStack(32, 8);
- }
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
@@ -1555,6 +1645,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
@@ -1562,6 +1654,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
break;
}
case CCValAssign::ZExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
@@ -1569,9 +1663,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
break;
}
case CCValAssign::AExt: {
- // We don't handle MMX parameters yet.
- if (VA.getLocVT().isVector() && VA.getLocVT().getSizeInBits() == 128)
- return false;
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
if (!Emitted)
@@ -1605,14 +1698,21 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
AM.Base.Reg = StackPtr;
AM.Disp = LocMemOffset;
const Value *ArgVal = ArgVals[VA.getValNo()];
-
- // If this is a really simple value, emit this with the Value* version of
- // X86FastEmitStore. If it isn't simple, we don't want to do this, as it
- // can cause us to reevaluate the argument.
- if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal))
+ ISD::ArgFlagsTy Flags = ArgFlags[VA.getValNo()];
+
+ if (Flags.isByVal()) {
+ X86AddressMode SrcAM;
+ SrcAM.Base.Reg = Arg;
+ bool Res = TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize());
+ assert(Res && "memcpy length already checked!"); (void)Res;
+ } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
+ // If this is a really simple value, emit this with the Value* version
+ //of X86FastEmitStore. If it isn't simple, we don't want to do this,
+ // as it can cause us to reevaluate the argument.
X86FastEmitStore(ArgVT, ArgVal, AM);
- else
+ } else {
X86FastEmitStore(ArgVT, Arg, AM);
+ }
}
}
@@ -1624,6 +1724,17 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
X86::EBX).addReg(Base);
}
+ if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
+ // Count the number of XMM registers allocated.
+ static const unsigned XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::MOV8ri),
+ X86::AL).addImm(NumXMMRegs);
+ }
+
// Issue the call.
MachineInstrBuilder MIB;
if (CalleeOp) {
@@ -1662,7 +1773,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
(GV->isDeclaration() || GV->isWeakForLinker()) &&
- Subtarget->getDarwinVers() < 9) {
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
@@ -1670,80 +1782,100 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
}
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
- .addGlobalAddress(GV, 0, OpFlags);
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc));
+ if (MemIntName)
+ MIB.addExternalSymbol(MemIntName, OpFlags);
+ else
+ MIB.addGlobalAddress(GV, 0, OpFlags);
}
// Add an implicit use GOT pointer in EBX.
if (Subtarget->isPICStyleGOT())
MIB.addReg(X86::EBX);
+ if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64())
+ MIB.addReg(X86::AL);
+
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
// Issue CALLSEQ_END
unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
+ unsigned NumBytesCallee = 0;
+ if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet))
+ NumBytesCallee = 4;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
- .addImm(NumBytes).addImm(0);
+ .addImm(NumBytes).addImm(NumBytesCallee);
+
+ // Build info for return calling conv lowering code.
+ // FIXME: This is practically a copy-paste from TargetLowering::LowerCallTo.
+ SmallVector<ISD::InputArg, 32> Ins;
+ SmallVector<EVT, 4> RetTys;
+ ComputeValueVTs(TLI, I->getType(), RetTys);
+ for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
+ EVT VT = RetTys[i];
+ EVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
+ for (unsigned j = 0; j != NumRegs; ++j) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT.getSimpleVT();
+ MyFlags.Used = !CS.getInstruction()->use_empty();
+ if (CS.paramHasAttr(0, Attribute::SExt))
+ MyFlags.Flags.setSExt();
+ if (CS.paramHasAttr(0, Attribute::ZExt))
+ MyFlags.Flags.setZExt();
+ if (CS.paramHasAttr(0, Attribute::InReg))
+ MyFlags.Flags.setInReg();
+ Ins.push_back(MyFlags);
+ }
+ }
- // Now handle call return value (if any).
+ // Now handle call return values.
SmallVector<unsigned, 4> UsedRegs;
- if (RetVT != MVT::isVoid) {
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
- CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);
-
- // Copy all of the result registers out of their specified physreg.
- assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
- EVT CopyVT = RVLocs[0].getValVT();
- TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CC, false, *FuncInfo.MF, TM, RVLocs,
+ I->getParent()->getContext());
+ unsigned ResultReg = FuncInfo.CreateRegs(I->getType());
+ CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ EVT CopyVT = RVLocs[i].getValVT();
+ unsigned CopyReg = ResultReg + i;
// If this is a call to a function that returns an fp value on the x87 fp
// stack, but where we prefer to use the value in xmm registers, copy it
// out as F80 and use a truncate to move it from fp stack reg to xmm reg.
- if ((RVLocs[0].getLocReg() == X86::ST0 ||
- RVLocs[0].getLocReg() == X86::ST1) &&
+ if ((RVLocs[i].getLocReg() == X86::ST0 ||
+ RVLocs[i].getLocReg() == X86::ST1) &&
isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
CopyVT = MVT::f80;
- DstRC = X86::RFP80RegisterClass;
+ CopyReg = createResultReg(X86::RFP80RegisterClass);
}
- unsigned ResultReg = createResultReg(DstRC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(RVLocs[0].getLocReg());
- UsedRegs.push_back(RVLocs[0].getLocReg());
+ CopyReg).addReg(RVLocs[i].getLocReg());
+ UsedRegs.push_back(RVLocs[i].getLocReg());
- if (CopyVT != RVLocs[0].getValVT()) {
+ if (CopyVT != RVLocs[i].getValVT()) {
// Round the F80 the right size, which also moves to the appropriate xmm
// register. This is accomplished by storing the F80 value in memory and
// then loading it back. Ewww...
- EVT ResVT = RVLocs[0].getValVT();
+ EVT ResVT = RVLocs[i].getValVT();
unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
unsigned MemSize = ResVT.getSizeInBits()/8;
int FI = MFI.CreateStackObject(MemSize, MemSize, false);
addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc)), FI)
- .addReg(ResultReg);
- DstRC = ResVT == MVT::f32
- ? X86::FR32RegisterClass : X86::FR64RegisterClass;
+ .addReg(CopyReg);
Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
- ResultReg = createResultReg(DstRC);
addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(Opc), ResultReg), FI);
- }
-
- if (AndToI1) {
- // Mask out all but lowest bit for some call which produces an i1.
- unsigned AndResult = createResultReg(X86::GR8RegisterClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
- ResultReg = AndResult;
+ TII.get(Opc), ResultReg + i), FI);
}
-
- UpdateValueMap(I, ResultReg);
}
+ if (RVLocs.size())
+ UpdateValueMap(I, ResultReg, RVLocs.size());
+
// Set all unused physreg defs as dead.
static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
@@ -1782,8 +1914,6 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) {
return X86SelectFPExt(I);
case Instruction::FPTrunc:
return X86SelectFPTrunc(I);
- case Instruction::ExtractValue:
- return X86SelectExtractValue(I);
case Instruction::IntToPtr: // Deliberate fall-through.
case Instruction::PtrToInt: {
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
@@ -1856,10 +1986,13 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
if (isa<GlobalValue>(C)) {
X86AddressMode AM;
if (X86SelectAddress(C, AM)) {
- if (TLI.getPointerTy() == MVT::i32)
- Opc = X86::LEA32r;
- else
- Opc = X86::LEA64r;
+ // If the expression is just a basereg, then we're done, otherwise we need
+ // to emit an LEA.
+ if (AM.BaseType == X86AddressMode::RegBase &&
+ AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0)
+ return AM.Base.Reg;
+
+ Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
unsigned ResultReg = createResultReg(RC);
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg), AM);
@@ -1921,6 +2054,45 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
return ResultReg;
}
+unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
+ MVT VT;
+ if (!isTypeLegal(CF->getType(), VT))
+ return false;
+
+ // Get opcode and regclass for the given zero.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.SimpleTy) {
+ default: return false;
+ case MVT::f32:
+ if (Subtarget->hasSSE1()) {
+ Opc = X86::FsFLD0SS;
+ RC = X86::FR32RegisterClass;
+ } else {
+ Opc = X86::LD_Fp032;
+ RC = X86::RFP32RegisterClass;
+ }
+ break;
+ case MVT::f64:
+ if (Subtarget->hasSSE2()) {
+ Opc = X86::FsFLD0SD;
+ RC = X86::FR64RegisterClass;
+ } else {
+ Opc = X86::LD_Fp064;
+ RC = X86::RFP64RegisterClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
+ }
+
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
+ return ResultReg;
+}
+
+
/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
/// try to fold the load as an operand to the instruction, returning true if
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 3aaa693..325d061 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -1307,7 +1307,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// set up by FpSET_ST0, and our StackTop is off by one because of it.
unsigned Op0 = getFPReg(MI->getOperand(0));
// Restore the actual StackTop from before Fp_SET_ST0.
- // Note we can't handle Fp_SET_ST1 without a preceeding Fp_SET_ST0, and we
+ // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we
// are not enforcing the constraint.
++StackTop;
unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0).
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 071fbe0..cd4e954 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -1,4 +1,4 @@
-//=======- X86FrameLowering.cpp - X86 Frame Information ------------*- C++ -*-====//
+//=======- X86FrameLowering.cpp - X86 Frame Information --------*- C++ -*-====//
//
// The LLVM Compiler Infrastructure
//
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
@@ -159,8 +160,10 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
Opc = isSub
? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
: (Is64Bit ? X86::POP64r : X86::POP32r);
- BuildMI(MBB, MBBI, DL, TII.get(Opc))
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
.addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
+ if (isSub)
+ MI->setFlag(MachineInstr::FrameSetup);
Offset -= ThisVal;
continue;
}
@@ -170,6 +173,8 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addImm(ThisVal);
+ if (isSub)
+ MI->setFlag(MachineInstr::FrameSetup);
MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
Offset -= ThisVal;
}
@@ -296,7 +301,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
// FIXME: This is dirty hack. The code itself is pretty mess right now.
// It should be rewritten from scratch and generalized sometimes.
- // Determine maximum offset (minumum due to stack growth).
+ // Determine maximum offset (minimum due to stack growth).
int64_t MaxOffset = 0;
for (std::vector<CalleeSavedInfo>::const_iterator
I = CSI.begin(), E = CSI.end(); I != E; ++I)
@@ -354,7 +359,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineModuleInfo &MMI = MF.getMMI();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
bool needsFrameMoves = MMI.hasDebugInfo() ||
- !Fn->doesNotThrow() || UnwindTablesMandatory;
+ Fn->needsUnwindTableEntry();
uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
bool HasFP = hasFP(MF);
@@ -408,7 +413,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
StackPtr)
.addReg(StackPtr)
- .addImm(-TailCallReturnAddrDelta);
+ .addImm(-TailCallReturnAddrDelta)
+ .setMIFlag(MachineInstr::FrameSetup);
MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
}
@@ -446,7 +452,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Save EBP/RBP into the appropriate stack slot.
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
- .addReg(FramePtr, RegState::Kill);
+ .addReg(FramePtr, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
if (needsFrameMoves) {
// Mark the place where EBP/RBP was saved.
@@ -473,7 +480,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Update EBP with the new base value...
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
- .addReg(StackPtr);
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
if (needsFrameMoves) {
// Mark effective beginning of when frame pointer becomes valid.
@@ -615,7 +623,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
TII, *RegInfo);
- if ((NumBytes || PushedRegs) && needsFrameMoves) {
+ if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
// Mark end of stack pointer adjustment.
MCSymbol *Label = MMI.getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
@@ -641,7 +649,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
}
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
+ MachineBasicBlock &MBB) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
@@ -785,7 +793,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
assert(Offset >= 0 && "Offset should never be negative");
if (Offset) {
- // Check for possible merge with preceeding ADD instruction.
+ // Check for possible merge with preceding ADD instruction.
Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo);
}
@@ -829,7 +837,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
int delta = -1*X86FI->getTCReturnAddrDelta();
MBBI = MBB.getLastNonDebugInstr();
- // Check for possible merge with preceeding ADD instruction.
+ // Check for possible merge with preceding ADD instruction.
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo);
}
@@ -918,7 +926,8 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// X86RegisterInfo::emitPrologue will handle spilling of frame register.
continue;
CalleeFrameSize += SlotSize;
- BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill);
+ BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
}
X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 9b0ec6e..1fcc274 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -189,6 +189,7 @@ namespace {
SDNode *Select(SDNode *N);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
+ SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
@@ -1329,6 +1330,8 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
return ResNode;
}
+// FIXME: Figure out some way to unify this with the 'or' and other code
+// below.
SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
if (Node->hasAnyUseOfValue(0))
return 0;
@@ -1479,6 +1482,158 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
}
}
+enum AtomicOpc {
+ OR,
+ AND,
+ XOR,
+ AtomicOpcEnd
+};
+
+enum AtomicSz {
+ ConstantI8,
+ I8,
+ SextConstantI16,
+ ConstantI16,
+ I16,
+ SextConstantI32,
+ ConstantI32,
+ I32,
+ SextConstantI64,
+ ConstantI64,
+ I64,
+ AtomicSzEnd
+};
+
+static const unsigned int AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
+ {
+ X86::LOCK_OR8mi,
+ X86::LOCK_OR8mr,
+ X86::LOCK_OR16mi8,
+ X86::LOCK_OR16mi,
+ X86::LOCK_OR16mr,
+ X86::LOCK_OR32mi8,
+ X86::LOCK_OR32mi,
+ X86::LOCK_OR32mr,
+ X86::LOCK_OR64mi8,
+ X86::LOCK_OR64mi32,
+ X86::LOCK_OR64mr
+ },
+ {
+ X86::LOCK_AND8mi,
+ X86::LOCK_AND8mr,
+ X86::LOCK_AND16mi8,
+ X86::LOCK_AND16mi,
+ X86::LOCK_AND16mr,
+ X86::LOCK_AND32mi8,
+ X86::LOCK_AND32mi,
+ X86::LOCK_AND32mr,
+ X86::LOCK_AND64mi8,
+ X86::LOCK_AND64mi32,
+ X86::LOCK_AND64mr
+ },
+ {
+ X86::LOCK_XOR8mi,
+ X86::LOCK_XOR8mr,
+ X86::LOCK_XOR16mi8,
+ X86::LOCK_XOR16mi,
+ X86::LOCK_XOR16mr,
+ X86::LOCK_XOR32mi8,
+ X86::LOCK_XOR32mi,
+ X86::LOCK_XOR32mr,
+ X86::LOCK_XOR64mi8,
+ X86::LOCK_XOR64mi32,
+ X86::LOCK_XOR64mr
+ }
+};
+
+SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
+ if (Node->hasAnyUseOfValue(0))
+ return 0;
+
+ // Optimize common patterns for __sync_or_and_fetch and similar arith
+ // operations where the result is not used. This allows us to use the "lock"
+ // version of the arithmetic instruction.
+ // FIXME: Same as for 'add' and 'sub', try to merge those down here.
+ SDValue Chain = Node->getOperand(0);
+ SDValue Ptr = Node->getOperand(1);
+ SDValue Val = Node->getOperand(2);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ return 0;
+
+ // Which index into the table.
+ enum AtomicOpc Op;
+ switch (Node->getOpcode()) {
+ case ISD::ATOMIC_LOAD_OR:
+ Op = OR;
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ Op = AND;
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ Op = XOR;
+ break;
+ default:
+ return 0;
+ }
+
+ bool isCN = false;
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
+ if (CN) {
+ isCN = true;
+ Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT);
+ }
+
+ unsigned Opc = 0;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::i8:
+ if (isCN)
+ Opc = AtomicOpcTbl[Op][ConstantI8];
+ else
+ Opc = AtomicOpcTbl[Op][I8];
+ break;
+ case MVT::i16:
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][SextConstantI16];
+ else
+ Opc = AtomicOpcTbl[Op][ConstantI16];
+ } else
+ Opc = AtomicOpcTbl[Op][I16];
+ break;
+ case MVT::i32:
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][SextConstantI32];
+ else
+ Opc = AtomicOpcTbl[Op][ConstantI32];
+ } else
+ Opc = AtomicOpcTbl[Op][I32];
+ break;
+ case MVT::i64:
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][SextConstantI64];
+ else if (i64immSExt32(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][ConstantI64];
+ } else
+ Opc = AtomicOpcTbl[Op][I64];
+ break;
+ }
+
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, NVT), 0);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+ SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ SDValue RetVals[] = { Undef, Ret };
+ return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+}
+
/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
/// any uses which require the SF or OF bits to be accurate.
static bool HasNoSignedComparisonUses(SDNode *N) {
@@ -1580,6 +1735,89 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return RetVal;
break;
}
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR: {
+ SDNode *RetVal = SelectAtomicLoadArith(Node, NVT);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ // For operations of the form (x << C1) op C2, check if we can use a smaller
+ // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
+ break;
+
+ // i8 is unshrinkable, i16 should be promoted to i32.
+ if (NVT != MVT::i32 && NVT != MVT::i64)
+ break;
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (!Cst || !ShlCst)
+ break;
+
+ int64_t Val = Cst->getSExtValue();
+ uint64_t ShlVal = ShlCst->getZExtValue();
+
+ // Make sure that we don't change the operation by removing bits.
+ // This only matters for OR and XOR, AND is unaffected.
+ if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val)
+ break;
+
+ unsigned ShlOp, Op = 0;
+ EVT CstVT = NVT;
+
+ // Check the minimum bitwidth for the new constant.
+ // TODO: AND32ri is the same as AND64ri32 with zext imm.
+ // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
+ // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
+ if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
+ CstVT = MVT::i8;
+ else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
+ CstVT = MVT::i32;
+
+ // Bail if there is no smaller encoding.
+ if (NVT == CstVT)
+ break;
+
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i32:
+ assert(CstVT == MVT::i8);
+ ShlOp = X86::SHL32ri;
+
+ switch (Opcode) {
+ case ISD::AND: Op = X86::AND32ri8; break;
+ case ISD::OR: Op = X86::OR32ri8; break;
+ case ISD::XOR: Op = X86::XOR32ri8; break;
+ }
+ break;
+ case MVT::i64:
+ assert(CstVT == MVT::i8 || CstVT == MVT::i32);
+ ShlOp = X86::SHL64ri;
+
+ switch (Opcode) {
+ case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
+ case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
+ case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
+ }
+ break;
+ }
+
+ // Emit the smaller op and the shift.
+ SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT);
+ SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
+ return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
+ getI8Imm(ShlVal));
+ break;
+ }
case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
@@ -1768,17 +2006,17 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
Move =
- SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16,
+ SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
MVT::Other, Ops,
array_lengthof(Ops)), 0);
Chain = Move.getValue(1);
ReplaceUses(N0.getValue(1), Chain);
} else {
Move =
- SDValue(CurDAG->getMachineNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
+ SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
Chain = CurDAG->getEntryNode();
}
- Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
InFlag = Chain.getValue(1);
} else {
InFlag =
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index cd1d201..1cdf2b6 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -222,7 +222,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// X86 is weird, it always uses i8 for shift amounts and setcc results.
setBooleanContents(ZeroOrOneBooleanContent);
-
+
// For 64-bit since we have so many registers use the ILP scheduler, for
// 32-bit code use the register pressure specific scheduling.
if (Subtarget->is64Bit())
@@ -574,6 +574,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ // Lower this to FGETSIGNx86 plus an AND.
+ setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
+ setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
+
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
@@ -927,7 +931,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Can turn SHL into an integer multiply.
setOperationAction(ISD::SHL, MVT::v4i32, Custom);
setOperationAction(ISD::SHL, MVT::v16i8, Custom);
- setOperationAction(ISD::SRL, MVT::v4i32, Legal);
// i8 and i16 vectors are custom , because the source register and source
// source memory operand types are not the same width. f32 vectors are
@@ -949,6 +952,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
}
+ if (Subtarget->hasSSE2()) {
+ setOperationAction(ISD::SRL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SRL, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::SHL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v8i16, Custom);
+
+ setOperationAction(ISD::SRA, MVT::v4i32, Custom);
+ setOperationAction(ISD::SRA, MVT::v8i16, Custom);
+ }
+
if (Subtarget->hasSSE42())
setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
@@ -1081,6 +1097,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::SINT_TO_FP);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
@@ -1096,6 +1113,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
setPrefLoopAlignment(16);
benefitFromCodePlacementOpt = true;
+
+ setPrefFunctionAlignment(4);
}
@@ -1247,11 +1266,6 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
}
-/// getFunctionAlignment - Return the Log2 alignment of this function.
-unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
- return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
-}
-
// FIXME: Why this routine is here? Move to RegInfo!
std::pair<const TargetRegisterClass*, uint8_t>
X86TargetLowering::findRepresentativeClass(EVT VT) const{
@@ -1306,11 +1320,12 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
#include "X86GenCallingConv.inc"
bool
-X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
+X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_X86);
}
@@ -1325,7 +1340,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
@@ -1476,8 +1491,8 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
bool Is64Bit = Subtarget->is64Bit();
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
- RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
@@ -1518,20 +1533,6 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
// This truncation won't change the value.
DAG.getIntPtrConstant(1));
- } else if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) {
- // For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64.
- if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
- Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
- MVT::v2i64, InFlag).getValue(1);
- Val = Chain.getValue(0);
- Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
- Val, DAG.getConstant(0, MVT::i64));
- } else {
- Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
- MVT::i64, InFlag).getValue(1);
- Val = Chain.getValue(0);
- }
- Val = DAG.getNode(ISD::BITCAST, dl, CopyVT, Val);
} else {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
CopyVT, InFlag).getValue(1);
@@ -1680,7 +1681,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
@@ -1952,7 +1953,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
return SDValue(OutRetAddr.getNode(), 1);
}
-/// EmitTailCallStoreRetAddr - Emit a store of the return adress if tail call
+/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
/// optimization is performed and it is required (FPDiff!=0).
static SDValue
EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
@@ -2007,7 +2008,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
@@ -2043,7 +2044,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
SDValue RetAddrFrIdx;
- // Load return adress for tail calls.
+ // Load return address for tail calls.
if (isTailCall && FPDiff)
Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
Is64Bit, FPDiff, dl);
@@ -2200,7 +2201,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
- // Do not flag preceeding copytoreg stuff together with the following stuff.
+ // Do not flag preceding copytoreg stuff together with the following stuff.
InFlag = SDValue();
if (GuaranteedTailCallOpt) {
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
@@ -2270,6 +2271,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const GlobalValue *GV = G->getGlobal();
if (!GV->hasDLLImportLinkage()) {
unsigned char OpFlags = 0;
+ bool ExtraLoad = false;
+ unsigned WrapperKind = ISD::DELETED_NODE;
// On ELF targets, in both X86-64 and X86-32 mode, direct calls to
// external symbols most go through the PLT in PIC mode. If the symbol
@@ -2281,15 +2284,34 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
(GV->isDeclaration() || GV->isWeakForLinker()) &&
- Subtarget->getDarwinVers() < 9) {
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
OpFlags = X86II::MO_DARWIN_STUB;
+ } else if (Subtarget->isPICStyleRIPRel() &&
+ isa<Function>(GV) &&
+ cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) {
+ // If the function is marked as non-lazy, generate an indirect call
+ // which loads from the GOT directly. This avoids runtime overhead
+ // at the cost of eager binding (and one extra byte of encoding).
+ OpFlags = X86II::MO_GOTPCREL;
+ WrapperKind = X86ISD::WrapperRIP;
+ ExtraLoad = true;
}
Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
G->getOffset(), OpFlags);
+
+ // Add a wrapper if needed.
+ if (WrapperKind != ISD::DELETED_NODE)
+ Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
+ // Add extra indirection if needed.
+ if (ExtraLoad)
+ Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(),
+ false, false, 0);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
@@ -2300,7 +2322,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
getTargetMachine().getRelocationModel() == Reloc::PIC_) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
- Subtarget->getDarwinVers() < 9) {
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
@@ -2528,16 +2551,30 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (RegInfo->needsStackRealignment(MF))
return false;
- // Do not sibcall optimize vararg calls unless the call site is not passing
- // any arguments.
- if (isVarArg && !Outs.empty())
- return false;
-
// Also avoid sibcall optimization if either caller or callee uses struct
// return semantics.
if (isCalleeStructRet || isCallerStructRet)
return false;
+ // Do not sibcall optimize vararg calls unless all arguments are passed via
+ // registers.
+ if (isVarArg && !Outs.empty()) {
+
+ // Optimizing for varargs on Win64 is unlikely to be safe without
+ // additional testing.
+ if (Subtarget->isTargetWin64())
+ return false;
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
+ if (!ArgLocs[i].isRegLoc())
+ return false;
+ }
+
// If the call result is in ST0 / ST1, it needs to be popped off the x87 stack.
// Therefore if it's not used by the call it is not safe to optimize this into
// a sibcall.
@@ -2550,8 +2587,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
if (Unused) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CalleeCC, false, getTargetMachine(),
- RVLocs, *DAG.getContext());
+ CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
@@ -2564,13 +2601,13 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// results are returned in the same way as what the caller expects.
if (!CCMatch) {
SmallVector<CCValAssign, 16> RVLocs1;
- CCState CCInfo1(CalleeCC, false, getTargetMachine(),
- RVLocs1, *DAG.getContext());
+ CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs1, *DAG.getContext());
CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
SmallVector<CCValAssign, 16> RVLocs2;
- CCState CCInfo2(CallerCC, false, getTargetMachine(),
- RVLocs2, *DAG.getContext());
+ CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs2, *DAG.getContext());
CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
if (RVLocs1.size() != RVLocs2.size())
@@ -2596,8 +2633,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
- ArgLocs, *DAG.getContext());
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (Subtarget->isTargetWin64()) {
@@ -4018,7 +4055,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
/// getNumOfConsecutiveZeros - Return the number of elements of a vector
/// shuffle operation which come from a consecutively from a zero. The
-/// search can start in two diferent directions, from left or right.
+/// search can start in two different directions, from left or right.
static
unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems,
bool ZerosFromLeft, SelectionDAG &DAG) {
@@ -6617,9 +6654,9 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
}
-/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
+/// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values and
/// take a 2 x i32 value to shift plus a shift amount.
-SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
@@ -6708,12 +6745,18 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
unsigned ByteSize = SrcVT.getSizeInBits()/8;
- int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
- MachineMemOperand *MMO =
- DAG.getMachineFunction()
- .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
- MachineMemOperand::MOLoad, ByteSize, ByteSize);
-
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(StackSlot);
+ MachineMemOperand *MMO;
+ if (FI) {
+ int SSFI = FI->getIndex();
+ MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, ByteSize, ByteSize);
+ } else {
+ MMO = cast<LoadSDNode>(StackSlot)->getMemOperand();
+ StackSlot = StackSlot.getOperand(1);
+ }
SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
X86ISD::FILD, DL,
@@ -7204,6 +7247,17 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit);
}
+SDValue X86TargetLowering::LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) const {
+ SDValue N0 = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ // Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1).
+ SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0,
+ DAG.getConstant(1, VT));
+ return DAG.getNode(ISD::AND, dl, VT, xFGETSIGN, DAG.getConstant(1, VT));
+}
+
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
@@ -8779,16 +8833,71 @@ SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const {
return Res;
}
-SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
+
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
LLVMContext *Context = DAG.getContext();
- assert(Subtarget->hasSSE41() && "Cannot lower SHL without SSE4.1 or later");
+ // Must have SSE2.
+ if (!Subtarget->hasSSE2()) return SDValue();
+
+ // Optimize shl/srl/sra with constant shift amount.
+ if (isSplatVector(Amt.getNode())) {
+ SDValue SclrAmt = Amt->getOperand(0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
+ uint64_t ShiftAmt = C->getZExtValue();
+
+ if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+ }
+ }
+
+ // Lower SHL with variable shift amount.
+ // Cannot lower SHL without SSE4.1 or later.
+ if (!Subtarget->hasSSE41()) return SDValue();
- if (VT == MVT::v4i32) {
+ if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
Op.getOperand(1), DAG.getConstant(23, MVT::i32));
@@ -8807,7 +8916,7 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
}
- if (VT == MVT::v16i8) {
+ if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
// a = a << 5;
Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
@@ -9112,7 +9221,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::SHL_PARTS:
case ISD::SRA_PARTS:
- case ISD::SRL_PARTS: return LowerShift(Op, DAG);
+ case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
@@ -9120,6 +9229,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FABS: return LowerFABS(Op, DAG);
case ISD::FNEG: return LowerFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::VSETCC: return LowerVSETCC(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
@@ -9140,7 +9250,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL_V2I64(Op, DAG);
- case ISD::SHL: return LowerSHL(Op, DAG);
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL: return LowerShift(Op, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
@@ -9307,6 +9419,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::UCOMI: return "X86ISD::UCOMI";
case X86ISD::SETCC: return "X86ISD::SETCC";
case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
+ case X86ISD::FSETCCsd: return "X86ISD::FSETCCsd";
+ case X86ISD::FSETCCss: return "X86ISD::FSETCCss";
case X86ISD::CMOV: return "X86ISD::CMOV";
case X86ISD::BRCOND: return "X86ISD::BRCOND";
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
@@ -10984,14 +11098,14 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
UE = Uses.end(); UI != UE; ++UI) {
SDNode *Extract = *UI;
- // Compute the element's address.
+ // cOMpute the element's address.
SDValue Idx = Extract->getOperand(1);
unsigned EltSize =
InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
- SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(),
+ SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
StackPtr, OffsetVal);
// Load the scalar.
@@ -11264,15 +11378,28 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
return SDValue();
+ SDValue FalseOp = N->getOperand(0);
+ SDValue TrueOp = N->getOperand(1);
+ X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
+ SDValue Cond = N->getOperand(3);
+ if (CC == X86::COND_E || CC == X86::COND_NE) {
+ switch (Cond.getOpcode()) {
+ default: break;
+ case X86ISD::BSR:
+ case X86ISD::BSF:
+ // If operand of BSR / BSF are proven never zero, then ZF cannot be set.
+ if (DAG.isKnownNeverZero(Cond.getOperand(0)))
+ return (CC == X86::COND_E) ? FalseOp : TrueOp;
+ }
+ }
+
// If this is a select between two integer constants, try to do some
// optimizations. Note that the operands are ordered the opposite of SELECT
// operands.
- if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
- if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(TrueOp)) {
+ if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) {
// Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
// larger than FalseC (the false value).
- X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
-
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
CC = X86::GetOppositeBranchCondition(CC);
std::swap(TrueC, FalseC);
@@ -11282,7 +11409,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
// This is efficient for any integer data type (including i8/i16) and
// shift amount.
if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
- SDValue Cond = N->getOperand(3);
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
@@ -11300,7 +11426,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
// Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. This is efficient
// for any integer data type, including i8/i16.
if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
- SDValue Cond = N->getOperand(3);
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
@@ -11339,7 +11464,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
if (isFastMultiplier) {
APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
- SDValue Cond = N->getOperand(3);
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
// Zero extend the condition if needed.
@@ -11574,12 +11698,94 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
}
+// CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..))
+// where both setccs reference the same FP CMP, and rewrite for CMPEQSS
+// and friends. Likewise for OR -> CMPNEQSS.
+static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ unsigned opcode;
+
+ // SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
+ // we're requiring SSE2 for both.
+ if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CMP0 = N0->getOperand(1);
+ SDValue CMP1 = N1->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // The SETCCs should both refer to the same CMP.
+ if (CMP0.getOpcode() != X86ISD::CMP || CMP0 != CMP1)
+ return SDValue();
+
+ SDValue CMP00 = CMP0->getOperand(0);
+ SDValue CMP01 = CMP0->getOperand(1);
+ EVT VT = CMP00.getValueType();
+
+ if (VT == MVT::f32 || VT == MVT::f64) {
+ bool ExpectingFlags = false;
+ // Check for any users that want flags:
+ for (SDNode::use_iterator UI = N->use_begin(),
+ UE = N->use_end();
+ !ExpectingFlags && UI != UE; ++UI)
+ switch (UI->getOpcode()) {
+ default:
+ case ISD::BR_CC:
+ case ISD::BRCOND:
+ case ISD::SELECT:
+ ExpectingFlags = true;
+ break;
+ case ISD::CopyToReg:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ break;
+ }
+
+ if (!ExpectingFlags) {
+ enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
+ enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);
+
+ if (cc1 == X86::COND_E || cc1 == X86::COND_NE) {
+ X86::CondCode tmp = cc0;
+ cc0 = cc1;
+ cc1 = tmp;
+ }
+
+ if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
+ (cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
+ bool is64BitFP = (CMP00.getValueType() == MVT::f64);
+ X86ISD::NodeType NTOperator = is64BitFP ?
+ X86ISD::FSETCCsd : X86ISD::FSETCCss;
+ // FIXME: need symbolic constants for these magic numbers.
+ // See X86ATTInstPrinter.cpp:printSSECC().
+ unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
+ SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP00, CMP01,
+ DAG.getConstant(x86cc, MVT::i8));
+ SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32,
+ OnesOrZeroesF);
+ SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI,
+ DAG.getConstant(1, MVT::i32));
+ SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
+ return OneBitOfTruth;
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
+
// Want to form PANDN nodes, in the hopes of then easily combining them with
// OR and AND nodes to form PBLEND/PSIGN.
EVT VT = N->getValueType(0);
@@ -11609,6 +11815,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
+
EVT VT = N->getValueType(0);
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
return SDValue();
@@ -11976,6 +12186,26 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86TargetLowering *XTLI) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op0 = N->getOperand(0);
+ // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
+ // a 32-bit target where SSE doesn't support i64->FP operations.
+ if (Op0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
+ EVT VT = Ld->getValueType(0);
+ if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
+ ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
+ !XTLI->getSubtarget()->is64Bit() &&
+ !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0), Ld->getChain(), Op0, DAG);
+ DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
+ return FILDChain;
+ }
+ }
+ return SDValue();
+}
+
// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
X86TargetLowering::DAGCombinerInfo &DCI) {
@@ -12060,6 +12290,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
+ case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
@@ -12216,7 +12447,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces.clear();
SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace.
- // FIXME: this should verify that we are targetting a 486 or better. If not,
+ // FIXME: this should verify that we are targeting a 486 or better. If not,
// we will turn this bswap into something that will be lowered to logical ops
// instead of emitting the bswap asm. For now, we don't support 486 or lower
// so don't worry about this.
@@ -12489,12 +12720,16 @@ LowerXConstraint(EVT ConstraintVT) const {
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- char Constraint,
+ std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result(0, 0);
- switch (Constraint) {
+ // Only support length 1 constraints for now.
+ if (Constraint.length() > 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
default: break;
case 'I':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
@@ -12686,7 +12921,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return std::make_pair(0U, X86::GR8RegisterClass);
if (VT == MVT::i16)
return std::make_pair(0U, X86::GR16RegisterClass);
- if (VT == MVT::i32 || !Subtarget->is64Bit())
+ if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget->is64Bit())
return std::make_pair(0U, X86::GR32RegisterClass);
return std::make_pair(0U, X86::GR64RegisterClass);
case 'R': // LEGACY_REGS
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 6301057..d61a125 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -94,6 +94,15 @@ namespace llvm {
// one's or all zero's.
SETCC_CARRY, // R = carry_bit ? ~0 : 0
+ /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
+ /// Operands are two FP values to compare; result is a mask of
+ /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
+ FSETCCss, FSETCCsd,
+
+ /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values,
+ /// result in an integer GPR. Needs masking for scalar result.
+ FGETSIGNx86,
+
/// X86 conditional moves. Operand 0 and operand 1 are the two values
/// to select from. Operand 2 is the condition code, and operand 3 is the
/// flag operand produced by a CMP or TEST instruction. It also writes a
@@ -592,7 +601,7 @@ namespace llvm {
/// true it means one of the asm constraint of the inline asm instruction
/// being processed is 'm'.
virtual void LowerAsmOperandForConstraint(SDValue Op,
- char ConstraintLetter,
+ std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
@@ -674,15 +683,15 @@ namespace llvm {
/// or null if the target does not support "fast" ISel.
virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
- /// getFunctionAlignment - Return the Log2 alignment of this function.
- virtual unsigned getFunctionAlignment(const Function *F) const;
-
/// getStackCookieLocation - Return true if the target stores stack
/// protector cookies at a fixed offset in some non-standard address
/// space, and populates the address space and offset as
/// appropriate.
virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const;
+ SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
+ SelectionDAG &DAG) const;
+
protected:
std::pair<const TargetRegisterClass*, uint8_t>
findRepresentativeClass(EVT VT) const;
@@ -773,9 +782,7 @@ namespace llvm {
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
- SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
- SelectionDAG &DAG) const;
+ SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const;
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
@@ -786,6 +793,7 @@ namespace llvm {
SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToBT(SDValue And, ISD::CondCode CC,
DebugLoc dl, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
@@ -808,7 +816,7 @@ namespace llvm {
SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSHL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
@@ -850,9 +858,10 @@ namespace llvm {
ISD::NodeType ExtendKind) const;
virtual bool
- CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- LLVMContext &Context) const;
+ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const;
void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned NewOp) const;
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
index 45d1c6b..dd4f6a5 100644
--- a/lib/Target/X86/X86Instr3DNow.td
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -12,66 +12,91 @@
//
//===----------------------------------------------------------------------===//
-// FIXME: We don't support any intrinsics for these instructions yet.
+class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pat>
+ : I<o, F, outs, ins, asm, pat>, TB, Requires<[Has3DNow]> {
+}
-class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, Requires<[Has3DNow]> {
+class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
+ : I3DNow<o, F, (outs VR64:$dst), ins,
+ !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), pat>,
+ Has3DNow0F0FOpcode {
+ // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src1 = $dst";
}
-class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic>
- : I<o, F, (outs VR64:$dst), ins,
- !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), []>,
- TB, Requires<[Has3DNow]>, Has3DNow0F0FOpcode {
+class I3DNow_conv<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
+ : I3DNow<o, F, (outs VR64:$dst), ins,
+ !strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat>,
+ Has3DNow0F0FOpcode {
// FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
let isAsmParserOnly = 1;
}
+multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
+ def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn, []>;
+ def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn, []>;
+}
+
+multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+ def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>;
+ def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+}
+
+multiclass I3DNow_conv_rm<bits<8> opc, string Mn> {
+ def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src1), Mn, []>;
+ def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src1), Mn, []>;
+}
-let Constraints = "$src1 = $dst" in {
- // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
- // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
- multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
- def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn>;
- def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn>;
- }
+multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+ def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>;
+ def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn))
+ (bitconvert (load_mmx addr:$src))))]>;
}
-defm PAVGUSB : I3DNow_binop_rm<0xBF, "pavgusb">;
-defm PF2ID : I3DNow_binop_rm<0x1D, "pf2id">;
-defm PFACC : I3DNow_binop_rm<0xAE, "pfacc">;
-defm PFADD : I3DNow_binop_rm<0x9E, "pfadd">;
-defm PFCMPEQ : I3DNow_binop_rm<0xB0, "pfcmpeq">;
-defm PFCMPGE : I3DNow_binop_rm<0x90, "pfcmpge">;
-defm PFCMPGT : I3DNow_binop_rm<0xA0, "pfcmpgt">;
-defm PFMAX : I3DNow_binop_rm<0xA4, "pfmax">;
-defm PFMIN : I3DNow_binop_rm<0x94, "pfmin">;
-defm PFMUL : I3DNow_binop_rm<0xB4, "pfmul">;
-defm PFRCP : I3DNow_binop_rm<0x96, "pfrcp">;
-defm PFRCPIT1 : I3DNow_binop_rm<0xA6, "pfrcpit1">;
-defm PFRCPIT2 : I3DNow_binop_rm<0xB6, "pfrcpit2">;
-defm PFRSQIT1 : I3DNow_binop_rm<0xA7, "pfrsqit1">;
-defm PFRSQRT : I3DNow_binop_rm<0x97, "pfrsqrt">;
-defm PFSUB : I3DNow_binop_rm<0x9A, "pfsub">;
-defm PFSUBR : I3DNow_binop_rm<0xAA, "pfsubr">;
-defm PI2FD : I3DNow_binop_rm<0x0D, "pi2fd">;
-defm PMULHRW : I3DNow_binop_rm<0xB7, "pmulhrw">;
+defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb">;
+defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">;
+defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc">;
+defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd">;
+defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq">;
+defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">;
+defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">;
+defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">;
+defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">;
+defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul">;
+defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp">;
+defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">;
+defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">;
+defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1">;
+defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt">;
+defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub">;
+defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr">;
+defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
+defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
"prefetch $addr", []>;
-
+
// FIXME: Diassembler gets a bogus decode conflict.
-let isAsmParserOnly = 1 in {
+let isAsmParserOnly = 1 in
def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
"prefetchw $addr", []>;
-}
// "3DNowA" instructions
-defm PF2IW : I3DNow_binop_rm<0x1C, "pf2iw">;
-defm PI2FW : I3DNow_binop_rm<0x0C, "pi2fw">;
-defm PFNACC : I3DNow_binop_rm<0x8A, "pfnacc">;
-defm PFPNACC : I3DNow_binop_rm<0x8E, "pfpnacc">;
-defm PSWAPD : I3DNow_binop_rm<0xBB, "pswapd">;
+defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
+defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">;
+defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">;
+defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">;
+defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">;
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index f0ea068..9f7a4b0 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -163,7 +163,7 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
} // Defs = [EFLAGS]
-// Suprisingly enough, these are not two address instructions!
+// Surprisingly enough, these are not two address instructions!
let Defs = [EFLAGS] in {
// Register-Integer Signed Integer Multiply
def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 4c915d9..adcc747 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -214,6 +214,30 @@ def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
(SETBr)>;
+// (add OP, SETB) -> (adc OP, 0)
+def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR8:$op),
+ (ADC8ri GR8:$op, 0)>;
+def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR32:$op),
+ (ADC32ri8 GR32:$op, 0)>;
+def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR64:$op),
+ (ADC64ri8 GR64:$op, 0)>;
+
+// (sub OP, SETB) -> (sbb OP, 0)
+def : Pat<(sub GR8:$op, (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
+ (SBB8ri GR8:$op, 0)>;
+def : Pat<(sub GR32:$op, (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
+ (SBB32ri8 GR32:$op, 0)>;
+def : Pat<(sub GR64:$op, (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
+ (SBB64ri8 GR64:$op, 0)>;
+
+// (sub OP, SETCC_CARRY) -> (adc OP, 0)
+def : Pat<(sub GR8:$op, (i8 (X86setcc_c X86_COND_B, EFLAGS))),
+ (ADC8ri GR8:$op, 0)>;
+def : Pat<(sub GR32:$op, (i32 (X86setcc_c X86_COND_B, EFLAGS))),
+ (ADC32ri8 GR32:$op, 0)>;
+def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
+ (ADC64ri8 GR64:$op, 0)>;
+
//===----------------------------------------------------------------------===//
// String Pseudo Instructions
//
@@ -519,85 +543,98 @@ def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero),
Requires<[In64BitMode]>, LOCK;
-// Optimized codegen when the non-memory output is not used.
+// RegOpc corresponds to the mr version of the instruction
+// ImmOpc corresponds to the mi version of the instruction
+// ImmOpc8 corresponds to the mi8 version of the instruction
+// ImmMod corresponds to the instruction format of the mi and mi8 versions
+multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
+ Format ImmMod, string mnemonic> {
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
-def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- "lock\n\t"
- "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "lock\n\t"
- "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "lock\n\t"
- "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "lock\n\t"
- "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
- "lock\n\t"
- "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "lock\n\t"
- "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "lock\n\t"
- "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs),
- (ins i64mem:$dst, i64i32imm :$src2),
- "lock\n\t"
- "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-
-def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "lock\n\t"
- "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "lock\n\t"
- "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs),
- (ins i64mem:$dst, i64i8imm :$src2),
- "lock\n\t"
- "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-
-def LOCK_SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
- "lock\n\t"
- "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "lock\n\t"
- "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "lock\n\t"
- "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "lock\n\t"
- "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
+ MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ !strconcat("lock\n\t", mnemonic, "{b}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ !strconcat("lock\n\t", mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, OpSize, LOCK;
+def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ !strconcat("lock\n\t", mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ !strconcat("lock\n\t", mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
+ ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{b}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+}
-def LOCK_SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
- "lock\n\t"
- "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "lock\n\t"
- "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "lock\n\t"
- "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs),
- (ins i64mem:$dst, i64i32imm:$src2),
- "lock\n\t"
- "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+}
+defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">;
+defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">;
+defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">;
+defm LOCK_AND : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM4m, "and">;
+defm LOCK_XOR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM6m, "xor">;
-def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "lock\n\t"
- "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "lock\n\t"
- "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs),
- (ins i64mem:$dst, i64i8imm :$src2),
- "lock\n\t"
- "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+// Optimized codegen when the non-memory output is not used.
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
"lock\n\t"
@@ -960,7 +997,8 @@ def : Pat<(extloadi64i32 addr:$src),
// anyext. Define these to do an explicit zero-extend to
// avoid partial-register updates.
-def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
+def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG
+ (MOVZX32rr8 GR8 :$src), sub_16bit)>;
def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32.
@@ -1127,9 +1165,9 @@ def : Pat<(and GR32:$src1, 0xff),
Requires<[In32BitMode]>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
- (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1,
- GR16_ABCD)),
- sub_8bit))>,
+ (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG
+ (i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), sub_8bit)),
+ sub_16bit)>,
Requires<[In32BitMode]>;
// r & (2^32-1) ==> movz
@@ -1147,7 +1185,8 @@ def : Pat<(and GR32:$src1, 0xff),
Requires<[In64BitMode]>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
- (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>,
+ (EXTRACT_SUBREG (MOVZX32rr8 (i8
+ (EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>,
Requires<[In64BitMode]>;
@@ -1159,10 +1198,11 @@ def : Pat<(sext_inreg GR32:$src, i8),
GR32_ABCD)),
sub_8bit))>,
Requires<[In32BitMode]>;
+
def : Pat<(sext_inreg GR16:$src, i8),
- (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
- GR16_ABCD)),
- sub_8bit))>,
+ (EXTRACT_SUBREG (i32 (MOVSX32rr8 (EXTRACT_SUBREG
+ (i32 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit))),
+ sub_16bit)>,
Requires<[In32BitMode]>;
def : Pat<(sext_inreg GR64:$src, i32),
@@ -1175,9 +1215,19 @@ def : Pat<(sext_inreg GR32:$src, i8),
(MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
Requires<[In64BitMode]>;
def : Pat<(sext_inreg GR16:$src, i8),
- (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>,
+ (EXTRACT_SUBREG (MOVSX32rr8
+ (EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>,
Requires<[In64BitMode]>;
+// sext, sext_load, zext, zext_load
+def: Pat<(i16 (sext GR8:$src)),
+ (EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>;
+def: Pat<(sextloadi16i8 addr:$src),
+ (EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>;
+def: Pat<(i16 (zext GR8:$src)),
+ (EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>;
+def: Pat<(zextloadi16i8 addr:$src),
+ (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
// trunc patterns
def : Pat<(i16 (trunc GR32:$src)),
@@ -1318,6 +1368,11 @@ def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
// (shl x, 1) ==> (add x, x)
+// Note that if x is undef (immediate or otherwise), we could theoretically
+// end up with the two uses of x getting different values, producing a result
+// where the least significant bit is not 0. However, the probability of this
+// happening is considered low enough that this is officially not a
+// "real problem".
def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
@@ -1474,12 +1529,6 @@ def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
(IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
-// Optimize multiply by 2 with EFLAGS result.
-let AddedComplexity = 2 in {
-def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>;
-}
-
// Patterns for nodes that do not produce flags, for instructions that do.
// addition
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index 867c0f8..2e1d523 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -38,22 +38,11 @@ let neverHasSideEffects = 1 in {
// Sign/Zero extenders
-// Use movsbl intead of movsbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update. Actual movsbw included for the disassembler.
-def MOVSX16rr8W : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
- "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVSX16rm8W : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
- "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-
-// FIXME: Use a pat pattern or define a syntax here.
-let isCodeGenOnly=1 in {
-def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
- "", [(set GR16:$dst, (sext GR8:$src))]>, TB;
-def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
- "", [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
-}
-def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
+def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sext GR8:$src))]>, TB;
def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
@@ -66,20 +55,10 @@ def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
-// Use movzbl intead of movzbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update. Actual movzbw included for the disassembler.
-def MOVZX16rr8W : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
- "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVZX16rm8W : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
- "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-// FIXME: Use a pat pattern or define a syntax here.
-let isCodeGenOnly=1 in {
-def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
- "", [(set GR16:$dst, (zext GR8:$src))]>, TB;
-def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
- "", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB;
-}
+def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zext GR8:$src))]>, TB;
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 3cbfac1..7c9a9f7 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -38,8 +38,11 @@ def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
+def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
+def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>;
+def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 21df57c..b3237d5 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -232,7 +232,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
- // If this is not a reversable operation (because there is a many->one)
+ // If this is not a reversible operation (because there is a many->one)
// mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
continue;
@@ -335,7 +335,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align);
- // If this is not a reversable operation (because there is a many->one)
+ // If this is not a reversible operation (because there is a many->one)
// mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
if (OpTbl0[i][1] & TB_NOT_REVERSABLE)
continue;
@@ -460,7 +460,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries");
RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align);
- // If this is not a reversable operation (because there is a many->one)
+ // If this is not a reversible operation (because there is a many->one)
// mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
if (OpTbl1[i][1] & TB_NOT_REVERSABLE)
continue;
@@ -682,7 +682,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
- // If this is not a reversable operation (because there is a many->one)
+ // If this is not a reversible operation (because there is a many->one)
// mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
continue;
@@ -916,7 +916,6 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
case X86::MOVSDrm:
case X86::MOVAPSrm:
case X86::MOVUPSrm:
- case X86::MOVUPSrm_Int:
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MMX_MOVD64rm:
@@ -1790,7 +1789,6 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
.addMBB(UnCondBrIter->getOperand(0).getMBB());
BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4))
.addMBB(TargetBB);
- MBB.addSuccessor(TargetBB);
OldInst->eraseFromParent();
UnCondBrIter->eraseFromParent();
@@ -2016,62 +2014,48 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
bool isStackAligned,
const TargetMachine &TM,
bool load) {
- switch (RC->getID()) {
+ switch (RC->getSize()) {
default:
- llvm_unreachable("Unknown regclass");
- case X86::GR64RegClassID:
- case X86::GR64_ABCDRegClassID:
- case X86::GR64_NOREXRegClassID:
- case X86::GR64_NOREX_NOSPRegClassID:
- case X86::GR64_NOSPRegClassID:
- case X86::GR64_TCRegClassID:
- case X86::GR64_TCW64RegClassID:
- return load ? X86::MOV64rm : X86::MOV64mr;
- case X86::GR32RegClassID:
- case X86::GR32_ABCDRegClassID:
- case X86::GR32_ADRegClassID:
- case X86::GR32_NOREXRegClassID:
- case X86::GR32_NOSPRegClassID:
- case X86::GR32_TCRegClassID:
- return load ? X86::MOV32rm : X86::MOV32mr;
- case X86::GR16RegClassID:
- case X86::GR16_ABCDRegClassID:
- case X86::GR16_NOREXRegClassID:
- return load ? X86::MOV16rm : X86::MOV16mr;
- case X86::GR8RegClassID:
- // Copying to or from a physical H register on x86-64 requires a NOREX
- // move. Otherwise use a normal move.
- if (isHReg(Reg) &&
- TM.getSubtarget<X86Subtarget>().is64Bit())
- return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
- else
- return load ? X86::MOV8rm : X86::MOV8mr;
- case X86::GR8_ABCD_LRegClassID:
- case X86::GR8_NOREXRegClassID:
- return load ? X86::MOV8rm :X86::MOV8mr;
- case X86::GR8_ABCD_HRegClassID:
+ llvm_unreachable("Unknown spill size");
+ case 1:
+ assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
if (TM.getSubtarget<X86Subtarget>().is64Bit())
- return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
- else
- return load ? X86::MOV8rm : X86::MOV8mr;
- case X86::RFP80RegClassID:
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
+ return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
+ return load ? X86::MOV8rm : X86::MOV8mr;
+ case 2:
+ assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
+ return load ? X86::MOV16rm : X86::MOV16mr;
+ case 4:
+ if (X86::GR32RegClass.hasSubClassEq(RC))
+ return load ? X86::MOV32rm : X86::MOV32mr;
+ if (X86::FR32RegClass.hasSubClassEq(RC))
+ return load ? X86::MOVSSrm : X86::MOVSSmr;
+ if (X86::RFP32RegClass.hasSubClassEq(RC))
+ return load ? X86::LD_Fp32m : X86::ST_Fp32m;
+ llvm_unreachable("Unknown 4-byte regclass");
+ case 8:
+ if (X86::GR64RegClass.hasSubClassEq(RC))
+ return load ? X86::MOV64rm : X86::MOV64mr;
+ if (X86::FR64RegClass.hasSubClassEq(RC))
+ return load ? X86::MOVSDrm : X86::MOVSDmr;
+ if (X86::VR64RegClass.hasSubClassEq(RC))
+ return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
+ if (X86::RFP64RegClass.hasSubClassEq(RC))
+ return load ? X86::LD_Fp64m : X86::ST_Fp64m;
+ llvm_unreachable("Unknown 8-byte regclass");
+ case 10:
+ assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
return load ? X86::LD_Fp80m : X86::ST_FpP80m;
- case X86::RFP64RegClassID:
- return load ? X86::LD_Fp64m : X86::ST_Fp64m;
- case X86::RFP32RegClassID:
- return load ? X86::LD_Fp32m : X86::ST_Fp32m;
- case X86::FR32RegClassID:
- return load ? X86::MOVSSrm : X86::MOVSSmr;
- case X86::FR64RegClassID:
- return load ? X86::MOVSDrm : X86::MOVSDmr;
- case X86::VR128RegClassID:
+ case 16:
+ assert(X86::VR128RegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass");
// If stack is realigned we can use aligned stores.
if (isStackAligned)
return load ? X86::MOVAPSrm : X86::MOVAPSmr;
else
return load ? X86::MOVUPSrm : X86::MOVUPSmr;
- case X86::VR64RegClassID:
- return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
}
}
@@ -2241,6 +2225,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
bool isTwoAddr = NumOps > 1 &&
MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;
+ // FIXME: AsmPrinter doesn't know how to handle
+ // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
+ if (MI->getOpcode() == X86::ADD32ri &&
+ MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ return NULL;
+
MachineInstr *NewMI = NULL;
// Folding a memory location into the two-address part of a two-address
// instruction is different than folding it other places. It requires
@@ -2429,7 +2419,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = 4;
break;
default:
- llvm_unreachable("Don't know how to fold this instruction!");
+ return 0;
}
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
@@ -2535,6 +2525,12 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
case X86::TEST32rr:
case X86::TEST64rr:
return true;
+ case X86::ADD32ri:
+ // FIXME: AsmPrinter doesn't know how to handle
+ // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
+ if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ return false;
+ break;
}
}
@@ -2845,11 +2841,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::FsMOVAPDrm:
case X86::MOVAPSrm:
case X86::MOVUPSrm:
- case X86::MOVUPSrm_Int:
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
- case X86::MOVDQUrm_Int:
break;
}
switch (Opc2) {
@@ -2869,11 +2863,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::FsMOVAPDrm:
case X86::MOVAPSrm:
case X86::MOVUPSrm:
- case X86::MOVUPSrm_Int:
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
- case X86::MOVDQUrm_Int:
break;
}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 4625b4c..d895023 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -449,7 +449,6 @@ namespace X86II {
SSEDomainShift = SegOvrShift + 2,
OpcodeShift = SSEDomainShift + 2,
- OpcodeMask = 0xFFULL << OpcodeShift,
//===------------------------------------------------------------------===//
/// VEX - The opcode prefix used by AVX instructions
@@ -807,7 +806,7 @@ public:
int64_t &Offset1, int64_t &Offset2) const;
/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
- /// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should
+ /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
/// be scheduled togther. On some targets if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load offsets
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index f832a7c..8cab808 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -23,6 +23,9 @@ def SDTIntShiftDOp: SDTypeProfile<1, 3,
def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>;
+def SDTX86Cmpsd : SDTypeProfile<1, 3, [SDTCisVT<0, f64>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+def SDTX86Cmpss : SDTypeProfile<1, 3, [SDTCisVT<0, f32>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+
def SDTX86Cmov : SDTypeProfile<1, 4,
[SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
@@ -459,7 +462,7 @@ def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
include "X86InstrFormats.td"
//===----------------------------------------------------------------------===//
-// Pattern fragments...
+// Pattern fragments.
//
// X86 specific condition code. These correspond to CondCode in
@@ -481,21 +484,21 @@ def X86_COND_O : PatLeaf<(i8 13)>;
def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
def X86_COND_S : PatLeaf<(i8 15)>;
-def immSext8 : PatLeaf<(imm), [{ return immSext8(N); }]>;
+let FastIselShouldIgnore = 1 in { // FastIsel should ignore all simm8 instrs.
+ def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
+ def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
+ def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
+}
-def i16immSExt8 : PatLeaf<(i16 immSext8)>;
-def i32immSExt8 : PatLeaf<(i32 immSext8)>;
-def i64immSExt8 : PatLeaf<(i64 immSext8)>;
-def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>;
-def i64immZExt32 : PatLeaf<(i64 imm), [{
- // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
- // unsignedsign extended field.
- return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
-}]>;
+def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>;
+
+
+// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+// unsigned field.
+def i64immZExt32 : ImmLeaf<i64, [{ return (uint64_t)Imm == (uint32_t)Imm; }]>;
-def i64immZExt32SExt8 : PatLeaf<(i64 imm), [{
- uint64_t v = N->getZExtValue();
- return v == (uint32_t)v && (int32_t)v == (int8_t)v;
+def i64immZExt32SExt8 : ImmLeaf<i64, [{
+ return (uint64_t)Imm == (uint32_t)Imm && (int32_t)Imm == (int8_t)Imm;
}]>;
// Helper fragments for loads.
@@ -1437,7 +1440,7 @@ def : InstAlias<"idivq $src, %rax", (IDIV64m i64mem:$src)>;
// Various unary fpstack operations default to operating on on ST1.
// For example, "fxch" -> "fxch %st(1)"
-def : InstAlias<"faddp", (ADD_FPrST0 ST1)>;
+def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>;
def : InstAlias<"fsubp", (SUBR_FPrST0 ST1)>;
def : InstAlias<"fsubrp", (SUB_FPrST0 ST1)>;
def : InstAlias<"fmulp", (MUL_FPrST0 ST1)>;
@@ -1455,13 +1458,15 @@ def : InstAlias<"fucompi", (UCOM_FIPr ST1)>;
// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate
// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
// gas.
-multiclass FpUnaryAlias<string Mnemonic, Instruction Inst> {
- def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"), (Inst RST:$op)>;
- def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"), (Inst ST0)>;
+multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
+ def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"),
+ (Inst RST:$op), EmitAlias>;
+ def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"),
+ (Inst ST0), EmitAlias>;
}
defm : FpUnaryAlias<"fadd", ADD_FST0r>;
-defm : FpUnaryAlias<"faddp", ADD_FPrST0>;
+defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>;
defm : FpUnaryAlias<"fsub", SUB_FST0r>;
defm : FpUnaryAlias<"fsubp", SUBR_FPrST0>;
defm : FpUnaryAlias<"fsubr", SUBR_FST0r>;
@@ -1472,8 +1477,8 @@ defm : FpUnaryAlias<"fdiv", DIV_FST0r>;
defm : FpUnaryAlias<"fdivp", DIVR_FPrST0>;
defm : FpUnaryAlias<"fdivr", DIVR_FST0r>;
defm : FpUnaryAlias<"fdivrp", DIV_FPrST0>;
-defm : FpUnaryAlias<"fcomi", COM_FIr>;
-defm : FpUnaryAlias<"fucomi", UCOM_FIr>;
+defm : FpUnaryAlias<"fcomi", COM_FIr, 0>;
+defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>;
defm : FpUnaryAlias<"fcompi", COM_FIPr>;
defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
@@ -1481,8 +1486,9 @@ defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
// solely because gas supports it.
-def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op)>;
+def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op), 0>;
def : InstAlias<"fmulp %st(0), $op", (MUL_FPrST0 RST:$op)>;
+def : InstAlias<"fsubp %st(0), $op", (SUBR_FPrST0 RST:$op)>;
def : InstAlias<"fsubrp %st(0), $op", (SUB_FPrST0 RST:$op)>;
def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>;
def : InstAlias<"fdivrp %st(0), $op", (DIV_FPrST0 RST:$op)>;
@@ -1534,29 +1540,31 @@ def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg)>;
def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm)>;
// Match 'movq GR64, MMX' as an alias for movd.
-def : InstAlias<"movq $src, $dst", (MMX_MOVD64to64rr VR64:$dst, GR64:$src)>;
-def : InstAlias<"movq $src, $dst", (MMX_MOVD64from64rr GR64:$dst, VR64:$src)>;
+def : InstAlias<"movq $src, $dst",
+ (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
+def : InstAlias<"movq $src, $dst",
+ (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;
// movsd with no operands (as opposed to the SSE scalar move of a double) is an
// alias for movsl. (as in rep; movsd)
def : InstAlias<"movsd", (MOVSD)>;
// movsx aliases
-def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>;
// movzx aliases
-def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src), 0>;
// Note: No GR32->GR64 movzx form.
// outb %dx -> outb %al, %dx
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index bb2165a..b2d9fca 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -285,7 +285,7 @@ let Constraints = "$src1 = $dst" in
defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand, 1>;
defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por, 1>;
defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor, 1>;
-defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn, 1>;
+defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn>;
// Shift Instructions
defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 8f08e68..7774057 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -135,18 +135,16 @@ class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
// is used instead. Register-to-register movss/movsd is not modeled as an
// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
// in terms of a copy, and just mentioned, we don't use movss/movsd for copies.
-let isAsmParserOnly = 0 in {
- def VMOVSSrr : sse12_move_rr<FR32, v4f32,
- "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V;
- def VMOVSDrr : sse12_move_rr<FR64, v2f64,
- "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V;
+def VMOVSSrr : sse12_move_rr<FR32, v4f32,
+ "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V;
+def VMOVSDrr : sse12_move_rr<FR64, v2f64,
+ "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V;
- let canFoldAsLoad = 1, isReMaterializable = 1 in {
- def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX;
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX;
- let AddedComplexity = 20 in
- def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX;
- }
+ let AddedComplexity = 20 in
+ def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX;
}
let Constraints = "$src1 = $dst" in {
@@ -218,14 +216,12 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(store FR64:$src, addr:$dst)]>;
-let isAsmParserOnly = 0 in {
def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
[(store FR32:$src, addr:$dst)]>, XS, VEX;
def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(store FR64:$src, addr:$dst)]>, XD, VEX;
-}
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
@@ -251,7 +247,6 @@ let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
[(set RC:$dst, (ld_frag addr:$src))], d>;
}
-let isAsmParserOnly = 0 in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
"movaps", SSEPackedSingle>, VEX;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
@@ -269,7 +264,6 @@ defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
"movups", SSEPackedSingle>, VEX;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
"movupd", SSEPackedDouble, 0>, OpSize, VEX;
-}
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
"movaps", SSEPackedSingle>, TB;
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
@@ -279,7 +273,6 @@ defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
"movupd", SSEPackedDouble, 0>, TB, OpSize;
-let isAsmParserOnly = 0 in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX;
@@ -304,7 +297,6 @@ def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
-}
def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
@@ -328,32 +320,14 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(store (v2f64 VR128:$src), addr:$dst)]>;
// Intrinsic forms of MOVUPS/D load and store
-let isAsmParserOnly = 0 in {
- let canFoldAsLoad = 1, isReMaterializable = 1 in
- def VMOVUPSrm_Int : VPSI<0x10, MRMSrcMem, (outs VR128:$dst),
- (ins f128mem:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>, VEX;
- def VMOVUPDrm_Int : VPDI<0x10, MRMSrcMem, (outs VR128:$dst),
- (ins f128mem:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>, VEX;
- def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX;
- def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
-}
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
-def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
+def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX;
+def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movups\t{$src, $dst|$dst, $src}",
@@ -382,7 +356,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
SSEPackedDouble>, TB, OpSize;
}
-let isAsmParserOnly = 0, AddedComplexity = 20 in {
+let AddedComplexity = 20 in {
defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
"\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
@@ -395,7 +369,6 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
"\t{$src2, $dst|$dst, $src2}">;
}
-let isAsmParserOnly = 0 in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -404,7 +377,6 @@ def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)]>, VEX;
-}
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -416,7 +388,6 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
-let isAsmParserOnly = 0 in {
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
@@ -429,7 +400,6 @@ def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
(v2f64 (unpckh VR128:$src, (undef))),
(iPTR 0))), addr:$dst)]>,
VEX;
-}
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
@@ -441,7 +411,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
(v2f64 (unpckh VR128:$src, (undef))),
(iPTR 0))), addr:$dst)]>;
-let isAsmParserOnly = 0, AddedComplexity = 20 in {
+let AddedComplexity = 20 in {
def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -516,7 +486,6 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
}
-let isAsmParserOnly = 0 in {
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
@@ -542,7 +511,6 @@ defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
VEX_4V;
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
VEX_4V, VEX_W;
-}
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}">, XS;
@@ -591,27 +559,25 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
}
-let isAsmParserOnly = 0 in {
- defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- f32mem, load, "cvtss2si">, XS, VEX;
- defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">,
- XS, VEX, VEX_W;
- defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si">, XD, VEX;
- defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
- XD, VEX, VEX_W;
-
- // FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
- // Get rid of this hack or rename the intrinsics, there are several
- // intructions that only match with the intrinsic form, why create duplicates
- // to let them be recognized by the assembler?
- defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
- "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
- defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
- "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
-}
+defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ f32mem, load, "cvtss2si">, XS, VEX;
+defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">,
+ XS, VEX, VEX_W;
+defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si">, XD, VEX;
+defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
+ XD, VEX, VEX_W;
+
+// FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
+// Get rid of this hack or rename the intrinsics, there are several
+// intructions that only match with the intrinsic form, why create duplicates
+// to let them be recognized by the assembler?
+defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
f32mem, load, "cvtss2si">, XS;
defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
@@ -622,18 +588,16 @@ defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
f128mem, load, "cvtsd2si{q}">, XD, REX_W;
-let isAsmParserOnly = 0 in {
- defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
- defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
- VEX_W;
- defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
- defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
- VEX_4V, VEX_W;
-}
+defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
+defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
+ VEX_W;
+defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
+defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
+ VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in {
defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
@@ -653,7 +617,6 @@ let Constraints = "$src1 = $dst" in {
/// SSE 1 Only
// Aliases for intrinsics
-let isAsmParserOnly = 0 in {
defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
f32mem, load, "cvttss2si">, XS, VEX;
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
@@ -664,7 +627,6 @@ defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, f128mem, load,
"cvttsd2si">, XD, VEX, VEX_W;
-}
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
f32mem, load, "cvttss2si">, XS;
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
@@ -676,7 +638,7 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, f128mem, load,
"cvttsd2si{q}">, XD, REX_W;
-let isAsmParserOnly = 0, Pattern = []<dag> in {
+let Pattern = []<dag> in {
defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
"cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
@@ -702,7 +664,6 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
/// SSE 2 Only
// Convert scalar double to scalar single
-let isAsmParserOnly = 0 in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
@@ -711,7 +672,6 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR64:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
-}
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
Requires<[HasAVX]>;
@@ -723,7 +683,6 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
[(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
Requires<[HasSSE2, OptForSize]>;
-let isAsmParserOnly = 0 in
defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>,
XS, VEX_4V;
@@ -732,7 +691,7 @@ defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS;
// Convert scalar single to scalar double
-let isAsmParserOnly = 0 in { // SSE2 instructions with XS prefix
+// SSE2 instructions with XS prefix
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -741,7 +700,6 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR32:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
-}
def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>,
Requires<[HasAVX]>;
@@ -754,7 +712,6 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
[(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
Requires<[HasSSE2, OptForSize]>;
-let isAsmParserOnly = 0 in {
def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -767,7 +724,6 @@ def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
[(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
(load addr:$src2)))]>, XS, VEX_4V,
Requires<[HasAVX]>;
-}
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
@@ -788,7 +744,7 @@ def : Pat<(extloadf32 addr:$src),
Requires<[HasSSE2, OptForSpeed]>;
// Convert doubleword to packed single/double fp
-let isAsmParserOnly = 0 in { // SSE2 instructions without OpSize prefix
+// SSE2 instructions without OpSize prefix
def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
@@ -798,7 +754,6 @@ def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtdq2ps
(bitconvert (memopv2i64 addr:$src))))]>,
TB, VEX, Requires<[HasAVX]>;
-}
def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
@@ -810,7 +765,7 @@ def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
TB, Requires<[HasSSE2]>;
// FIXME: why the non-intrinsic version is described as SSE3?
-let isAsmParserOnly = 0 in { // SSE2 instructions with XS prefix
+// SSE2 instructions with XS prefix
def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
@@ -820,7 +775,6 @@ def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd
(bitconvert (memopv2i64 addr:$src))))]>,
XS, VEX, Requires<[HasAVX]>;
-}
def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
@@ -833,7 +787,6 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
// Convert packed single/double fp to doubleword
-let isAsmParserOnly = 0 in {
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -842,13 +795,11 @@ def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-}
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", []>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", []>;
-let isAsmParserOnly = 0 in {
def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>,
@@ -858,7 +809,6 @@ def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq
(memop addr:$src)))]>, VEX;
-}
def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
@@ -867,7 +817,7 @@ def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtps2dq
(memop addr:$src)))]>;
-let isAsmParserOnly = 0 in { // SSE2 packed instructions with XD prefix
+// SSE2 packed instructions with XD prefix
def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
@@ -877,7 +827,6 @@ def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq
(memop addr:$src)))]>,
XD, VEX, Requires<[HasAVX]>;
-}
def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
@@ -890,7 +839,7 @@ def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
// Convert with truncation packed single/double fp to doubleword
-let isAsmParserOnly = 0 in { // SSE2 packed instructions with XS prefix
+// SSE2 packed instructions with XS prefix
def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -899,7 +848,6 @@ def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-}
def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -910,7 +858,6 @@ def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
(int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
-let isAsmParserOnly = 0 in {
def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -921,9 +868,7 @@ def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvttps2dq
(memop addr:$src)))]>,
XS, VEX, Requires<[HasAVX]>;
-}
-let isAsmParserOnly = 0 in {
def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
@@ -934,7 +879,6 @@ def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memop addr:$src)))]>, VEX;
-}
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
@@ -943,7 +887,6 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memop addr:$src)))]>;
-let isAsmParserOnly = 0 in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
@@ -963,10 +906,9 @@ def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
-}
// Convert packed single to packed double
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
@@ -982,7 +924,6 @@ def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
-let isAsmParserOnly = 0 in {
def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
@@ -992,7 +933,6 @@ def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtps2pd
(load addr:$src)))]>,
VEX, Requires<[HasAVX]>;
-}
def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
@@ -1004,7 +944,6 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
TB, Requires<[HasSSE2]>;
// Convert packed double to packed single
-let isAsmParserOnly = 0 in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
@@ -1024,14 +963,12 @@ def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
-}
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
-let isAsmParserOnly = 0 in {
def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
@@ -1040,7 +977,6 @@ def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps
(memop addr:$src)))]>;
-}
def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
@@ -1109,7 +1045,7 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
asm_alt, []>;
}
-let neverHasSideEffects = 1, isAsmParserOnly = 0 in {
+let neverHasSideEffects = 1 in {
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem,
"cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
"cmpss\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
@@ -1120,13 +1056,37 @@ let neverHasSideEffects = 1, isAsmParserOnly = 0 in {
XD, VEX_4V;
}
+let Constraints = "$src1 = $dst" in {
+def CMPSSrr : SIi8<0xC2, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, SSECC:$cc),
+ "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), FR32:$src2, imm:$cc))]>, XS;
+def CMPSSrm : SIi8<0xC2, MRMSrcMem,
+ (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, SSECC:$cc),
+ "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), (loadf32 addr:$src2), imm:$cc))]>, XS;
+def CMPSDrr : SIi8<0xC2, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, SSECC:$cc),
+ "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), FR64:$src2, imm:$cc))]>, XD;
+def CMPSDrm : SIi8<0xC2, MRMSrcMem,
+ (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, SSECC:$cc),
+ "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), (loadf64 addr:$src2), imm:$cc))]>, XD;
+}
let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
- defm CMPSS : sse12_cmp_scalar<FR32, f32mem,
- "cmp${cc}ss\t{$src, $dst|$dst, $src}",
- "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}">, XS;
- defm CMPSD : sse12_cmp_scalar<FR64, f64mem,
- "cmp${cc}sd\t{$src, $dst|$dst, $src}",
- "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}">, XD;
+def CMPSSrr_alt : SIi8<0xC2, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2),
+ "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS;
+def CMPSSrm_alt : SIi8<0xC2, MRMSrcMem,
+ (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2),
+ "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS;
+def CMPSDrr_alt : SIi8<0xC2, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2),
+ "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD;
+def CMPSDrm_alt : SIi8<0xC2, MRMSrcMem,
+ (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2),
+ "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD;
}
multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
@@ -1142,14 +1102,12 @@ multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
}
// Aliases to match intrinsics which expect XMM operand(s).
-let isAsmParserOnly = 0 in {
- defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
- "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
- XS, VEX_4V;
- defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
- "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
- XD, VEX_4V;
-}
+defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
+ XS, VEX_4V;
+defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
+ XD, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS;
@@ -1172,28 +1130,26 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
}
let Defs = [EFLAGS] in {
- let isAsmParserOnly = 0 in {
- defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
- "ucomiss", SSEPackedSingle>, VEX;
- defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
- "ucomisd", SSEPackedDouble>, OpSize, VEX;
- let Pattern = []<dag> in {
- defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
- "comiss", SSEPackedSingle>, VEX;
- defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
- "comisd", SSEPackedDouble>, OpSize, VEX;
- }
-
- defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
- load, "ucomiss", SSEPackedSingle>, VEX;
- defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
- load, "ucomisd", SSEPackedDouble>, OpSize, VEX;
-
- defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
- load, "comiss", SSEPackedSingle>, VEX;
- defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
- load, "comisd", SSEPackedDouble>, OpSize, VEX;
+ defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, VEX;
+ defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, OpSize, VEX;
+ let Pattern = []<dag> in {
+ defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, VEX;
+ defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, OpSize, VEX;
}
+
+ defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+ load, "ucomiss", SSEPackedSingle>, VEX;
+ defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+ load, "ucomisd", SSEPackedDouble>, OpSize, VEX;
+
+ defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
+ load, "comiss", SSEPackedSingle>, VEX;
+ defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
+ load, "comisd", SSEPackedDouble>, OpSize, VEX;
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss", SSEPackedSingle>, TB;
defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
@@ -1239,24 +1195,22 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
asm_alt, [], d>;
}
-let isAsmParserOnly = 0 in {
- defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
- "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
- "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
- defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
- "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
- "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
-}
+defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src, $dst|$dst, $src}",
@@ -1296,20 +1250,18 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
(vt (shufp:$src3 RC:$src1, RC:$src2)))], d>;
}
-let isAsmParserOnly = 0 in {
- defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
- "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- memopv4f32, SSEPackedSingle>, TB, VEX_4V;
- defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
- "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- memopv8f32, SSEPackedSingle>, TB, VEX_4V;
- defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
- "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
- memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
- defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
- "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
- memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
-}
+defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv4f32, SSEPackedSingle>, TB, VEX_4V;
+defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv8f32, SSEPackedSingle>, TB, VEX_4V;
+defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+ memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
+defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+ memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -1342,33 +1294,31 @@ multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
}
let AddedComplexity = 10 in {
- let isAsmParserOnly = 0 in {
- defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
- VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
- VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
- defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
- VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
- VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
-
- defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
- VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
- VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
- defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
- VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
- VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
- }
+ defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+
+ defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
+ VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
+ VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
+ VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
+ VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
@@ -1401,35 +1351,46 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
}
// Mask creation
+defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
+ "movmskps", SSEPackedSingle>, VEX;
+defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
+ "movmskpd", SSEPackedDouble>, OpSize,
+ VEX;
+defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
+ "movmskps", SSEPackedSingle>, VEX;
+defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
+ "movmskpd", SSEPackedDouble>, OpSize,
+ VEX;
defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
SSEPackedSingle>, TB;
defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
SSEPackedDouble>, TB, OpSize;
-let isAsmParserOnly = 0 in {
- defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
- "movmskps", SSEPackedSingle>, VEX;
- defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
- "movmskpd", SSEPackedDouble>, OpSize,
- VEX;
- defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
- "movmskps", SSEPackedSingle>, VEX;
- defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
- "movmskpd", SSEPackedDouble>, OpSize,
- VEX;
+// X86fgetsign
+def MOVMSKPDrr32_alt : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (X86fgetsign FR64:$src))], SSEPackedDouble>, TB, OpSize;
+def MOVMSKPDrr64_alt : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (X86fgetsign FR64:$src))], SSEPackedDouble>, TB, OpSize;
+def MOVMSKPSrr32_alt : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
+ "movmskps\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (X86fgetsign FR32:$src))], SSEPackedSingle>, TB;
+def MOVMSKPSrr64_alt : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
+ "movmskps\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (X86fgetsign FR32:$src))], SSEPackedSingle>, TB;
- // Assembler Only
- def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
- def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
- VEX;
- def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
- def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
- VEX;
-}
+// Assembler Only
+def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+ VEX;
+def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+ VEX;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions
@@ -1484,13 +1445,11 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
///
multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
- let isAsmParserOnly = 0 in {
- defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
- FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V;
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+ FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V;
- defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
- FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V;
- }
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+ FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
@@ -1516,7 +1475,7 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode, int HasPat = 0,
list<list<dag>> Pattern = []> {
- let isAsmParserOnly = 0, Pattern = []<dag> in {
+ let Pattern = []<dag> in {
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
!if(HasPat, Pattern[0], // rr
@@ -1563,7 +1522,6 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
///
-let isAsmParserOnly = 0 in {
multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> {
defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f256mem, [], [], 0>, VEX_4V;
@@ -1571,7 +1529,6 @@ multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> {
defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f256mem, [], [], 0>, OpSize, VEX_4V;
}
-}
// AVX 256-bit packed logical ops forms
defm VAND : sse12_fp_packed_logical_y<0x54, "and">;
@@ -1669,38 +1626,36 @@ multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {
}
// Binary Arithmetic instructions
-let isAsmParserOnly = 0 in {
- defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
- basic_sse12_fp_binop_s_int<0x58, "add", 0>,
- basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
- basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
- defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
- basic_sse12_fp_binop_s_int<0x59, "mul", 0>,
- basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
- basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
+defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_s_int<0x58, "add", 0>,
+ basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
+defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", 0>,
+ basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
- let isCommutable = 0 in {
- defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
- basic_sse12_fp_binop_s_int<0x5C, "sub", 0>,
- basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
- basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
- defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
- basic_sse12_fp_binop_s_int<0x5E, "div", 0>,
- basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
- basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
- defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
- basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
- basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
- basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
- basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
- basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
- defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
- basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
- basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
- basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
- basic_sse12_fp_binop_p_y_int<0x5D, "min">,
- basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
- }
+let isCommutable = 0 in {
+ defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", 0>,
+ basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
+ defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", 0>,
+ basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
+ defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
+ basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
+ basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
+ defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
+ basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
+ basic_sse12_fp_binop_p_y_int<0x5D, "min">,
+ basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
@@ -1901,7 +1856,7 @@ multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
[(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
// Square root.
defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>,
sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>,
@@ -1957,67 +1912,54 @@ defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
// SSE 1 & 2 - Non-temporal stores
//===----------------------------------------------------------------------===//
-let isAsmParserOnly = 0 in {
- def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs),
- (ins i128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX;
- def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs),
- (ins i128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX;
-
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs),
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+ def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX;
-
- let AddedComplexity = 400 in { // Prefer non-temporal versions
- def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src),
- addr:$dst)]>, VEX;
- def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src),
- addr:$dst)]>, VEX;
- def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src),
- addr:$dst)]>, VEX;
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src),
+ [(alignednontemporalstore (v2f64 VR128:$src),
addr:$dst)]>, VEX;
- def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8f32 VR256:$src),
- addr:$dst)]>, VEX;
- def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f64 VR256:$src),
- addr:$dst)]>, VEX;
- def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f64 VR256:$src),
- addr:$dst)]>, VEX;
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+
+ def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
+
+ def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8f32 VR256:$src),
+ [(alignednontemporalstore (v4f64 VR256:$src),
addr:$dst)]>, VEX;
- }
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)]>, VEX;
}
def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
@@ -2027,18 +1969,6 @@ def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src),
def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
(VMOVNTPSYmr addr:$dst, VR256:$src)>;
-def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
-def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
-
-let ExeDomain = SSEPackedInt in
-def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
-
let AddedComplexity = 400 in { // Prefer non-temporal versions
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
@@ -2056,22 +1986,19 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>;
+
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti\t{$src, $dst|$dst, $src}",
[(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
TB, Requires<[HasSSE2]>;
-
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"movnti\t{$src, $dst|$dst, $src}",
[(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
TB, Requires<[HasSSE2]>;
-
}
-def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "movnti\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
- TB, Requires<[HasSSE2]>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Misc Instructions (No AVX form)
@@ -2079,13 +2006,13 @@ def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
// Prefetch intrinsic.
def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
- "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>;
+ "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>;
def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
- "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>;
+ "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>;
def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
- "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>;
+ "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>;
def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
- "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
+ "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
// Load, store, and memory fence
def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
@@ -2136,16 +2063,23 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
(f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+// FIXME: According to the intel manual, DEST[127:64] <- SRC1[127:64], while
+// in the non-AVX version bits 127:64 aren't touched. Find a better way to
+// represent this instead of always zeroing SRC1. One possible solution is
+// to represent the instruction w/ something similar as the "$src1 = $dst"
+// constraint but without the tied operands.
+def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), addr:$src)>,
+ Requires<[HasAVX, OptForSpeed]>;
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Load/Store XCSR register
//===----------------------------------------------------------------------===//
-let isAsmParserOnly = 0 in {
- def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
- "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
- def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
- "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
-}
+def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
+def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
"ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
@@ -2158,45 +2092,43 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
let ExeDomain = SSEPackedInt in { // SSE integer instructions
-let isAsmParserOnly = 0 in {
- let neverHasSideEffects = 1 in {
- def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- }
- def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
- def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
-
- let canFoldAsLoad = 1, mayLoad = 1 in {
- def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- let Predicates = [HasAVX] in {
- def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
- def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
- }
- }
+let neverHasSideEffects = 1 in {
+def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
- let mayStore = 1 in {
- def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
- (ins i128mem:$dst, VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
- (ins i256mem:$dst, VR256:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- let Predicates = [HasAVX] in {
- def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+let canFoldAsLoad = 1, mayLoad = 1 in {
+def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+let Predicates = [HasAVX] in {
+ def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
- def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
+ def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
- }
- }
+}
+}
+
+let mayStore = 1 in {
+def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i256mem:$dst, VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+let Predicates = [HasAVX] in {
+def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+}
}
let neverHasSideEffects = 1 in
@@ -2228,23 +2160,11 @@ def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
}
// Intrinsic forms of MOVDQU load and store
-let isAsmParserOnly = 0 in {
-let canFoldAsLoad = 1 in
-def VMOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
- XS, VEX, Requires<[HasAVX]>;
def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
XS, VEX, Requires<[HasAVX]>;
-}
-let canFoldAsLoad = 1 in
-def MOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "movdqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
- XS, Requires<[HasSSE2]>;
def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
@@ -2349,7 +2269,7 @@ multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
// 128-bit Integer Arithmetic
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V;
defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V;
defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V;
@@ -2439,7 +2359,7 @@ defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>,
VEX_4V;
@@ -2586,7 +2506,7 @@ let Predicates = [HasSSE2] in {
// SSE2 - Packed Integer Comparison Instructions
//===---------------------------------------------------------------------===//
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1,
0>, VEX_4V;
defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1,
@@ -2640,7 +2560,7 @@ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
// SSE2 - Packed Integer Pack Instructions
//===---------------------------------------------------------------------===//
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
0, 0>, VEX_4V;
defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
@@ -2678,7 +2598,7 @@ def mi : Ii8<0x70, MRMSrcMem,
}
} // ExeDomain = SSEPackedInt
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
let AddedComplexity = 5 in
defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize,
VEX;
@@ -2726,7 +2646,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
addr:$src2))))]>;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8,
0>, VEX_4V;
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16,
@@ -2836,7 +2756,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
}
// Extract
-let isAsmParserOnly = 0, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -2849,7 +2769,7 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
imm:$src2))]>;
// Insert
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
@@ -2868,13 +2788,11 @@ let Constraints = "$src1 = $dst" in
let ExeDomain = SSEPackedInt in {
-let isAsmParserOnly = 0 in {
def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
-}
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
@@ -2887,7 +2805,6 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
let ExeDomain = SSEPackedInt in {
-let isAsmParserOnly = 0 in {
let Uses = [EDI] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
@@ -2898,7 +2815,6 @@ def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX;
-}
let Uses = [EDI] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
@@ -2916,7 +2832,6 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
//===---------------------------------------------------------------------===//
// Move Int Doubleword to Packed Double Int
-let isAsmParserOnly = 0 in {
def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2926,7 +2841,6 @@ def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
VEX;
-}
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2945,7 +2859,6 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
// Move Int Doubleword to Single Scalar
-let isAsmParserOnly = 0 in {
def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))]>, VEX;
@@ -2954,7 +2867,6 @@ def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
VEX;
-}
def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))]>;
@@ -2964,7 +2876,6 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
// Move Packed Doubleword Int to Packed Double Int
-let isAsmParserOnly = 0 in {
def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2974,7 +2885,6 @@ def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)]>, VEX;
-}
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -3000,14 +2910,12 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
[(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
// Move Scalar Single to Double Int
-let isAsmParserOnly = 0 in {
def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))]>, VEX;
def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX;
-}
def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))]>;
@@ -3016,7 +2924,7 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
[(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
// movd / movq to XMM register zero-extends
-let AddedComplexity = 15, isAsmParserOnly = 0 in {
+let AddedComplexity = 15 in {
def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4i32 (X86vzmovl
@@ -3040,7 +2948,6 @@ def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
}
let AddedComplexity = 20 in {
-let isAsmParserOnly = 0 in
def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -3066,7 +2973,6 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
//===---------------------------------------------------------------------===//
// Move Quadword Int to Packed Quadword Int
-let isAsmParserOnly = 0 in
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -3079,7 +2985,6 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
// Move Packed Quadword Int to Quadword Int
-let isAsmParserOnly = 0 in
def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (vector_extract (v2i64 VR128:$src),
@@ -3093,7 +2998,6 @@ def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
(f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
// Store / copy lower 64-bits of a XMM register.
-let isAsmParserOnly = 0 in
def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
@@ -3101,7 +3005,7 @@ def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
-let AddedComplexity = 20, isAsmParserOnly = 0 in
+let AddedComplexity = 20 in
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -3126,7 +3030,7 @@ def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
// IA32 document. movq xmm1, xmm2 does clear the high bits.
-let isAsmParserOnly = 0, AddedComplexity = 15 in
+let AddedComplexity = 15 in
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
@@ -3137,7 +3041,7 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
XS, Requires<[HasSSE2]>;
-let AddedComplexity = 20, isAsmParserOnly = 0 in
+let AddedComplexity = 20 in
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl
@@ -3155,7 +3059,6 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
}
// Instructions to match in the assembler
-let isAsmParserOnly = 0 in {
def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
@@ -3163,13 +3066,12 @@ def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
// Recognize "movd" with GR64 destination, but encode as a "movq"
def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
-}
// Instructions for the disassembler
// xr = XMM register
// xm = mem64
-let isAsmParserOnly = 0, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -3211,7 +3113,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
//===---------------------------------------------------------------------===//
// Convert Packed Double FP to Packed DW Integers
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
@@ -3239,7 +3141,7 @@ def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
// Convert Packed DW Integers to Packed Double FP
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -3290,7 +3192,7 @@ def rm : S3SI<op, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
// FIXME: Merge above classes when we have patterns for the ymm version
defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
@@ -3321,7 +3223,7 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
[]>;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
// FIXME: Merge above classes when we have patterns for the ymm version
defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
@@ -3329,7 +3231,7 @@ let isAsmParserOnly = 0, Predicates = [HasAVX] in {
defm MOVDDUP : sse3_replicate_dfp<"movddup">;
// Move Unaligned Integer
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
@@ -3393,7 +3295,7 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
[(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX],
+let Predicates = [HasAVX],
ExeDomain = SSEPackedDouble in {
defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
f128mem, 0>, TB, XD, VEX_4V;
@@ -3446,7 +3348,7 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
[(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
int_x86_sse3_hadd_ps, 0>, VEX_4V;
defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
@@ -3498,7 +3400,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (mem_frag128 addr:$src))))]>, OpSize;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
int_x86_ssse3_pabs_b_128>, VEX;
defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16,
@@ -3540,7 +3442,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
@@ -3632,7 +3534,7 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
[]>, OpSize;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm PALIGN : ssse3_palign<"palignr">;
@@ -3696,6 +3598,16 @@ let Predicates = [HasSSE2] in
def : Pat<(fextend (loadf32 addr:$src)),
(CVTSS2SDrm addr:$src)>;
+// FIXME: According to the intel manual, DEST[127:64] <- SRC1[127:64], while
+// in the non-AVX version bits 127:64 aren't touched. Find a better way to
+// represent this instead of always zeroing SRC1. One possible solution is
+// to represent the instruction w/ something similar as the "$src1 = $dst"
+// constraint but without the tied operands.
+let Predicates = [HasAVX] in
+ def : Pat<(fextend (loadf32 addr:$src)),
+ (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)),
+ addr:$src)>;
+
// bit_convert
let Predicates = [HasXMMInt] in {
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
@@ -3987,7 +3899,7 @@ multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
VEX;
defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>,
@@ -4053,7 +3965,7 @@ multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>,
VEX;
defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>,
@@ -4094,7 +4006,7 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
VEX;
defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
@@ -4136,7 +4048,7 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst),
(ins VR128:$src1, i32i8imm:$src2),
@@ -4158,7 +4070,7 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
@@ -4180,7 +4092,7 @@ multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
@@ -4201,7 +4113,7 @@ multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize, REX_W;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
@@ -4224,7 +4136,7 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
(ins VR128:$src1, i32i8imm:$src2),
@@ -4264,7 +4176,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
imm:$src3))]>, OpSize;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
@@ -4290,7 +4202,7 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
imm:$src3)))]>, OpSize;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
@@ -4316,7 +4228,7 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
imm:$src3)))]>, OpSize;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in
defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
@@ -4349,7 +4261,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
let Constraints = "$src1 = $dst" in
defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
-let isAsmParserOnly = 0, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
@@ -4519,7 +4431,7 @@ multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
}
// FP round - roundss, roundps, roundsd, roundpd
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
// Intrinsic form
defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
memopv4f32, memopv2f64,
@@ -4554,7 +4466,7 @@ defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
// ptest instruction we'll lower to this in X86ISelLowering primarily from
// the intel intrinsic that corresponds to this.
-let Defs = [EFLAGS], isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Defs = [EFLAGS], Predicates = [HasAVX] in {
def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
@@ -4597,7 +4509,7 @@ multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
OpSize, VEX;
}
-let Defs = [EFLAGS], isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Defs = [EFLAGS], Predicates = [HasAVX] in {
defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
@@ -4646,7 +4558,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
(bitconvert (memopv8i16 addr:$src))))]>, OpSize;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
int_x86_sse41_phminposuw>, VEX;
defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
@@ -4672,7 +4584,7 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
let isCommutable = 0 in
defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
0>, VEX_4V;
@@ -4739,7 +4651,7 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpSize;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>;
@@ -4771,7 +4683,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
OpSize;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
VR128, memopv16i8, i128mem, 0>, VEX_4V;
@@ -4812,7 +4724,7 @@ let Constraints = "$src1 = $dst" in {
}
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop,
PatFrag mem_frag, Intrinsic IntId> {
@@ -4851,14 +4763,14 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr,
- "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
+ "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
OpSize;
def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr,
- "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
+ "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
(IntId VR128:$src1,
(bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
@@ -4872,7 +4784,7 @@ defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0),
(PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
-let isAsmParserOnly = 0, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
@@ -4906,7 +4818,7 @@ multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq,
0>, VEX_4V;
let Constraints = "$src1 = $dst" in
@@ -4938,8 +4850,7 @@ let Defs = [EFLAGS], usesCustomInserter = 1 in {
defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
}
-let Defs = [XMM0, EFLAGS], isAsmParserOnly = 0,
- Predicates = [HasAVX] in {
+let Defs = [XMM0, EFLAGS], Predicates = [HasAVX] in {
def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
@@ -4974,7 +4885,7 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
}
-let isAsmParserOnly = 0, Predicates = [HasAVX],
+let Predicates = [HasAVX],
Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
@@ -5009,7 +4920,7 @@ let Defs = [ECX, EFLAGS] in {
}
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">,
VEX;
defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">,
@@ -5048,7 +4959,7 @@ let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in {
}
}
-let isAsmParserOnly = 0, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">,
VEX;
defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">,
@@ -5080,66 +4991,66 @@ defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
// This set of instructions are only rm, the only difference is the size
// of r and m.
let Constraints = "$src1 = $dst" in {
- def CRC32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst),
+ def CRC32r32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i8mem:$src2),
"crc32{b} \t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
- (int_x86_sse42_crc32_8 GR32:$src1,
+ (int_x86_sse42_crc32_32_8 GR32:$src1,
(load addr:$src2)))]>;
- def CRC32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
+ def CRC32r32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR8:$src2),
"crc32{b} \t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
- (int_x86_sse42_crc32_8 GR32:$src1, GR8:$src2))]>;
- def CRC32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
+ (int_x86_sse42_crc32_32_8 GR32:$src1, GR8:$src2))]>;
+ def CRC32r32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i16mem:$src2),
"crc32{w} \t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
- (int_x86_sse42_crc32_16 GR32:$src1,
+ (int_x86_sse42_crc32_32_16 GR32:$src1,
(load addr:$src2)))]>,
OpSize;
- def CRC32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
+ def CRC32r32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR16:$src2),
"crc32{w} \t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
- (int_x86_sse42_crc32_16 GR32:$src1, GR16:$src2))]>,
+ (int_x86_sse42_crc32_32_16 GR32:$src1, GR16:$src2))]>,
OpSize;
- def CRC32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
+ def CRC32r32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"crc32{l} \t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
- (int_x86_sse42_crc32_32 GR32:$src1,
+ (int_x86_sse42_crc32_32_32 GR32:$src1,
(load addr:$src2)))]>;
- def CRC32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
+ def CRC32r32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"crc32{l} \t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
- (int_x86_sse42_crc32_32 GR32:$src1, GR32:$src2))]>;
- def CRC64m8 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
+ (int_x86_sse42_crc32_32_32 GR32:$src1, GR32:$src2))]>;
+ def CRC32r64m8 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i8mem:$src2),
"crc32{b} \t{$src2, $src1|$src1, $src2}",
[(set GR64:$dst,
- (int_x86_sse42_crc64_8 GR64:$src1,
+ (int_x86_sse42_crc32_64_8 GR64:$src1,
(load addr:$src2)))]>,
REX_W;
- def CRC64r8 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
+ def CRC32r64r8 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR8:$src2),
"crc32{b} \t{$src2, $src1|$src1, $src2}",
[(set GR64:$dst,
- (int_x86_sse42_crc64_8 GR64:$src1, GR8:$src2))]>,
+ (int_x86_sse42_crc32_64_8 GR64:$src1, GR8:$src2))]>,
REX_W;
- def CRC64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst),
+ def CRC32r64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"crc32{q} \t{$src2, $src1|$src1, $src2}",
[(set GR64:$dst,
- (int_x86_sse42_crc64_64 GR64:$src1,
+ (int_x86_sse42_crc32_64_64 GR64:$src1,
(load addr:$src2)))]>,
REX_W;
- def CRC64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst),
+ def CRC32r64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"crc32{q} \t{$src2, $src1|$src1, $src2}",
[(set GR64:$dst,
- (int_x86_sse42_crc64_64 GR64:$src1, GR64:$src2))]>,
+ (int_x86_sse42_crc32_64_64 GR64:$src1, GR64:$src2))]>,
REX_W;
}
@@ -5167,7 +5078,7 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
}
// Perform One Round of an AES Encryption/Decryption Flow
-let isAsmParserOnly = 0, Predicates = [HasAVX, HasAES] in {
+let Predicates = [HasAVX, HasAES] in {
defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
int_x86_aesni_aesenc, 0>, VEX_4V;
defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
@@ -5207,7 +5118,7 @@ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
(AESDECLASTrm VR128:$src1, addr:$src2)>;
// Perform the AES InvMixColumn Transformation
-let isAsmParserOnly = 0, Predicates = [HasAVX, HasAES] in {
+let Predicates = [HasAVX, HasAES] in {
def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1),
"vaesimc\t{$src1, $dst|$dst, $src1}",
@@ -5235,7 +5146,7 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
OpSize;
// AES Round Key Generation Assist
-let isAsmParserOnly = 0, Predicates = [HasAVX, HasAES] in {
+let Predicates = [HasAVX, HasAES] in {
def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, i8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -5271,7 +5182,6 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
// Only the AVX version of CLMUL instructions are described here.
// Carry-less Multiplication instructions
-let isAsmParserOnly = 0 in {
def VPCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -5297,13 +5207,10 @@ defm VPCLMULHQLQDQ : avx_vpclmul<"vpclmulhqlqdq">;
defm VPCLMULLQHQDQ : avx_vpclmul<"vpclmullqhqdq">;
defm VPCLMULLQLQDQ : avx_vpclmul<"vpclmullqlqdq">;
-} // isAsmParserOnly
-
//===----------------------------------------------------------------------===//
// AVX Instructions
//===----------------------------------------------------------------------===//
-let isAsmParserOnly = 0 in {
// Load from memory and broadcast to all elements of the destination operand
class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
@@ -5437,8 +5344,6 @@ def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
[(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>;
-} // isAsmParserOnly
-
def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
(VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 2710425..f73cff3 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -34,9 +34,16 @@ let Uses = [EFLAGS] in
def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
[(int_x86_int (i8 3))]>;
+
+// The long form of "int $3" turns into int3 as a size optimization.
+// FIXME: This doesn't work because InstAlias can't match immediate constants.
+//def : InstAlias<"int\t$3", (INT3)>;
+
+
def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
[(int_x86_int imm:$trap)]>;
+
def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
def SYSRETL : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB;
def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp
index 6686214..2e1ec63 100644
--- a/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/X86MCAsmInfo.cpp
@@ -15,7 +15,9 @@
#include "X86TargetMachine.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ELF.h"
using namespace llvm;
@@ -69,7 +71,22 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
DwarfUsesInlineInfoSection = true;
// Exceptions handling
- ExceptionsType = ExceptionHandling::DwarfTable;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
+
+const MCExpr *
+X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const {
+ MCContext &Context = Streamer.getContext();
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
+ const MCExpr *Four = MCConstantExpr::Create(4, Context);
+ return MCBinaryExpr::CreateAdd(Res, Four, Context);
+}
+
+X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
+ : X86MCAsmInfoDarwin(Triple) {
}
X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
@@ -89,7 +106,7 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
SupportsDebugInformation = true;
// Exceptions handling
- ExceptionsType = ExceptionHandling::DwarfTable;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
// OpenBSD has buggy support for .quad in 32-bit mode, just split into two
// .words.
diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h
index 5815225..2cd4c8e 100644
--- a/lib/Target/X86/X86MCAsmInfo.h
+++ b/lib/Target/X86/X86MCAsmInfo.h
@@ -25,6 +25,14 @@ namespace llvm {
explicit X86MCAsmInfoDarwin(const Triple &Triple);
};
+ struct X86_64MCAsmInfoDarwin : public X86MCAsmInfoDarwin {
+ explicit X86_64MCAsmInfoDarwin(const Triple &Triple);
+ virtual const MCExpr *
+ getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const;
+ };
+
struct X86ELFMCAsmInfo : public MCAsmInfo {
explicit X86ELFMCAsmInfo(const Triple &Triple);
virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index a2bd638..55aceba 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -514,7 +514,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
}
// To only check operands before the memory address ones, start
- // the search from the begining
+ // the search from the beginning
if (IsDestMem)
CurOp = 0;
@@ -1015,7 +1015,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
} else {
unsigned FixupKind;
// FIXME: Is there a better way to know that we need a signed relocation?
- if (MI.getOpcode() == X86::MOV64ri32 ||
+ if (MI.getOpcode() == X86::ADD64ri32 ||
+ MI.getOpcode() == X86::MOV64ri32 ||
MI.getOpcode() == X86::MOV64mi32 ||
MI.getOpcode() == X86::PUSH64i32)
FixupKind = X86::reloc_signed_4byte;
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index cbe6db2..793156f 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -355,10 +355,6 @@ ReSimplify:
assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 &&
"LEA has segment specified!");
break;
- case X86::MOVZX16rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break;
- case X86::MOVZX16rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break;
- case X86::MOVSX16rr8: LowerSubReg32_Op0(OutMI, X86::MOVSX32rr8); break;
- case X86::MOVSX16rm8: LowerSubReg32_Op0(OutMI, X86::MOVSX32rm8); break;
case X86::MOVZX64rr32: LowerSubReg32_Op0(OutMI, X86::MOV32rr); break;
case X86::MOVZX64rm32: LowerSubReg32_Op0(OutMI, X86::MOV32rm); break;
case X86::MOV64ri64i32: LowerSubReg32_Op0(OutMI, X86::MOV32ri); break;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 1f464f4..1ad6203 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -73,29 +73,61 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
}
}
-/// getDwarfRegNum - This function maps LLVM register identifiers to the DWARF
-/// specific numbering, used in debug info and exception tables.
-int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
- const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
- unsigned Flavour = DWARFFlavour::X86_64;
-
+static unsigned getFlavour(const X86Subtarget *Subtarget, bool isEH) {
if (!Subtarget->is64Bit()) {
if (Subtarget->isTargetDarwin()) {
if (isEH)
- Flavour = DWARFFlavour::X86_32_DarwinEH;
+ return DWARFFlavour::X86_32_DarwinEH;
else
- Flavour = DWARFFlavour::X86_32_Generic;
+ return DWARFFlavour::X86_32_Generic;
} else if (Subtarget->isTargetCygMing()) {
// Unsupported by now, just quick fallback
- Flavour = DWARFFlavour::X86_32_Generic;
+ return DWARFFlavour::X86_32_Generic;
} else {
- Flavour = DWARFFlavour::X86_32_Generic;
+ return DWARFFlavour::X86_32_Generic;
}
}
+ return DWARFFlavour::X86_64;
+}
+
+/// getDwarfRegNum - This function maps LLVM register identifiers to the DWARF
+/// specific numbering, used in debug info and exception tables.
+int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ unsigned Flavour = getFlavour(Subtarget, isEH);
return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour);
}
+/// getLLVMRegNum - This function maps DWARF register numbers to LLVM register.
+int X86RegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ unsigned Flavour = getFlavour(Subtarget, isEH);
+
+ return X86GenRegisterInfo::getLLVMRegNumFull(DwarfRegNo, Flavour);
+}
+
+int
+X86RegisterInfo::getSEHRegNum(unsigned i) const {
+ int reg = getX86RegNum(i);
+ switch (i) {
+ case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
+ case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
+ case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
+ case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
+ case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
+ case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
+ case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
+ case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
+ case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
+ case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
+ case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+ reg += 8;
+ }
+ return reg;
+}
+
/// getX86RegNum - This function maps LLVM register identifiers to their X86
/// specific numbering, which is used in various places encoding instructions.
unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
@@ -229,19 +261,13 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
}
break;
case X86::sub_8bit_hi:
- if (B == &X86::GR8_ABCD_HRegClass) {
- if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
- A == &X86::GR64_NOREXRegClass ||
- A == &X86::GR64_NOSPRegClass ||
- A == &X86::GR64_NOREX_NOSPRegClass)
- return &X86::GR64_ABCDRegClass;
- else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
- A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
- return &X86::GR32_ABCDRegClass;
- else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
- A == &X86::GR16_NOREXRegClass)
- return &X86::GR16_ABCDRegClass;
- }
+ if (B->hasSubClassEq(&X86::GR8_ABCD_HRegClass))
+ switch (A->getSize()) {
+ case 2: return getCommonSubClass(A, &X86::GR16_ABCDRegClass);
+ case 4: return getCommonSubClass(A, &X86::GR32_ABCDRegClass);
+ case 8: return getCommonSubClass(A, &X86::GR64_ABCDRegClass);
+ default: return 0;
+ }
break;
case X86::sub_16bit:
if (B == &X86::GR16RegClass) {
@@ -285,9 +311,16 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
A == &X86::GR64_NOREX_NOSPRegClass)
return &X86::GR64_ABCDRegClass;
} else if (B == &X86::GR32_NOREXRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass)
+ return &X86::GR64_NOREXRegClass;
+ else if (A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREX_NOSPRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ } else if (B == &X86::GR32_NOREX_NOSPRegClass) {
if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
- return &X86::GR64_NOREXRegClass;
+ return &X86::GR64_NOREX_NOSPRegClass;
else if (A == &X86::GR64_ABCDRegClass)
return &X86::GR64_ABCDRegClass;
}
@@ -308,6 +341,33 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return 0;
}
+const TargetRegisterClass*
+X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
+ const TargetRegisterClass *Super = RC;
+ TargetRegisterClass::sc_iterator I = RC->superclasses_begin();
+ do {
+ switch (Super->getID()) {
+ case X86::GR8RegClassID:
+ case X86::GR16RegClassID:
+ case X86::GR32RegClassID:
+ case X86::GR64RegClassID:
+ case X86::FR32RegClassID:
+ case X86::FR64RegClassID:
+ case X86::RFP32RegClassID:
+ case X86::RFP64RegClassID:
+ case X86::RFP80RegClassID:
+ case X86::VR128RegClassID:
+ case X86::VR256RegClassID:
+ // Don't return a super-class that would shrink the spill size.
+ // That can happen with the vector and float classes.
+ if (Super->getSize() == RC->getSize())
+ return Super;
+ }
+ Super = *I++;
+ } while (Super);
+ return RC;
+}
+
const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
switch (Kind) {
@@ -446,6 +506,34 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(X86::ST5);
Reserved.set(X86::ST6);
Reserved.set(X86::ST7);
+
+ // Mark the segment registers as reserved.
+ Reserved.set(X86::CS);
+ Reserved.set(X86::SS);
+ Reserved.set(X86::DS);
+ Reserved.set(X86::ES);
+ Reserved.set(X86::FS);
+ Reserved.set(X86::GS);
+
+ // Reserve the registers that only exist in 64-bit mode.
+ if (!Is64Bit) {
+ for (unsigned n = 0; n != 8; ++n) {
+ const unsigned GPR64[] = {
+ X86::R8, X86::R9, X86::R10, X86::R11,
+ X86::R12, X86::R13, X86::R14, X86::R15
+ };
+ for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI;
+ ++AI)
+ Reserved.set(Reg);
+
+ // XMM8, XMM9, ...
+ assert(X86::XMM15 == X86::XMM8+7);
+ for (const unsigned *AI = getOverlaps(X86::XMM8 + n); unsigned Reg = *AI;
+ ++AI)
+ Reserved.set(Reg);
+ }
+ }
+
return Reserved;
}
@@ -470,7 +558,7 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
// FIXME: It's more complicated than this...
if (0 && requiresRealignment && MFI->hasVarSizedObjects())
report_fatal_error(
- "Stack realignment in presense of dynamic allocas is not supported");
+ "Stack realignment in presence of dynamic allocas is not supported");
// If we've requested that we force align the stack do so now.
if (ForceStackAlign)
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index cccddfa..dd3d3dc 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -80,6 +80,10 @@ public:
/// getDwarfRegNum - allows modification of X86GenRegisterInfo::getDwarfRegNum
/// (created by TableGen) for target dependencies.
int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+
+ // FIXME: This should be tablegen'd like getDwarfRegNum is
+ int getSEHRegNum(unsigned i) const;
/// Code Generation virtual methods...
///
@@ -91,6 +95,9 @@ public:
getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass *B, unsigned Idx) const;
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
/// getPointerRegClass - Returns a TargetRegisterClass used for pointer
/// values.
const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 612fac2..590b38b 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -41,80 +41,83 @@ let Namespace = "X86" in {
// 8-bit registers
// Low registers
- def AL : Register<"al">, DwarfRegNum<[0, 0, 0]>;
- def DL : Register<"dl">, DwarfRegNum<[1, 2, 2]>;
- def CL : Register<"cl">, DwarfRegNum<[2, 1, 1]>;
- def BL : Register<"bl">, DwarfRegNum<[3, 3, 3]>;
-
- // X86-64 only
- def SIL : Register<"sil">, DwarfRegNum<[4, 6, 6]>;
- def DIL : Register<"dil">, DwarfRegNum<[5, 7, 7]>;
- def BPL : Register<"bpl">, DwarfRegNum<[6, 4, 5]>;
- def SPL : Register<"spl">, DwarfRegNum<[7, 5, 4]>;
- def R8B : Register<"r8b">, DwarfRegNum<[8, -2, -2]>;
- def R9B : Register<"r9b">, DwarfRegNum<[9, -2, -2]>;
- def R10B : Register<"r10b">, DwarfRegNum<[10, -2, -2]>;
- def R11B : Register<"r11b">, DwarfRegNum<[11, -2, -2]>;
- def R12B : Register<"r12b">, DwarfRegNum<[12, -2, -2]>;
- def R13B : Register<"r13b">, DwarfRegNum<[13, -2, -2]>;
- def R14B : Register<"r14b">, DwarfRegNum<[14, -2, -2]>;
- def R15B : Register<"r15b">, DwarfRegNum<[15, -2, -2]>;
+ def AL : Register<"al">;
+ def DL : Register<"dl">;
+ def CL : Register<"cl">;
+ def BL : Register<"bl">;
+
+ // X86-64 only, requires REX.
+ let CostPerUse = 1 in {
+ def SIL : Register<"sil">;
+ def DIL : Register<"dil">;
+ def BPL : Register<"bpl">;
+ def SPL : Register<"spl">;
+ def R8B : Register<"r8b">;
+ def R9B : Register<"r9b">;
+ def R10B : Register<"r10b">;
+ def R11B : Register<"r11b">;
+ def R12B : Register<"r12b">;
+ def R13B : Register<"r13b">;
+ def R14B : Register<"r14b">;
+ def R15B : Register<"r15b">;
+ }
// High registers. On x86-64, these cannot be used in any instruction
// with a REX prefix.
- def AH : Register<"ah">, DwarfRegNum<[0, 0, 0]>;
- def DH : Register<"dh">, DwarfRegNum<[1, 2, 2]>;
- def CH : Register<"ch">, DwarfRegNum<[2, 1, 1]>;
- def BH : Register<"bh">, DwarfRegNum<[3, 3, 3]>;
+ def AH : Register<"ah">;
+ def DH : Register<"dh">;
+ def CH : Register<"ch">;
+ def BH : Register<"bh">;
// 16-bit registers
let SubRegIndices = [sub_8bit, sub_8bit_hi] in {
- def AX : RegisterWithSubRegs<"ax", [AL,AH]>, DwarfRegNum<[0, 0, 0]>;
- def DX : RegisterWithSubRegs<"dx", [DL,DH]>, DwarfRegNum<[1, 2, 2]>;
- def CX : RegisterWithSubRegs<"cx", [CL,CH]>, DwarfRegNum<[2, 1, 1]>;
- def BX : RegisterWithSubRegs<"bx", [BL,BH]>, DwarfRegNum<[3, 3, 3]>;
+ def AX : RegisterWithSubRegs<"ax", [AL,AH]>;
+ def DX : RegisterWithSubRegs<"dx", [DL,DH]>;
+ def CX : RegisterWithSubRegs<"cx", [CL,CH]>;
+ def BX : RegisterWithSubRegs<"bx", [BL,BH]>;
}
let SubRegIndices = [sub_8bit] in {
- def SI : RegisterWithSubRegs<"si", [SIL]>, DwarfRegNum<[4, 6, 6]>;
- def DI : RegisterWithSubRegs<"di", [DIL]>, DwarfRegNum<[5, 7, 7]>;
- def BP : RegisterWithSubRegs<"bp", [BPL]>, DwarfRegNum<[6, 4, 5]>;
- def SP : RegisterWithSubRegs<"sp", [SPL]>, DwarfRegNum<[7, 5, 4]>;
+ def SI : RegisterWithSubRegs<"si", [SIL]>;
+ def DI : RegisterWithSubRegs<"di", [DIL]>;
+ def BP : RegisterWithSubRegs<"bp", [BPL]>;
+ def SP : RegisterWithSubRegs<"sp", [SPL]>;
}
- def IP : Register<"ip">, DwarfRegNum<[16]>;
-
- // X86-64 only
- let SubRegIndices = [sub_8bit] in {
- def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>;
- def R9W : RegisterWithSubRegs<"r9w", [R9B]>, DwarfRegNum<[9, -2, -2]>;
- def R10W : RegisterWithSubRegs<"r10w", [R10B]>, DwarfRegNum<[10, -2, -2]>;
- def R11W : RegisterWithSubRegs<"r11w", [R11B]>, DwarfRegNum<[11, -2, -2]>;
- def R12W : RegisterWithSubRegs<"r12w", [R12B]>, DwarfRegNum<[12, -2, -2]>;
- def R13W : RegisterWithSubRegs<"r13w", [R13B]>, DwarfRegNum<[13, -2, -2]>;
- def R14W : RegisterWithSubRegs<"r14w", [R14B]>, DwarfRegNum<[14, -2, -2]>;
- def R15W : RegisterWithSubRegs<"r15w", [R15B]>, DwarfRegNum<[15, -2, -2]>;
+ def IP : Register<"ip">;
+
+ // X86-64 only, requires REX.
+ let SubRegIndices = [sub_8bit], CostPerUse = 1 in {
+ def R8W : RegisterWithSubRegs<"r8w", [R8B]>;
+ def R9W : RegisterWithSubRegs<"r9w", [R9B]>;
+ def R10W : RegisterWithSubRegs<"r10w", [R10B]>;
+ def R11W : RegisterWithSubRegs<"r11w", [R11B]>;
+ def R12W : RegisterWithSubRegs<"r12w", [R12B]>;
+ def R13W : RegisterWithSubRegs<"r13w", [R13B]>;
+ def R14W : RegisterWithSubRegs<"r14w", [R14B]>;
+ def R15W : RegisterWithSubRegs<"r15w", [R15B]>;
}
// 32-bit registers
let SubRegIndices = [sub_16bit] in {
- def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[0, 0, 0]>;
- def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[1, 2, 2]>;
- def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[2, 1, 1]>;
- def EBX : RegisterWithSubRegs<"ebx", [BX]>, DwarfRegNum<[3, 3, 3]>;
- def ESI : RegisterWithSubRegs<"esi", [SI]>, DwarfRegNum<[4, 6, 6]>;
- def EDI : RegisterWithSubRegs<"edi", [DI]>, DwarfRegNum<[5, 7, 7]>;
- def EBP : RegisterWithSubRegs<"ebp", [BP]>, DwarfRegNum<[6, 4, 5]>;
- def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[7, 5, 4]>;
- def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>;
-
- // X86-64 only
- def R8D : RegisterWithSubRegs<"r8d", [R8W]>, DwarfRegNum<[8, -2, -2]>;
- def R9D : RegisterWithSubRegs<"r9d", [R9W]>, DwarfRegNum<[9, -2, -2]>;
- def R10D : RegisterWithSubRegs<"r10d", [R10W]>, DwarfRegNum<[10, -2, -2]>;
- def R11D : RegisterWithSubRegs<"r11d", [R11W]>, DwarfRegNum<[11, -2, -2]>;
- def R12D : RegisterWithSubRegs<"r12d", [R12W]>, DwarfRegNum<[12, -2, -2]>;
- def R13D : RegisterWithSubRegs<"r13d", [R13W]>, DwarfRegNum<[13, -2, -2]>;
- def R14D : RegisterWithSubRegs<"r14d", [R14W]>, DwarfRegNum<[14, -2, -2]>;
- def R15D : RegisterWithSubRegs<"r15d", [R15W]>, DwarfRegNum<[15, -2, -2]>;
- }
+ def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[-2, 0, 0]>;
+ def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[-2, 2, 2]>;
+ def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[-2, 1, 1]>;
+ def EBX : RegisterWithSubRegs<"ebx", [BX]>, DwarfRegNum<[-2, 3, 3]>;
+ def ESI : RegisterWithSubRegs<"esi", [SI]>, DwarfRegNum<[-2, 6, 6]>;
+ def EDI : RegisterWithSubRegs<"edi", [DI]>, DwarfRegNum<[-2, 7, 7]>;
+ def EBP : RegisterWithSubRegs<"ebp", [BP]>, DwarfRegNum<[-2, 4, 5]>;
+ def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[-2, 5, 4]>;
+ def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[-2, 8, 8]>;
+
+ // X86-64 only, requires REX
+ let CostPerUse = 1 in {
+ def R8D : RegisterWithSubRegs<"r8d", [R8W]>;
+ def R9D : RegisterWithSubRegs<"r9d", [R9W]>;
+ def R10D : RegisterWithSubRegs<"r10d", [R10W]>;
+ def R11D : RegisterWithSubRegs<"r11d", [R11W]>;
+ def R12D : RegisterWithSubRegs<"r12d", [R12W]>;
+ def R13D : RegisterWithSubRegs<"r13d", [R13W]>;
+ def R14D : RegisterWithSubRegs<"r14d", [R14W]>;
+ def R15D : RegisterWithSubRegs<"r15d", [R15W]>;
+ }}
// 64-bit registers, X86-64 only
let SubRegIndices = [sub_32bit] in {
@@ -127,6 +130,8 @@ let Namespace = "X86" in {
def RBP : RegisterWithSubRegs<"rbp", [EBP]>, DwarfRegNum<[6, -2, -2]>;
def RSP : RegisterWithSubRegs<"rsp", [ESP]>, DwarfRegNum<[7, -2, -2]>;
+ // These also require REX.
+ let CostPerUse = 1 in {
def R8 : RegisterWithSubRegs<"r8", [R8D]>, DwarfRegNum<[8, -2, -2]>;
def R9 : RegisterWithSubRegs<"r9", [R9D]>, DwarfRegNum<[9, -2, -2]>;
def R10 : RegisterWithSubRegs<"r10", [R10D]>, DwarfRegNum<[10, -2, -2]>;
@@ -136,7 +141,7 @@ let Namespace = "X86" in {
def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>;
def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>;
def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>;
- }
+ }}
// MMX Registers. These are actually aliased to ST0 .. ST7
def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>;
@@ -170,6 +175,7 @@ let Namespace = "X86" in {
def XMM7: Register<"xmm7">, DwarfRegNum<[24, 28, 28]>;
// X86-64 only
+ let CostPerUse = 1 in {
def XMM8: Register<"xmm8">, DwarfRegNum<[25, -2, -2]>;
def XMM9: Register<"xmm9">, DwarfRegNum<[26, -2, -2]>;
def XMM10: Register<"xmm10">, DwarfRegNum<[27, -2, -2]>;
@@ -178,26 +184,26 @@ let Namespace = "X86" in {
def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>;
def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
- }
+ }}
// YMM Registers, used by AVX instructions
let SubRegIndices = [sub_xmm] in {
- def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>;
- def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>;
- def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>;
- def YMM3: RegisterWithSubRegs<"ymm3", [XMM3]>, DwarfRegNum<[20, 24, 24]>;
- def YMM4: RegisterWithSubRegs<"ymm4", [XMM4]>, DwarfRegNum<[21, 25, 25]>;
- def YMM5: RegisterWithSubRegs<"ymm5", [XMM5]>, DwarfRegNum<[22, 26, 26]>;
- def YMM6: RegisterWithSubRegs<"ymm6", [XMM6]>, DwarfRegNum<[23, 27, 27]>;
- def YMM7: RegisterWithSubRegs<"ymm7", [XMM7]>, DwarfRegNum<[24, 28, 28]>;
- def YMM8: RegisterWithSubRegs<"ymm8", [XMM8]>, DwarfRegNum<[25, -2, -2]>;
- def YMM9: RegisterWithSubRegs<"ymm9", [XMM9]>, DwarfRegNum<[26, -2, -2]>;
- def YMM10: RegisterWithSubRegs<"ymm10", [XMM10]>, DwarfRegNum<[27, -2, -2]>;
- def YMM11: RegisterWithSubRegs<"ymm11", [XMM11]>, DwarfRegNum<[28, -2, -2]>;
- def YMM12: RegisterWithSubRegs<"ymm12", [XMM12]>, DwarfRegNum<[29, -2, -2]>;
- def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegNum<[30, -2, -2]>;
- def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegNum<[31, -2, -2]>;
- def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegNum<[32, -2, -2]>;
+ def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegAlias<XMM0>;
+ def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegAlias<XMM1>;
+ def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegAlias<XMM2>;
+ def YMM3: RegisterWithSubRegs<"ymm3", [XMM3]>, DwarfRegAlias<XMM3>;
+ def YMM4: RegisterWithSubRegs<"ymm4", [XMM4]>, DwarfRegAlias<XMM4>;
+ def YMM5: RegisterWithSubRegs<"ymm5", [XMM5]>, DwarfRegAlias<XMM5>;
+ def YMM6: RegisterWithSubRegs<"ymm6", [XMM6]>, DwarfRegAlias<XMM6>;
+ def YMM7: RegisterWithSubRegs<"ymm7", [XMM7]>, DwarfRegAlias<XMM7>;
+ def YMM8: RegisterWithSubRegs<"ymm8", [XMM8]>, DwarfRegAlias<XMM8>;
+ def YMM9: RegisterWithSubRegs<"ymm9", [XMM9]>, DwarfRegAlias<XMM9>;
+ def YMM10: RegisterWithSubRegs<"ymm10", [XMM10]>, DwarfRegAlias<XMM10>;
+ def YMM11: RegisterWithSubRegs<"ymm11", [XMM11]>, DwarfRegAlias<XMM11>;
+ def YMM12: RegisterWithSubRegs<"ymm12", [XMM12]>, DwarfRegAlias<XMM12>;
+ def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegAlias<XMM13>;
+ def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegAlias<XMM14>;
+ def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegAlias<XMM15>;
}
// Floating point stack registers
@@ -273,8 +279,8 @@ let Namespace = "X86" in {
// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
// cannot be encoded.
def GR8 : RegisterClass<"X86", [i8], 8,
- [AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
- R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]> {
+ (add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
+ R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -317,152 +323,38 @@ def GR8 : RegisterClass<"X86", [i8], 8,
}
def GR16 : RegisterClass<"X86", [i16], 16,
- [AX, CX, DX, SI, DI, BX, BP, SP,
- R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> {
+ (add AX, CX, DX, SI, DI, BX, BP, SP,
+ R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned X86_GR16_AO_64[] = {
- X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
- X86::R8W, X86::R9W, X86::R10W, X86::R11W,
- X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W, X86::BP
- };
-
- GR16Class::iterator
- GR16Class::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (Subtarget.is64Bit())
- return X86_GR16_AO_64;
- else
- return begin();
- }
-
- GR16Class::iterator
- GR16Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- if (Subtarget.is64Bit()) {
- // Does the function dedicate RBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate SP or BP.
- return array_endof(X86_GR16_AO_64) - 1;
- else
- // If not, just don't allocate SP.
- return array_endof(X86_GR16_AO_64);
- } else {
- // Does the function dedicate EBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate SP or BP.
- return begin() + 6;
- else
- // If not, just don't allocate SP.
- return begin() + 7;
- }
- }
- }];
}
def GR32 : RegisterClass<"X86", [i32], 32,
- [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
- R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
+ (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
+ R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned X86_GR32_AO_64[] = {
- X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
- X86::R8D, X86::R9D, X86::R10D, X86::R11D,
- X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP
- };
-
- GR32Class::iterator
- GR32Class::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (Subtarget.is64Bit())
- return X86_GR32_AO_64;
- else
- return begin();
- }
-
- GR32Class::iterator
- GR32Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- if (Subtarget.is64Bit()) {
- // Does the function dedicate RBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate ESP or EBP.
- return array_endof(X86_GR32_AO_64) - 1;
- else
- // If not, just don't allocate ESP.
- return array_endof(X86_GR32_AO_64);
- } else {
- // Does the function dedicate EBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate ESP or EBP.
- return begin() + 6;
- else
- // If not, just don't allocate ESP.
- return begin() + 7;
- }
- }
- }];
}
// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
// RIP isn't really a register and it can't be used anywhere except in an
// address, but it doesn't cause trouble.
def GR64 : RegisterClass<"X86", [i64], 64,
- [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- RBX, R14, R15, R12, R13, RBP, RSP, RIP]> {
+ (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ RBX, R14, R15, R12, R13, RBP, RSP, RIP)> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
(GR16 sub_16bit),
(GR32 sub_32bit)];
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- GR64Class::iterator
- GR64Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- if (!Subtarget.is64Bit())
- return begin(); // None of these are allocatable in 32-bit.
- // Does the function dedicate RBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- return end()-3; // If so, don't allocate RIP, RSP or RBP
- else
- return end()-2; // If not, just don't allocate RIP or RSP
- }
- }];
}
// Segment registers for use by MOV instructions (and others) that have a
// segment register as one operand. Always contain a 16-bit segment
// descriptor.
-def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]>;
+def SEGMENT_REG : RegisterClass<"X86", [i16], 16, (add CS, DS, SS, ES, FS, GS)>;
// Debug registers.
-def DEBUG_REG : RegisterClass<"X86", [i32], 32,
- [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]>;
+def DEBUG_REG : RegisterClass<"X86", [i32], 32, (sequence "DR%u", 0, 7)>;
// Control registers.
-def CONTROL_REG : RegisterClass<"X86", [i64], 64,
- [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8,
- CR9, CR10, CR11, CR12, CR13, CR14, CR15]>;
+def CONTROL_REG : RegisterClass<"X86", [i64], 64, (sequence "CR%u", 0, 15)>;
// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
// GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d"
@@ -470,38 +362,38 @@ def CONTROL_REG : RegisterClass<"X86", [i64], 64,
// that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD,
// and GR64_ABCD are classes for registers that support 8-bit h-register
// operations.
-def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]>;
-def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]>;
-def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> {
+def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, BL)>;
+def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>;
+def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)> {
let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)];
}
-def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
+def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)> {
let SubRegClasses = [(GR8_ABCD_L sub_8bit),
(GR8_ABCD_H sub_8bit_hi),
(GR16_ABCD sub_16bit)];
}
-def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
+def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)> {
let SubRegClasses = [(GR8_ABCD_L sub_8bit),
(GR8_ABCD_H sub_8bit_hi),
(GR16_ABCD sub_16bit),
(GR32_ABCD sub_32bit)];
}
-def GR32_TC : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX]> {
+def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
}
-def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
- R8, R9, R11]> {
+def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
+ R8, R9, R11, RIP)> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
(GR16 sub_16bit),
(GR32_TC sub_32bit)];
}
-def GR64_TCW64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX,
- R8, R9, R11]>;
+def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
+ R8, R9, R11)>;
// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
- [AL, CL, DL, AH, CH, DH, BL, BH]> {
+ (add AL, CL, DL, AH, CH, DH, BL, BH)> {
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -535,232 +427,62 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8,
}
// GR16_NOREX - GR16 registers which do not require a REX prefix.
def GR16_NOREX : RegisterClass<"X86", [i16], 16,
- [AX, CX, DX, SI, DI, BX, BP, SP]> {
+ (add AX, CX, DX, SI, DI, BX, BP, SP)> {
let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi)];
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- GR16_NOREXClass::iterator
- GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- // Does the function dedicate RBP / EBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate SP or BP.
- return end() - 2;
- else
- // If not, just don't allocate SP.
- return end() - 1;
- }
- }];
}
// GR32_NOREX - GR32 registers which do not require a REX prefix.
def GR32_NOREX : RegisterClass<"X86", [i32], 32,
- [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
+ (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)> {
let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
(GR16_NOREX sub_16bit)];
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- GR32_NOREXClass::iterator
- GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- // Does the function dedicate RBP / EBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate ESP or EBP.
- return end() - 2;
- else
- // If not, just don't allocate ESP.
- return end() - 1;
- }
- }];
}
// GR64_NOREX - GR64 registers which do not require a REX prefix.
def GR64_NOREX : RegisterClass<"X86", [i64], 64,
- [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP]> {
+ (add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)> {
let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
(GR16_NOREX sub_16bit),
(GR32_NOREX sub_32bit)];
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- GR64_NOREXClass::iterator
- GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- // Does the function dedicate RBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate RIP, RSP or RBP.
- return end() - 3;
- else
- // If not, just don't allocate RIP or RSP.
- return end() - 2;
- }
- }];
}
// GR32_NOSP - GR32 registers except ESP.
-def GR32_NOSP : RegisterClass<"X86", [i32], 32,
- [EAX, ECX, EDX, ESI, EDI, EBX, EBP,
- R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
+def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned X86_GR32_NOSP_AO_64[] = {
- X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
- X86::R8D, X86::R9D, X86::R10D, X86::R11D,
- X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP
- };
-
- GR32_NOSPClass::iterator
- GR32_NOSPClass::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (Subtarget.is64Bit())
- return X86_GR32_NOSP_AO_64;
- else
- return begin();
- }
-
- GR32_NOSPClass::iterator
- GR32_NOSPClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- if (Subtarget.is64Bit()) {
- // Does the function dedicate RBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate EBP.
- return array_endof(X86_GR32_NOSP_AO_64) - 1;
- else
- // If not, any reg in this class is ok.
- return array_endof(X86_GR32_NOSP_AO_64);
- } else {
- // Does the function dedicate EBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate EBP.
- return begin() + 6;
- else
- // If not, any reg in this class is ok.
- return begin() + 7;
- }
- }
- }];
}
// GR64_NOSP - GR64 registers except RSP (and RIP).
-def GR64_NOSP : RegisterClass<"X86", [i64], 64,
- [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- RBX, R14, R15, R12, R13, RBP]> {
+def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
(GR16 sub_16bit),
(GR32_NOSP sub_32bit)];
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- GR64_NOSPClass::iterator
- GR64_NOSPClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- if (!Subtarget.is64Bit())
- return begin(); // None of these are allocatable in 32-bit.
- // Does the function dedicate RBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- return end()-1; // If so, don't allocate RBP
- else
- return end(); // If not, any reg in this class is ok.
- }
- }];
+}
+
+// GR32_NOREX_NOSP - GR32 registers which do not require a REX prefix except
+// ESP.
+def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
+ (and GR32_NOREX, GR32_NOSP)> {
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit)];
}
// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
- [RAX, RCX, RDX, RSI, RDI, RBX, RBP]> {
+ (and GR64_NOREX, GR64_NOSP)> {
let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
(GR16_NOREX sub_16bit),
- (GR32_NOREX sub_32bit)];
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- GR64_NOREX_NOSPClass::iterator
- GR64_NOREX_NOSPClass::allocation_order_end(const MachineFunction &MF) const
- {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- // Does the function dedicate RBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate RBP.
- return end() - 1;
- else
- // If not, any reg in this class is ok.
- return end();
- }
- }];
+ (GR32_NOREX_NOSP sub_32bit)];
}
// A class to support the 'A' assembler constraint: EAX then EDX.
-def GR32_AD : RegisterClass<"X86", [i32], 32, [EAX, EDX]> {
+def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)> {
let SubRegClasses = [(GR8_ABCD_L sub_8bit),
(GR8_ABCD_H sub_8bit_hi),
(GR16_ABCD sub_16bit)];
}
// Scalar SSE2 floating point registers.
-def FR32 : RegisterClass<"X86", [f32], 32,
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11,
- XMM12, XMM13, XMM14, XMM15]> {
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- FR32Class::iterator
- FR32Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (!Subtarget.is64Bit())
- return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
- else
- return end();
- }
- }];
-}
+def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
-def FR64 : RegisterClass<"X86", [f64], 64,
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11,
- XMM12, XMM13, XMM14, XMM15]> {
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- FR64Class::iterator
- FR64Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (!Subtarget.is64Bit())
- return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
- else
- return end();
- }
- }];
-}
+def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
// FIXME: This sets up the floating point register files as though they are f64
@@ -769,85 +491,31 @@ def FR64 : RegisterClass<"X86", [f64], 64,
// faster on common hardware. In reality, this should be controlled by a
// command line option or something.
-def RFP32 : RegisterClass<"X86",[f32], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
-def RFP64 : RegisterClass<"X86",[f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
-def RFP80 : RegisterClass<"X86",[f80], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
+def RFP32 : RegisterClass<"X86",[f32], 32, (sequence "FP%u", 0, 6)>;
+def RFP64 : RegisterClass<"X86",[f64], 32, (add RFP32)>;
+def RFP80 : RegisterClass<"X86",[f80], 32, (add RFP32)>;
// Floating point stack registers (these are not allocatable by the
// register allocator - the floating point stackifier is responsible
// for transforming FPn allocations to STn registers)
-def RST : RegisterClass<"X86", [f80, f64, f32], 32,
- [ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7]> {
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- RSTClass::iterator
- RSTClass::allocation_order_end(const MachineFunction &MF) const {
- return begin();
- }
- }];
+def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
+ let isAllocatable = 0;
}
// Generic vector registers: VR64 and VR128.
-def VR64: RegisterClass<"X86", [x86mmx], 64,
- [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
-def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11,
- XMM12, XMM13, XMM14, XMM15]> {
+def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
+def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (add FR32)> {
let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
-
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- VR128Class::iterator
- VR128Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (!Subtarget.is64Bit())
- return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
- else
- return end();
- }
- }];
}
def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256,
- [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
- YMM8, YMM9, YMM10, YMM11,
- YMM12, YMM13, YMM14, YMM15]> {
+ (sequence "YMM%u", 0, 15)> {
let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
-
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- VR256Class::iterator
- VR256Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (!Subtarget.is64Bit())
- return end()-8; // Only YMM0 to YMM7 are available in 32-bit mode.
- else
- return end();
- }
- }];
}
// Status flags registers.
-def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {
+def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> {
let CopyCost = -1; // Don't allow copying of status registers.
-
- // EFLAGS is not allocatable.
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- CCRClass::iterator
- CCRClass::allocation_order_end(const MachineFunction &MF) const {
- return allocation_order_begin(MF);
- }
- }];
+ let isAllocatable = 0;
}
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 42e8193..02754f9 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -178,7 +178,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const {
- // This requires the copy size to be a constant, preferrably
+ // This requires the copy size to be a constant, preferably
// within a subtarget-specific limit.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (!ConstantSize)
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 1ee7312..481e821 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -144,7 +144,8 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
/// passed as the second argument. Otherwise it returns null.
const char *X86Subtarget::getBZeroEntry() const {
// Darwin 10 has a __bzero entry point for this purpose.
- if (getDarwinVers() >= 10)
+ if (getTargetTriple().isMacOSX() &&
+ !getTargetTriple().isMacOSXVersionLT(10, 6))
return "__bzero";
return 0;
@@ -264,6 +265,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);
HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
+ HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1);
HasAES = IsIntel && ((ECX >> 25) & 0x1);
if (IsIntel || IsAMD) {
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 0a62a02..286a798 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -165,9 +165,15 @@ public:
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
- bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
- bool isTargetFreeBSD() const { return TargetTriple.getOS() == Triple::FreeBSD; }
- bool isTargetSolaris() const { return TargetTriple.getOS() == Triple::Solaris; }
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+ bool isTargetFreeBSD() const {
+ return TargetTriple.getOS() == Triple::FreeBSD;
+ }
+ bool isTargetSolaris() const {
+ return TargetTriple.getOS() == Triple::Solaris;
+ }
// ELF is a reasonably sane default and the only other X86 targets we
// support are Darwin and Windows. Just use "not those".
@@ -215,13 +221,6 @@ public:
return PICStyle == PICStyles::StubDynamicNoPIC ||
PICStyle == PICStyles::StubPIC; }
- /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard,
- /// 10 = Snow Leopard, etc.
- unsigned getDarwinVers() const {
- if (isTargetDarwin()) return TargetTriple.getDarwinMajorNumber();
- return 0;
- }
-
/// ClassifyGlobalReference - Classify a global variable reference for the
/// current subtarget according to how we should reference it in a non-pcrel
/// context.
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 8fb9470..7483329 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -26,19 +26,18 @@ using namespace llvm;
static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
- switch (TheTriple.getOS()) {
- case Triple::Darwin:
- return new X86MCAsmInfoDarwin(TheTriple);
- case Triple::MinGW32:
- case Triple::Cygwin:
- case Triple::Win32:
- if (TheTriple.getEnvironment() == Triple::MachO)
- return new X86MCAsmInfoDarwin(TheTriple);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
+ if (TheTriple.getArch() == Triple::x86_64)
+ return new X86_64MCAsmInfoDarwin(TheTriple);
else
- return new X86MCAsmInfoCOFF(TheTriple);
- default:
- return new X86ELFMCAsmInfo(TheTriple);
+ return new X86MCAsmInfoDarwin(TheTriple);
}
+
+ if (TheTriple.isOSWindows())
+ return new X86MCAsmInfoCOFF(TheTriple);
+
+ return new X86ELFMCAsmInfo(TheTriple);
}
static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
@@ -48,19 +47,14 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
bool RelaxAll,
bool NoExecStack) {
Triple TheTriple(TT);
- switch (TheTriple.getOS()) {
- case Triple::Darwin:
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
- case Triple::MinGW32:
- case Triple::Cygwin:
- case Triple::Win32:
- if (TheTriple.getEnvironment() == Triple::MachO)
- return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
- else
- return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
- default:
- return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
- }
+
+ if (TheTriple.isOSWindows())
+ return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
+
+ return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
}
extern "C" void LLVMInitializeX86Target() {
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index c15dfbb..1231798 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -38,6 +38,12 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
}
+MCSymbol *X8664_MachoTargetObjectFile::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ return Mang->getSymbol(GV);
+}
+
unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const {
if (TM.getRelocationModel() == Reloc::PIC_)
return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
@@ -52,7 +58,7 @@ unsigned X8632_ELFTargetObjectFile::getLSDAEncoding() const {
return DW_EH_PE_absptr;
}
-unsigned X8632_ELFTargetObjectFile::getFDEEncoding() const {
+unsigned X8632_ELFTargetObjectFile::getFDEEncoding(bool FDE) const {
if (TM.getRelocationModel() == Reloc::PIC_)
return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
else
@@ -91,17 +97,14 @@ unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const {
return DW_EH_PE_absptr;
}
-unsigned X8664_ELFTargetObjectFile::getFDEEncoding() const {
- CodeModel::Model Model = TM.getCodeModel();
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_pcrel | (Model == CodeModel::Small ||
- Model == CodeModel::Medium ?
- DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
+unsigned X8664_ELFTargetObjectFile::getFDEEncoding(bool CFI) const {
+ if (CFI)
+ return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
- if (Model == CodeModel::Small || Model == CodeModel::Medium)
- return DW_EH_PE_udata4;
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
- return DW_EH_PE_absptr;
+ return DW_EH_PE_udata4;
}
unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const {
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index f2fd49c..e21b5bf 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -25,6 +25,12 @@ namespace llvm {
getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI, unsigned Encoding,
MCStreamer &Streamer) const;
+
+ // getCFIPersonalitySymbol - The symbol that gets passed to
+ // .cfi_personality.
+ virtual MCSymbol *
+ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const;
};
class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
@@ -34,7 +40,7 @@ namespace llvm {
:TM(tm) { }
virtual unsigned getPersonalityEncoding() const;
virtual unsigned getLSDAEncoding() const;
- virtual unsigned getFDEEncoding() const;
+ virtual unsigned getFDEEncoding(bool CFI) const;
virtual unsigned getTTypeEncoding() const;
};
@@ -45,7 +51,7 @@ namespace llvm {
:TM(tm) { }
virtual unsigned getPersonalityEncoding() const;
virtual unsigned getLSDAEncoding() const;
- virtual unsigned getFDEEncoding() const;
+ virtual unsigned getFDEEncoding(bool CFI) const;
virtual unsigned getTTypeEncoding() const;
};