Diffstat (limited to 'lib/Target/X86')
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 26
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 7
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 3
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 1
-rw-r--r--  lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 7
-rw-r--r--  lib/Target/X86/InstPrinter/X86ATTInstPrinter.h | 7
-rw-r--r--  lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 7
-rw-r--r--  lib/Target/X86/InstPrinter/X86IntelInstPrinter.h | 7
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 1
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 3
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 5
-rw-r--r--  lib/Target/X86/README.txt | 18
-rw-r--r--  lib/Target/X86/Utils/X86ShuffleDecode.cpp | 3
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp | 3
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 2
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 3
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 165
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 461
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 18
-rw-r--r--  lib/Target/X86/X86InstrArithmetic.td | 58
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td | 85
-rw-r--r--  lib/Target/X86/X86InstrControl.td | 15
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 8
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 4
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 5
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 243
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 5
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.cpp | 1
28 files changed, 750 insertions, 421 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 9e88472..08c732c 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -17,7 +17,6 @@
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
@@ -951,20 +950,21 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
(PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
- bool IsVCMP = PatchedName.startswith("vcmp");
+ bool IsVCMP = PatchedName[0] == 'v';
unsigned SSECCIdx = IsVCMP ? 4 : 3;
unsigned SSEComparisonCode = StringSwitch<unsigned>(
PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
- .Case("eq", 0)
- .Case("lt", 1)
- .Case("le", 2)
- .Case("unord", 3)
- .Case("neq", 4)
- .Case("nlt", 5)
- .Case("nle", 6)
- .Case("ord", 7)
- .Case("eq_uq", 8)
- .Case("nge", 9)
+ .Case("eq", 0x00)
+ .Case("lt", 0x01)
+ .Case("le", 0x02)
+ .Case("unord", 0x03)
+ .Case("neq", 0x04)
+ .Case("nlt", 0x05)
+ .Case("nle", 0x06)
+ .Case("ord", 0x07)
+ /* AVX only from here */
+ .Case("eq_uq", 0x08)
+ .Case("nge", 0x09)
.Case("ngt", 0x0A)
.Case("false", 0x0B)
.Case("neq_oq", 0x0C)
@@ -988,7 +988,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
.Case("gt_oq", 0x1E)
.Case("true_us", 0x1F)
.Default(~0U);
- if (SSEComparisonCode != ~0U) {
+ if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
getParser().getContext());
if (PatchedName.endswith("ss")) {
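The AsmParser hunk above does two things: the comparison-code table is rewritten in hexadecimal, and codes 0x08 and up are now accepted only for the VEX-encoded vcmp* mnemonics, since the legacy SSE cmpps/cmppd/cmpss/cmpsd forms only define the first eight predicates. A rough sketch of the resulting assembler behavior (AT&T syntax; the exact diagnostics are an assumption, not part of the patch):

    cmpless      %xmm1, %xmm0          # "le"    -> 0x02, still accepted for SSE
    cmpeq_uqss   %xmm1, %xmm0          # "eq_uq" -> 0x08, AVX-only: no longer matched here
    vcmpeq_uqss  %xmm1, %xmm2, %xmm0   # accepted, the VEX form takes the full 5-bit predicate
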
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 8278bde..b13a006 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -322,7 +322,12 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
OperandType type = (OperandType)operand.type;
+ bool isBranch = false;
+ uint64_t pcrel = 0;
if (type == TYPE_RELv) {
+ isBranch = true;
+ pcrel = insn.startLocation +
+ insn.displacementOffset + insn.displacementSize;
switch (insn.displacementSize) {
default:
break;
@@ -373,8 +378,6 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
}
}
- bool isBranch = false;
- uint64_t pcrel = 0;
switch (type) {
case TYPE_XMM128:
mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index fbd81d2..6020877 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -1527,6 +1527,9 @@ static int readOperands(struct InternalInstruction* insn) {
if (insn->spec->operands[index].type == TYPE_IMM3 &&
insn->immediates[insn->numImmediatesConsumed - 1] > 7)
return -1;
+ if (insn->spec->operands[index].type == TYPE_IMM5 &&
+ insn->immediates[insn->numImmediatesConsumed - 1] > 31)
+ return -1;
if (insn->spec->operands[index].type == TYPE_XMM128 ||
insn->spec->operands[index].type == TYPE_XMM256)
sawRegImm = 1;
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index d2e30f1..13e1136 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -273,6 +273,7 @@ struct ContextDecision {
ENUM_ENTRY(TYPE_IMM32, "4-byte") \
ENUM_ENTRY(TYPE_IMM64, "8-byte") \
ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \
+ ENUM_ENTRY(TYPE_IMM5, "1-byte immediate operand between 0 and 31") \
ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \
ENUM_ENTRY(TYPE_RM16, "2-byte") \
ENUM_ENTRY(TYPE_RM32, "4-byte") \
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index b7ccb4c..5118e4c 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -19,6 +19,8 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
@@ -26,7 +28,6 @@
using namespace llvm;
// Include the auto-generated portion of the assembly writer.
-#define GET_INSTRUCTION_NAME
#define PRINT_ALIAS_INSTR
#include "X86GenAsmWriter.inc"
@@ -49,10 +50,6 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
}
-StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
- return getInstructionName(Opcode);
-}
-
void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
raw_ostream &O) {
switch (MI->getOperand(Op).getImm()) {
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index ff94301..2e00bff 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -22,12 +22,12 @@ class MCOperand;
class X86ATTInstPrinter : public MCInstPrinter {
public:
- X86ATTInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MRI) {}
+ X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot);
- virtual StringRef getOpcodeName(unsigned Opcode) const;
// Autogenerated by tblgen, returns true if we successfully printed an
// alias.
@@ -36,7 +36,6 @@ public:
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &OS);
static const char *getRegisterName(unsigned RegNo);
- static const char *getInstructionName(unsigned Opcode);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS);
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 46a96d2..4ea662c 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -17,15 +17,13 @@
#include "X86InstComments.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include <cctype>
using namespace llvm;
-// Include the auto-generated portion of the assembly writer.
-#define GET_INSTRUCTION_NAME
#include "X86GenAsmWriter1.inc"
void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
@@ -43,9 +41,6 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
if (CommentStream)
EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
}
-StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const {
- return getInstructionName(Opcode);
-}
void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
raw_ostream &O) {
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index ea1d38a..4f5938d 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -23,17 +23,16 @@ class MCOperand;
class X86IntelInstPrinter : public MCInstPrinter {
public:
- X86IntelInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MRI) {}
+ X86IntelInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot);
- virtual StringRef getOpcodeName(unsigned Opcode) const;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- static const char *getInstructionName(unsigned Opcode);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 3f770f7..32e40fe 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -9,7 +9,6 @@
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86FixupKinds.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 003a14a..afa545c 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -155,4 +155,7 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index efd18c7..3482363 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -474,12 +474,13 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
static MCInstPrinter *createX86MCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
- return new X86ATTInstPrinter(MAI, MRI);
+ return new X86ATTInstPrinter(MAI, MII, MRI);
if (SyntaxVariant == 1)
- return new X86IntelInstPrinter(MAI, MRI);
+ return new X86IntelInstPrinter(MAI, MII, MRI);
return 0;
}
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index f9c1d35..6a8a4fd 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -2060,3 +2060,21 @@ Instead we could generate:
The trick is to match "fetch_and_add(X, -C) == C".
//===---------------------------------------------------------------------===//
+
+unsigned t(unsigned a, unsigned b) {
+ return a <= b ? 5 : -5;
+}
+
+We generate:
+ movl $5, %ecx
+ cmpl %esi, %edi
+ movl $-5, %eax
+ cmovbel %ecx, %eax
+
+GCC:
+ cmpl %edi, %esi
+ sbbl %eax, %eax
+ andl $-10, %eax
+ addl $5, %eax
+
+//===---------------------------------------------------------------------===//
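For reference, the GCC sequence is branchless: cmpl %edi, %esi sets the carry flag exactly when b < a (unsigned), and the remaining three instructions map that flag to the two constants without a cmov:

    sbbl  %eax, %eax     # eax = CF ? -1  : 0
    andl  $-10, %eax     # eax = CF ? -10 : 0
    addl  $5, %eax       # eax = CF ? -5  : 5    (CF set <=> a > b)
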
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 32c722a..a802333 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -169,6 +169,9 @@ void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
void DecodeVPERM2X128Mask(EVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask) {
+ if (Imm & 0x88)
+ return; // Not a shuffle
+
unsigned HalfSize = VT.getVectorNumElements()/2;
unsigned FstHalfBegin = (Imm & 0x3) * HalfSize;
unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
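The early return added above rejects immediates with a zeroing bit set: in VPERM2F128/VPERM2I128, bits 1:0 and 5:4 of the immediate select the 128-bit source half for the low and high destination halves, while bits 3 and 7 (together 0x88) zero the corresponding half, which a plain shuffle mask cannot express. A worked example of the selector math that follows, for a 4-element 256-bit type (HalfSize = 2):

    Imm = 0x31:
      FstHalfBegin = (0x31 & 0x3) * 2        = 2  ->  elements 2,3 (high half of source 1)
      SndHalfBegin = ((0x31 >> 4) & 0x3) * 2 = 6  ->  elements 6,7 (high half of source 2)
      ShuffleMask  = <2, 3, 6, 7>
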
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index f1cedf3..7db7ccb 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -19,7 +19,6 @@
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "InstPrinter/X86ATTInstPrinter.h"
-#include "InstPrinter/X86IntelInstPrinter.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
@@ -265,8 +264,8 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op,
raw_ostream &O) {
unsigned char value = MI->getOperand(Op).getImm();
- assert(value <= 7 && "Invalid ssecc argument!");
switch (value) {
+ default: llvm_unreachable("Invalid ssecc argument!");
case 0: O << "eq"; break;
case 1: O << "lt"; break;
case 2: O << "le"; break;
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 3d63b7e..69752c5 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -2179,7 +2179,7 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
namespace llvm {
- llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
+ FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
return new X86FastISel(funcInfo);
}
}
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 936df27..ed1707d 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -28,7 +28,6 @@
#include "X86InstrInfo.h"
#include "llvm/InlineAsm.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -219,7 +218,7 @@ namespace {
/// getSTReg - Return the X86::ST(i) register which contains the specified
/// FP<RegNo> register.
unsigned getSTReg(unsigned RegNo) const {
- return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
+ return StackTop - 1 - getSlot(RegNo) + X86::ST0;
}
// pushReg - Push the specified FP<n> register onto the stack.
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 9405c2f..8e2b1d6 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -36,7 +36,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -621,14 +620,14 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
// Handle X86-64 rip-relative addresses. We check this before checking direct
// folding because RIP is preferable to non-RIP accesses.
- if (Subtarget->is64Bit() &&
+ if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
// Under X86-64 non-small code model, GV (and friends) are 64-bits, so
// they cannot be folded into immediate fields.
// FIXME: This can be improved for kernel and other models?
- (M == CodeModel::Small || M == CodeModel::Kernel) &&
- // Base and index reg must be 0 in order to use %rip as base and lowering
- // must allow RIP.
- !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
+ (M == CodeModel::Small || M == CodeModel::Kernel)) {
+ // Base and index reg must be 0 in order to use %rip as base.
+ if (AM.hasBaseOrIndexReg())
+ return true;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
X86ISelAddressMode Backup = AM;
AM.GV = G->getGlobal();
@@ -663,11 +662,12 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
}
// Handle the case when globals fit in our immediate field: This is true for
- // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit
- // mode, this results in a non-RIP-relative computation.
+ // X86-32 always and X86-64 when in -mcmodel=small mode. In 64-bit
+ // mode, this only applies to a non-RIP-relative computation.
if (!Subtarget->is64Bit() ||
- ((M == CodeModel::Small || M == CodeModel::Kernel) &&
- TM.getRelocationModel() == Reloc::Static)) {
+ M == CodeModel::Small || M == CodeModel::Kernel) {
+ assert(N.getOpcode() != X86ISD::WrapperRIP &&
+ "RIP-relative addressing already handled");
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
AM.GV = G->getGlobal();
AM.Disp += G->getOffset();
@@ -897,7 +897,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
APInt MaskedHighBits = APInt::getHighBitsSet(X.getValueSizeInBits(),
MaskLZ);
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(X, MaskedHighBits, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(X, KnownZero, KnownOne);
if (MaskedHighBits != KnownZero) return true;
// We've identified a pattern that can be transformed into a single shift
@@ -1848,6 +1848,96 @@ static bool HasNoSignedComparisonUses(SDNode *N) {
return true;
}
+/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode
+/// is suitable for doing the {load; increment or decrement; store} to modify
+/// transformation.
+static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
+ SDValue StoredVal, SelectionDAG *CurDAG,
+ LoadSDNode* &LoadNode, SDValue &InputChain) {
+
+ // is the value stored the result of a DEC or INC?
+ if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false;
+
+ // is the stored value result 0 of the load?
+ if (StoredVal.getResNo() != 0) return false;
+
+ // are there other uses of the loaded value than the inc or dec?
+ if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
+
+ // is the store non-extending and non-indexed?
+ if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
+ return false;
+
+ SDValue Load = StoredVal->getOperand(0);
+ // Is the stored value a non-extending and non-indexed load?
+ if (!ISD::isNormalLoad(Load.getNode())) return false;
+
+ // Return LoadNode by reference.
+ LoadNode = cast<LoadSDNode>(Load);
+ // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8)
+ EVT LdVT = LoadNode->getMemoryVT();
+ if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
+ LdVT != MVT::i8)
+ return false;
+
+ // Is store the only read of the loaded value?
+ if (!Load.hasOneUse())
+ return false;
+
+ // Is the address of the store the same as the load?
+ if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
+ LoadNode->getOffset() != StoreNode->getOffset())
+ return false;
+
+ // Check if the chain is produced by the load or is a TokenFactor with
+ // the load output chain as an operand. Return InputChain by reference.
+ SDValue Chain = StoreNode->getChain();
+
+ bool ChainCheck = false;
+ if (Chain == Load.getValue(1)) {
+ ChainCheck = true;
+ InputChain = LoadNode->getChain();
+ } else if (Chain.getOpcode() == ISD::TokenFactor) {
+ SmallVector<SDValue, 4> ChainOps;
+ for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
+ SDValue Op = Chain.getOperand(i);
+ if (Op == Load.getValue(1)) {
+ ChainCheck = true;
+ continue;
+ }
+ ChainOps.push_back(Op);
+ }
+
+ if (ChainCheck)
+ // Make a new TokenFactor with all the other input chains except
+ // for the load.
+ InputChain = CurDAG->getNode(ISD::TokenFactor, Chain.getDebugLoc(),
+ MVT::Other, &ChainOps[0], ChainOps.size());
+ }
+ if (!ChainCheck)
+ return false;
+
+ return true;
+}
+
+/// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory
+/// increment or decrement. Opc should be X86ISD::DEC or X86ISD::INC.
+static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
+ if (Opc == X86ISD::DEC) {
+ if (LdVT == MVT::i64) return X86::DEC64m;
+ if (LdVT == MVT::i32) return X86::DEC32m;
+ if (LdVT == MVT::i16) return X86::DEC16m;
+ if (LdVT == MVT::i8) return X86::DEC8m;
+ } else {
+ assert(Opc == X86ISD::INC && "unrecognized opcode");
+ if (LdVT == MVT::i64) return X86::INC64m;
+ if (LdVT == MVT::i32) return X86::INC32m;
+ if (LdVT == MVT::i16) return X86::INC16m;
+ if (LdVT == MVT::i8) return X86::INC8m;
+ }
+ llvm_unreachable("unrecognized size for LdVT");
+}
+
SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
EVT NVT = Node->getValueType(0);
unsigned Opc, MOpc;
@@ -2355,9 +2445,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
break;
}
case ISD::STORE: {
+ // Change a chain of {load; incr or dec; store} of the same value into
+ // a simple increment or decrement through memory of that value, if the
+ // uses of the modified value and its address are suitable.
// The DEC64m tablegen pattern is currently not able to match the case where
- // the EFLAGS on the original DEC are used.
- // we'll need to improve tablegen to allow flags to be transferred from a
+ // the EFLAGS on the original DEC are used. (This also applies to
+ // {INC,DEC}X{64,32,16,8}.)
+ // We'll need to improve tablegen to allow flags to be transferred from a
// node in the pattern to the result node. probably with a new keyword
// for example, we have this
// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
@@ -2367,44 +2461,17 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
// [(store (add (loadi64 addr:$dst), -1), addr:$dst),
// (transferrable EFLAGS)]>;
+
StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
- SDValue Chain = StoreNode->getOperand(0);
SDValue StoredVal = StoreNode->getOperand(1);
- SDValue Address = StoreNode->getOperand(2);
- SDValue Undef = StoreNode->getOperand(3);
-
- if (StoreNode->getMemOperand()->getSize() != 8 ||
- Undef->getOpcode() != ISD::UNDEF ||
- Chain->getOpcode() != ISD::LOAD ||
- StoredVal->getOpcode() != X86ISD::DEC ||
- StoredVal.getResNo() != 0 ||
- !StoredVal.getNode()->hasNUsesOfValue(1, 0) ||
- !Chain.getNode()->hasNUsesOfValue(1, 0) ||
- StoredVal->getOperand(0).getNode() != Chain.getNode())
- break;
+ unsigned Opc = StoredVal->getOpcode();
- //OPC_CheckPredicate, 1, // Predicate_nontemporalstore
- if (StoreNode->isNonTemporal())
+ LoadSDNode *LoadNode = 0;
+ SDValue InputChain;
+ if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG,
+ LoadNode, InputChain))
break;
- LoadSDNode *LoadNode = cast<LoadSDNode>(Chain.getNode());
- if (LoadNode->getOperand(1) != Address ||
- LoadNode->getOperand(2) != Undef)
- break;
-
- if (!ISD::isNormalLoad(LoadNode))
- break;
-
- if (!ISD::isNormalStore(StoreNode))
- break;
-
- // check load chain has only one use (from the store)
- if (!Chain.hasOneUse())
- break;
-
- // Merge the input chains if they are not intra-pattern references.
- SDValue InputChain = LoadNode->getOperand(0);
-
SDValue Base, Scale, Index, Disp, Segment;
if (!SelectAddr(LoadNode, LoadNode->getBasePtr(),
Base, Scale, Index, Disp, Segment))
@@ -2414,7 +2481,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
MemOp[0] = StoreNode->getMemOperand();
MemOp[1] = LoadNode->getMemOperand();
const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
- MachineSDNode *Result = CurDAG->getMachineNode(X86::DEC64m,
+ EVT LdVT = LoadNode->getMemoryVT();
+ unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
+ MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
Node->getDebugLoc(),
MVT::i32, MVT::Other, Ops,
array_lengthof(Ops));
@@ -2465,6 +2534,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
- llvm::CodeGenOpt::Level OptLevel) {
+ CodeGenOpt::Level OptLevel) {
return new X86DAGToDAGISel(TM, OptLevel);
}
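
The new isLoadIncOrDecStore/getFusedLdStOpcode path above generalizes the old DEC64m-only special case: a {load; INC or DEC; store} chain over the same i8/i16/i32/i64 address is reselected as a single read-modify-write instruction, and this still works when the EFLAGS result of the INC/DEC has other users, which the plain tablegen store patterns cannot express. A sketch of the kind of source this targets (bar() is a stand-in; the exact output depends on the surrounding code):

    void f(int *p) {
      if (--*p)
        bar();
    }

    decl  (%rdi)          # load, decrement and store in one instruction
    jne   ...             # branch directly on the flags of that decrement
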
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 88f3829..04299f3 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1578,18 +1578,20 @@ X86TargetLowering::LowerReturn(SDValue Chain,
MVT::Other, &RetOps[0], RetOps.size());
}
-bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
+bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (N->getNumValues() != 1)
return false;
if (!N->hasNUsesOfValue(1, 0))
return false;
+ SDValue TCChain = Chain;
SDNode *Copy = *N->use_begin();
if (Copy->getOpcode() == ISD::CopyToReg) {
// If the copy has a glue operand, we conservatively assume it isn't safe to
// perform a tail call.
if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
return false;
+ TCChain = Copy->getOperand(0);
} else if (Copy->getOpcode() != ISD::FP_EXTEND)
return false;
@@ -1601,7 +1603,11 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
HasRet = true;
}
- return HasRet;
+ if (!HasRet)
+ return false;
+
+ Chain = TCChain;
+ return true;
}
EVT
@@ -2929,6 +2935,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::VPERMILP:
+ case X86ISD::VPERMI:
return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
}
}
@@ -3970,6 +3977,27 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
return Index / NumElemsPerChunk;
}
+/// getShuffleCLImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions.
+/// Handles 256-bit.
+static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ assert((VT.is256BitVector() && NumElts == 4) &&
+ "Unsupported vector type for VPERMQ/VPERMPD");
+
+ unsigned Mask = 0;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Elt = N->getMaskElt(i);
+ if (Elt < 0)
+ continue;
+ Mask |= Elt << (i*2);
+ }
+
+ return Mask;
+}
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
@@ -4402,6 +4430,7 @@ static bool getTargetShuffleMask(SDNode *N, EVT VT,
case X86ISD::VPERM2X128:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ if (Mask.empty()) return false;
break;
case X86ISD::MOVDDUP:
case X86ISD::MOVLHPD:
@@ -4852,41 +4881,42 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
return SDValue();
}
-/// isVectorBroadcast - Check if the node chain is suitable to be xformed to
-/// a vbroadcast node. We support two patterns:
-/// 1. A splat BUILD_VECTOR which uses a single scalar load.
+/// LowerVectorBroadcast - Attempt to use the vbroadcast instruction
+/// to generate a splat value for the following cases:
+/// 1. A splat BUILD_VECTOR which uses a single scalar load, or a constant.
/// 2. A splat shuffle which uses a scalar_to_vector node which comes from
-/// a scalar load.
-/// The scalar load node is returned when a pattern is found,
+/// a scalar load, or a constant.
+/// The VBROADCAST node is returned when a pattern is found,
/// or SDValue() otherwise.
-static SDValue isVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget) {
+SDValue
+X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
if (!Subtarget->hasAVX())
return SDValue();
EVT VT = Op.getValueType();
- SDValue V = Op;
-
- if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
- //A suspected load to be broadcasted.
SDValue Ld;
+ bool ConstSplatVal;
- switch (V.getOpcode()) {
+ switch (Op.getOpcode()) {
default:
// Unknown pattern found.
return SDValue();
case ISD::BUILD_VECTOR: {
// The BUILD_VECTOR node must be a splat.
- if (!isSplatVector(V.getNode()))
+ if (!isSplatVector(Op.getNode()))
return SDValue();
- Ld = V.getOperand(0);
+ Ld = Op.getOperand(0);
+ ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
+ Ld.getOpcode() == ISD::ConstantFP);
// The suspected load node has several users. Make sure that all
// of its users are from the BUILD_VECTOR node.
- if (!Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0))
+ // Constants may have multiple users.
+ if (!ConstSplatVal && !Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0))
return SDValue();
break;
}
@@ -4904,15 +4934,50 @@ static SDValue isVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget) {
return SDValue();
Ld = Sc.getOperand(0);
+ ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
+ Ld.getOpcode() == ISD::ConstantFP);
// The scalar_to_vector node and the suspected
// load node must have exactly one user.
- if (!Sc.hasOneUse() || !Ld.hasOneUse())
+ // Constants may have multiple users.
+ if (!ConstSplatVal && (!Sc.hasOneUse() || !Ld.hasOneUse()))
return SDValue();
break;
}
}
+ bool Is256 = VT.getSizeInBits() == 256;
+ bool Is128 = VT.getSizeInBits() == 128;
+
+ // Handle the broadcasting a single constant scalar from the constant pool
+ // into a vector. On Sandybridge it is still better to load a constant vector
+ // from the constant pool and not to broadcast it from a scalar.
+ if (ConstSplatVal && Subtarget->hasAVX2()) {
+ EVT CVT = Ld.getValueType();
+ assert(!CVT.isVector() && "Must not broadcast a vector type");
+ unsigned ScalarSize = CVT.getSizeInBits();
+
+ if ((Is256 && (ScalarSize == 32 || ScalarSize == 64)) ||
+ (Is128 && (ScalarSize == 32))) {
+
+ const Constant *C = 0;
+ if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
+ C = CI->getConstantIntValue();
+ else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
+ C = CF->getConstantFPValue();
+
+ assert(C && "Invalid constant type");
+
+ SDValue CP = DAG.getConstantPool(C, getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+ Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+ }
+ }
+
// The scalar source must be a normal load.
if (!ISD::isNormalLoad(Ld.getNode()))
return SDValue();
@@ -4921,28 +4986,26 @@ static SDValue isVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget) {
if (Ld->hasAnyUseOfValue(1))
return SDValue();
- bool Is256 = VT.getSizeInBits() == 256;
- bool Is128 = VT.getSizeInBits() == 128;
unsigned ScalarSize = Ld.getValueType().getSizeInBits();
// VBroadcast to YMM
if (Is256 && (ScalarSize == 32 || ScalarSize == 64))
- return Ld;
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// VBroadcast to XMM
if (Is128 && (ScalarSize == 32))
- return Ld;
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// The integer check is needed for the 64-bit into 128-bit so it doesn't match
// double since there is vbroadcastsd xmm
if (Subtarget->hasAVX2() && Ld.getValueType().isInteger()) {
// VBroadcast to YMM
if (Is256 && (ScalarSize == 8 || ScalarSize == 16))
- return Ld;
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// VBroadcast to XMM
if (Is128 && (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64))
- return Ld;
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
}
// Unsupported broadcast.
@@ -4977,9 +5040,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return getOnesVector(VT, Subtarget->hasAVX2(), DAG, dl);
}
- SDValue LD = isVectorBroadcast(Op, Subtarget);
- if (LD.getNode())
- return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD);
+ SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
+ if (Broadcast.getNode())
+ return Broadcast;
unsigned EVTBits = ExtVT.getSizeInBits();
@@ -5343,6 +5406,85 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
return LowerAVXCONCAT_VECTORS(Op, DAG);
}
+// Try to lower a shuffle node into a simple blend instruction.
+static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT InVT = V1.getValueType();
+ int MaskSize = VT.getVectorNumElements();
+ int InSize = InVT.getVectorNumElements();
+
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ if (MaskSize != InSize)
+ return SDValue();
+
+ int ISDNo = 0;
+ MVT OpTy;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v8i16:
+ ISDNo = X86ISD::BLENDPW;
+ OpTy = MVT::v8i16;
+ break;
+ case MVT::v4i32:
+ case MVT::v4f32:
+ ISDNo = X86ISD::BLENDPS;
+ OpTy = MVT::v4f32;
+ break;
+ case MVT::v2i64:
+ case MVT::v2f64:
+ ISDNo = X86ISD::BLENDPD;
+ OpTy = MVT::v2f64;
+ break;
+ case MVT::v8i32:
+ case MVT::v8f32:
+ if (!Subtarget->hasAVX())
+ return SDValue();
+ ISDNo = X86ISD::BLENDPS;
+ OpTy = MVT::v8f32;
+ break;
+ case MVT::v4i64:
+ case MVT::v4f64:
+ if (!Subtarget->hasAVX())
+ return SDValue();
+ ISDNo = X86ISD::BLENDPD;
+ OpTy = MVT::v4f64;
+ break;
+ case MVT::v16i16:
+ if (!Subtarget->hasAVX2())
+ return SDValue();
+ ISDNo = X86ISD::BLENDPW;
+ OpTy = MVT::v16i16;
+ break;
+ }
+ assert(ISDNo && "Invalid Op Number");
+
+ unsigned MaskVals = 0;
+
+ for (int i = 0; i < MaskSize; ++i) {
+ int EltIdx = SVOp->getMaskElt(i);
+ if (EltIdx == i || EltIdx == -1)
+ MaskVals |= (1<<i);
+ else if (EltIdx == (i + MaskSize))
+ continue; // Bit is set to zero;
+ else return SDValue();
+ }
+
+ V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2);
+ SDValue Ret = DAG.getNode(ISDNo, dl, OpTy, V1, V2,
+ DAG.getConstant(MaskVals, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
+}
+
// v8i16 shuffles - Prefer shuffles in the following order:
// 1. [all] pshuflw, pshufhw, optional move
// 2. [ssse3] 1 x pshufb
@@ -5836,96 +5978,79 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
unsigned NumElems = VT.getVectorNumElements();
unsigned NumLaneElems = NumElems / 2;
- int MinRange[2][2] = { { static_cast<int>(NumElems),
- static_cast<int>(NumElems) },
- { static_cast<int>(NumElems),
- static_cast<int>(NumElems) } };
- int MaxRange[2][2] = { { -1, -1 }, { -1, -1 } };
+ DebugLoc dl = SVOp->getDebugLoc();
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NVT = MVT::getVectorVT(EltVT, NumLaneElems);
+ SDValue Shufs[2];
- // Collect used ranges for each source in each lane
+ SmallVector<int, 16> Mask;
for (unsigned l = 0; l < 2; ++l) {
- unsigned LaneStart = l*NumLaneElems;
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands (recorded in InputUsed).
+ // If building a suitable shuffle vector proves too hard, then bail
+ // out with useBuildVector set.
+ int InputUsed[2] = { -1, -1 }; // Not yet discovered.
+ unsigned LaneStart = l * NumLaneElems;
for (unsigned i = 0; i != NumLaneElems; ++i) {
+ // The mask element. This indexes into the input.
int Idx = SVOp->getMaskElt(i+LaneStart);
- if (Idx < 0)
+ if (Idx < 0) {
+ // the mask element does not index into any input vector.
+ Mask.push_back(-1);
continue;
-
- int Input = 0;
- if (Idx >= (int)NumElems) {
- Idx -= NumElems;
- Input = 1;
}
- if (Idx > MaxRange[l][Input])
- MaxRange[l][Input] = Idx;
- if (Idx < MinRange[l][Input])
- MinRange[l][Input] = Idx;
- }
- }
+ // The input vector this mask element indexes into.
+ int Input = Idx / NumLaneElems;
- // Make sure each range is 128-bits
- int ExtractIdx[2][2] = { { -1, -1 }, { -1, -1 } };
- for (unsigned l = 0; l < 2; ++l) {
- for (unsigned Input = 0; Input < 2; ++Input) {
- if (MinRange[l][Input] == (int)NumElems && MaxRange[l][Input] < 0)
- continue;
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NumLaneElems;
- if (MinRange[l][Input] >= 0 && MaxRange[l][Input] < (int)NumLaneElems)
- ExtractIdx[l][Input] = 0;
- else if (MinRange[l][Input] >= (int)NumLaneElems &&
- MaxRange[l][Input] < (int)NumElems)
- ExtractIdx[l][Input] = NumLaneElems;
- else
- return SDValue();
- }
- }
+ // Find or create a shuffle vector operand to hold this input.
+ unsigned OpNo;
+ for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+ if (InputUsed[OpNo] == Input)
+ // This input vector is already an operand.
+ break;
+ if (InputUsed[OpNo] < 0) {
+ // Create a new operand for this input vector.
+ InputUsed[OpNo] = Input;
+ break;
+ }
+ }
- DebugLoc dl = SVOp->getDebugLoc();
- MVT EltVT = VT.getVectorElementType().getSimpleVT();
- EVT NVT = MVT::getVectorVT(EltVT, NumElems/2);
+ if (OpNo >= array_lengthof(InputUsed)) {
+ // More than two input vectors used! Give up.
+ return SDValue();
+ }
- SDValue Ops[2][2];
- for (unsigned l = 0; l < 2; ++l) {
- for (unsigned Input = 0; Input < 2; ++Input) {
- if (ExtractIdx[l][Input] >= 0)
- Ops[l][Input] = Extract128BitVector(SVOp->getOperand(Input),
- DAG.getConstant(ExtractIdx[l][Input], MVT::i32),
- DAG, dl);
- else
- Ops[l][Input] = DAG.getUNDEF(NVT);
+ // Add the mask index for the new shuffle vector.
+ Mask.push_back(Idx + OpNo * NumLaneElems);
}
- }
- // Generate 128-bit shuffles
- SmallVector<int, 16> Mask1, Mask2;
- for (unsigned i = 0; i != NumLaneElems; ++i) {
- int Elt = SVOp->getMaskElt(i);
- if (Elt >= (int)NumElems) {
- Elt %= NumLaneElems;
- Elt += NumLaneElems;
- } else if (Elt >= 0) {
- Elt %= NumLaneElems;
- }
- Mask1.push_back(Elt);
- }
- for (unsigned i = NumLaneElems; i != NumElems; ++i) {
- int Elt = SVOp->getMaskElt(i);
- if (Elt >= (int)NumElems) {
- Elt %= NumLaneElems;
- Elt += NumLaneElems;
- } else if (Elt >= 0) {
- Elt %= NumLaneElems;
+ if (InputUsed[0] < 0) {
+ // No input vectors were used! The result is undefined.
+ Shufs[l] = DAG.getUNDEF(NVT);
+ } else {
+ SDValue Op0 = Extract128BitVector(SVOp->getOperand(InputUsed[0] / 2),
+ DAG.getConstant((InputUsed[0] % 2) * NumLaneElems, MVT::i32),
+ DAG, dl);
+ // If only one input was used, use an undefined vector for the other.
+ SDValue Op1 = (InputUsed[1] < 0) ? DAG.getUNDEF(NVT) :
+ Extract128BitVector(SVOp->getOperand(InputUsed[1] / 2),
+ DAG.getConstant((InputUsed[1] % 2) * NumLaneElems, MVT::i32),
+ DAG, dl);
+ // At least one input vector was used. Create a new shuffle vector.
+ Shufs[l] = DAG.getVectorShuffle(NVT, dl, Op0, Op1, &Mask[0]);
}
- Mask2.push_back(Elt);
- }
- SDValue Shuf1 = DAG.getVectorShuffle(NVT, dl, Ops[0][0], Ops[0][1], &Mask1[0]);
- SDValue Shuf2 = DAG.getVectorShuffle(NVT, dl, Ops[1][0], Ops[1][1], &Mask2[0]);
+ Mask.clear();
+ }
// Concatenate the result back
- SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shuf1,
+ SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shufs[0],
DAG.getConstant(0, MVT::i32), DAG, dl);
- return Insert128BitVector(V, Shuf2, DAG.getConstant(NumElems/2, MVT::i32),
+ return Insert128BitVector(V, Shufs[1],DAG.getConstant(NumLaneElems, MVT::i32),
DAG, dl);
}
@@ -6203,10 +6328,8 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
getShuffleSHUFImmediate(SVOp), DAG);
}
-static
-SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI,
- const X86Subtarget *Subtarget) {
+SDValue
+X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
@@ -6222,9 +6345,9 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
int Size = VT.getSizeInBits();
// Use vbroadcast whenever the splat comes from a foldable load
- SDValue LD = isVectorBroadcast(Op, Subtarget);
- if (LD.getNode())
- return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD);
+ SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
+ if (Broadcast.getNode())
+ return Broadcast;
// Handle splats by matching through known shuffle masks
if ((Size == 128 && NumElem <= 4) ||
@@ -6309,7 +6432,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// Normalize the input vectors. Here splats, zeroed vectors, profitable
// narrowing and commutation of operands should be handled. The actual code
// doesn't include all of those, work in progress...
- SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget);
+ SDValue NewOp = NormalizeVectorShuffle(Op, DAG);
if (NewOp.getNode())
return NewOp;
@@ -6524,6 +6647,27 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
+ SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(Op, Subtarget, DAG);
+ if (BlendOp.getNode())
+ return BlendOp;
+
+ if (V2IsUndef && HasAVX2 && (VT == MVT::v8i32 || VT == MVT::v8f32)) {
+ SmallVector<SDValue, 8> permclMask;
+ for (unsigned i = 0; i != 8; ++i) {
+ permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MVT::i32));
+ }
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32,
+ &permclMask[0], 8);
+ // Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32
+ return DAG.getNode(X86ISD::VPERMV, dl, VT,
+ DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1);
+ }
+
+ if (V2IsUndef && HasAVX2 && (VT == MVT::v4i64 || VT == MVT::v4f64))
+ return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1,
+ getShuffleCLImmediate(SVOp), DAG);
+
+
//===--------------------------------------------------------------------===//
// Since no target specific shuffle was selected for this generic one,
// lower it into other known shuffles. FIXME: this isn't true yet, but
@@ -7182,8 +7326,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GV = GA->resolveAliasedGlobal(false);
- TLSModel::Model model
- = getTLSModel(GV, getTargetMachine().getRelocationModel());
+ TLSModel::Model model = getTargetMachine().getTLSModel(GV);
switch (model) {
case TLSModel::GeneralDynamic:
@@ -8099,8 +8242,8 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
unsigned BitWidth = Op0.getValueSizeInBits();
unsigned AndBitWidth = And.getValueSizeInBits();
if (BitWidth > AndBitWidth) {
- APInt Mask = APInt::getAllOnesValue(BitWidth), Zeros, Ones;
- DAG.ComputeMaskedBits(Op0, Mask, Zeros, Ones);
+ APInt Zeros, Ones;
+ DAG.ComputeMaskedBits(Op0, Zeros, Ones);
if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth)
return SDValue();
}
@@ -9449,12 +9592,12 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_avx2_vperm2i128:
return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
- case Intrinsic::x86_avx_vpermil_ps:
- case Intrinsic::x86_avx_vpermil_pd:
- case Intrinsic::x86_avx_vpermil_ps_256:
- case Intrinsic::x86_avx_vpermil_pd_256:
- return DAG.getNode(X86ISD::VPERMILP, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_avx2_permd:
+ case Intrinsic::x86_avx2_permps:
+ // Operands intentionally swapped. Mask is last operand to intrinsic,
+ // but second operand for node/intruction.
+ return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(1));
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
@@ -10963,6 +11106,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::PSIGN: return "X86ISD::PSIGN";
case X86ISD::BLENDV: return "X86ISD::BLENDV";
+ case X86ISD::BLENDPW: return "X86ISD::BLENDPW";
+ case X86ISD::BLENDPS: return "X86ISD::BLENDPS";
+ case X86ISD::BLENDPD: return "X86ISD::BLENDPD";
case X86ISD::HADD: return "X86ISD::HADD";
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
@@ -11035,6 +11181,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
+ case X86ISD::VPERMV: return "X86ISD::VPERMV";
+ case X86ISD::VPERMI: return "X86ISD::VPERMI";
case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
@@ -11192,14 +11340,15 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
unsigned notOpc,
unsigned EAXreg,
const TargetRegisterClass *RC,
- bool invSrc) const {
+ bool Invert) const {
// For the atomic bitwise operator, we generate
// thisMBB:
// newMBB:
// ld t1 = [bitinstr.addr]
// op t2 = t1, [bitinstr.val]
+ // not t3 = t2 (if Invert)
// mov EAX = t1
- // lcs dest = [bitinstr.addr], t2 [EAX is implicit]
+ // lcs dest = [bitinstr.addr], t3 [EAX is implicit]
// bz newMBB
// fallthrough -->nextMBB
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
@@ -11247,13 +11396,6 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
for (int i=0; i <= lastAddrIndx; ++i)
(*MIB).addOperand(*argOpers[i]);
- unsigned tt = F->getRegInfo().createVirtualRegister(RC);
- if (invSrc) {
- MIB = BuildMI(newMBB, dl, TII->get(notOpc), tt).addReg(t1);
- }
- else
- tt = t1;
-
unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
assert((argOpers[valArgIndx]->isReg() ||
argOpers[valArgIndx]->isImm()) &&
@@ -11262,16 +11404,23 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2);
else
MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2);
- MIB.addReg(tt);
+ MIB.addReg(t1);
(*MIB).addOperand(*argOpers[valArgIndx]);
+ unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
+ if (Invert) {
+ MIB = BuildMI(newMBB, dl, TII->get(notOpc), t3).addReg(t2);
+ }
+ else
+ t3 = t2;
+
MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
MIB.addReg(t1);
MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
for (int i=0; i <= lastAddrIndx; ++i)
(*MIB).addOperand(*argOpers[i]);
- MIB.addReg(t2);
+ MIB.addReg(t3);
assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
(*MIB).setMemRefs(bInstr->memoperands_begin(),
bInstr->memoperands_end());
@@ -11294,7 +11443,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
unsigned regOpcH,
unsigned immOpcL,
unsigned immOpcH,
- bool invSrc) const {
+ bool Invert) const {
// For the atomic bitwise operator, we generate
// thisMBB (instructions are in pairs, except cmpxchg8b)
// ld t1,t2 = [bitinstr.addr]
@@ -11302,6 +11451,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
// out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
// op t5, t6 <- out1, out2, [bitinstr.val]
// (for SWAP, substitute: mov t5, t6 <- [bitinstr.val])
+ // neg t7, t8 < t5, t6 (if Invert)
// mov ECX, EBX <- t5, t6
// mov EAX, EDX <- t1, t2
// cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit]
@@ -11385,16 +11535,9 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
.addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);
// The subsequent operations should be using the destination registers of
- //the PHI instructions.
- if (invSrc) {
- t1 = F->getRegInfo().createVirtualRegister(RC);
- t2 = F->getRegInfo().createVirtualRegister(RC);
- MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t1).addReg(dest1Oper.getReg());
- MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t2).addReg(dest2Oper.getReg());
- } else {
- t1 = dest1Oper.getReg();
- t2 = dest2Oper.getReg();
- }
+ // the PHI instructions.
+ t1 = dest1Oper.getReg();
+ t2 = dest2Oper.getReg();
int valArgIndx = lastAddrIndx + 1;
assert((argOpers[valArgIndx]->isReg() ||
@@ -11421,15 +11564,26 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
MIB.addReg(t2);
(*MIB).addOperand(*argOpers[valArgIndx + 1]);
+ unsigned t7, t8;
+ if (Invert) {
+ t7 = F->getRegInfo().createVirtualRegister(RC);
+ t8 = F->getRegInfo().createVirtualRegister(RC);
+ MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t7).addReg(t5);
+ MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t8).addReg(t6);
+ } else {
+ t7 = t5;
+ t8 = t6;
+ }
+
MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
MIB.addReg(t1);
MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
MIB.addReg(t2);
MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
- MIB.addReg(t5);
+ MIB.addReg(t7);
MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
- MIB.addReg(t6);
+ MIB.addReg(t8);
MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
for (int i=0; i <= lastAddrIndx; ++i)
@@ -12620,11 +12774,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
//===----------------------------------------------------------------------===//
void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
+ unsigned BitWidth = KnownZero.getBitWidth();
unsigned Opc = Op.getOpcode();
assert((Opc >= ISD::BUILTIN_OP_END ||
Opc == ISD::INTRINSIC_WO_CHAIN ||
@@ -12633,7 +12787,7 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
- KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.
+ KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
switch (Opc) {
default: break;
case X86ISD::ADD:
@@ -12652,8 +12806,7 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
break;
// Fallthrough
case X86ISD::SETCC:
- KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
- Mask.getBitWidth() - 1);
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
break;
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -12678,8 +12831,7 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break;
case Intrinsic::x86_avx2_pmovmskb: NumLoBits = 32; break;
}
- KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(),
- Mask.getBitWidth() - NumLoBits);
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits);
break;
}
}
@@ -14000,13 +14152,14 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Validate that X, Y, and Mask are BIT_CONVERTS, and see through them.
- if (Mask.getOpcode() != ISD::BITCAST ||
- X.getOpcode() != ISD::BITCAST ||
- Y.getOpcode() != ISD::BITCAST)
- return SDValue();
-
// Look through mask bitcast.
- Mask = Mask.getOperand(0);
+ if (Mask.getOpcode() == ISD::BITCAST)
+ Mask = Mask.getOperand(0);
+ if (X.getOpcode() == ISD::BITCAST)
+ X = X.getOperand(0);
+ if (Y.getOpcode() == ISD::BITCAST)
+ Y = Y.getOperand(0);
+
EVT MaskVT = Mask.getValueType();
// Validate that the Mask operand is a vector sra node.
@@ -14027,8 +14180,6 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
// Now we know we at least have a plendvb with the mask val. See if
// we can form a psignb/w/d.
// psign = x.type == y.type == mask.type && y = sub(0, x);
- X = X.getOperand(0);
- Y = Y.getOperand(0);
if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
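
Two of the lowering additions in this file are easy to miss in the noise. LowerVECTOR_SHUFFLEtoBlend catches shuffles in which every lane is either kept in place or taken from the same lane of the second operand and emits the new BLENDPS/BLENDPD/BLENDPW nodes; an illustrative IR-level case (the lowering is assumed from the code above, the exact immediate encoding is left to the instruction patterns):

    %r = shufflevector <4 x float> %a, <4 x float> %b,
                       <4 x i32> <i32 0, i32 5, i32 2, i32 7>
    ; lanes 0 and 2 from %a, lanes 1 and 3 from %b -> a single blendps

LowerVectorBroadcast, in turn, now also handles constant splats on AVX2 by placing the scalar in the constant pool and broadcasting the load of it, rather than requiring the splatted value to come from an existing load.
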
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 0327b1f..09116e8 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -175,9 +175,14 @@ namespace llvm {
/// PSIGN - Copy integer sign.
PSIGN,
- /// BLEND family of opcodes
+ /// BLENDV - Blend where the selector is an XMM.
BLENDV,
+ /// BLENDxx - Blend where the selector is an immediate.
+ BLENDPW,
+ BLENDPS,
+ BLENDPD,
+
/// HADD - Integer horizontal add.
HADD,
@@ -280,6 +285,8 @@ namespace llvm {
UNPCKL,
UNPCKH,
VPERMILP,
+ VPERMV,
+ VPERMI,
VPERM2X128,
VBROADCAST,
@@ -504,7 +511,6 @@ namespace llvm {
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -781,6 +787,8 @@ namespace llvm {
// Utility functions to help LowerVECTOR_SHUFFLE
SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const;
+ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
@@ -804,7 +812,7 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
- virtual bool isUsedByReturnOnly(SDNode *N) const;
+ virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const;
virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
@@ -849,7 +857,7 @@ namespace llvm {
unsigned notOpc,
unsigned EAXreg,
const TargetRegisterClass *RC,
- bool invSrc = false) const;
+ bool Invert = false) const;
MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
MachineInstr *BInstr,
@@ -858,7 +866,7 @@ namespace llvm {
unsigned regOpcH,
unsigned immOpcL,
unsigned immOpcH,
- bool invSrc = false) const;
+ bool Invert = false) const;
/// Utility function to emit atomic min and max. It takes the min/max
/// instruction to expand, the associated basic block, and the associated
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index 7fa7499..0eee083 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -53,7 +53,7 @@ def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, GR8:$src)),
- (implicit EFLAGS)]>; // AL,AH = AL*GR8
+ (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8
let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
@@ -97,31 +97,32 @@ def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
let neverHasSideEffects = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>;
- // AL,AH = AL*GR8
+def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [],
+ IIC_IMUL8>; // AL,AH = AL*GR8
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
- OpSize; // AX,DX = AX*GR16
+def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [],
+ IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>;
- // EAX,EDX = EAX*GR32
+def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [],
+ IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
-def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>;
- // RAX,RDX = RAX*GR64
+def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [],
+ IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64
let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", []>; // AL,AH = AL*[mem8]
+ "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8]
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16]
+ "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize;
+ // AX,DX = AX*[mem16]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32]
+ "imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
- "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+ "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64]
}
} // neverHasSideEffects
@@ -639,10 +640,11 @@ class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
// BinOpRR - Instructions like "add reg, reg, reg".
class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- dag outlist, list<dag> pattern, Format f = MRMDestReg>
+ dag outlist, list<dag> pattern, InstrItinClass itin,
+ Format f = MRMDestReg>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern>;
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>;
// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
// just a regclass (no eflags) as a result.
@@ -650,7 +652,8 @@ class BinOpRR_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
: BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst,
- (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
+ IIC_BIN_NONMEM>;
// BinOpRR_F - Instructions like "cmp reg, Reg", where the pattern has
// just a EFLAGS as a result.
@@ -659,7 +662,7 @@ class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRR<opcode, mnemonic, typeinfo, (outs),
[(set EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
- f>;
+ IIC_BIN_NONMEM, f>;
// BinOpRR_RF - Instructions like "add reg, reg, reg", where the pattern has
// both a regclass and EFLAGS as a result.
@@ -667,7 +670,8 @@ class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
: BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
- (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
+ IIC_BIN_NONMEM>;
// BinOpRR_RFF - Instructions like "adc reg, reg, reg", where the pattern has
// both a regclass and EFLAGS as a result, and has EFLAGS as input.
@@ -676,14 +680,14 @@ class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2,
- EFLAGS))]>;
+ EFLAGS))], IIC_BIN_NONMEM>;
// BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding).
class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo,
(outs typeinfo.RegClass:$dst),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $dst|$dst, $src2}", []> {
+ mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
}
@@ -692,7 +696,7 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo, (outs),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", []> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
}
@@ -702,7 +706,7 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
dag outlist, list<dag> pattern>
: ITy<opcode, MRMSrcMem, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_MEM>;
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>;
// BinOpRM_R - Instructions like "add reg, reg, [mem]".
class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -738,7 +742,7 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -762,7 +766,6 @@ class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
-
// BinOpRI_RFF - Instructions like "adc reg, reg, imm".
class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
@@ -776,7 +779,7 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -853,7 +856,6 @@ class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo,
[(store (opnode (typeinfo.VT (load addr:$dst)),
typeinfo.ImmOperator:$src), addr:$dst),
(implicit EFLAGS)]>;
-
// BinOpMI_RMW_FF - Instructions like "adc [mem], imm".
class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
@@ -1219,12 +1221,12 @@ let neverHasSideEffects = 1 in {
let isCommutable = 1 in
def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- []>, T8XD, VEX_4V;
+ [], IIC_MUL8>, T8XD, VEX_4V;
let mayLoad = 1 in
def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- []>, T8XD, VEX_4V;
+ [], IIC_MUL8>, T8XD, VEX_4V;
}
}
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 42a5014..6f9e849 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -301,34 +301,67 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
// String Pseudo Instructions
//
let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
-def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
- [(X86rep_movs i8)], IIC_REP_MOVS>, REP;
-def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
- [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize;
-def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
- [(X86rep_movs i32)], IIC_REP_MOVS>, REP;
+def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+ [(X86rep_movs i8)], IIC_REP_MOVS>, REP,
+ Requires<[In32BitMode]>;
+def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+ [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize,
+ Requires<[In32BitMode]>;
+def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+ [(X86rep_movs i32)], IIC_REP_MOVS>, REP,
+ Requires<[In32BitMode]>;
}
-let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
-def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
- [(X86rep_movs i64)], IIC_REP_MOVS>, REP;
-
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {
+def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+ [(X86rep_movs i8)], IIC_REP_MOVS>, REP,
+ Requires<[In64BitMode]>;
+def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+ [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize,
+ Requires<[In64BitMode]>;
+def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+ [(X86rep_movs i32)], IIC_REP_MOVS>, REP,
+ Requires<[In64BitMode]>;
+def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
+ [(X86rep_movs i64)], IIC_REP_MOVS>, REP,
+ Requires<[In64BitMode]>;
+}
// FIXME: Should use "(X86rep_stos AL)" as the pattern.
-let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
- [(X86rep_stos i8)], IIC_REP_STOS>, REP;
-let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
- [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize;
-let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
- [(X86rep_stos i32)], IIC_REP_STOS>, REP;
-
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
-def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
- [(X86rep_stos i64)], IIC_REP_STOS>, REP;
+let Defs = [ECX,EDI], isCodeGenOnly = 1 in {
+ let Uses = [AL,ECX,EDI] in
+ def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+ [(X86rep_stos i8)], IIC_REP_STOS>, REP,
+ Requires<[In32BitMode]>;
+ let Uses = [AX,ECX,EDI] in
+ def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+ [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize,
+ Requires<[In32BitMode]>;
+ let Uses = [EAX,ECX,EDI] in
+ def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+ [(X86rep_stos i32)], IIC_REP_STOS>, REP,
+ Requires<[In32BitMode]>;
+}
+let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
+ let Uses = [AL,RCX,RDI] in
+ def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+ [(X86rep_stos i8)], IIC_REP_STOS>, REP,
+ Requires<[In64BitMode]>;
+ let Uses = [AX,RCX,RDI] in
+ def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+ [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize,
+ Requires<[In64BitMode]>;
+ let Uses = [RAX,RCX,RDI] in
+ def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+ [(X86rep_stos i32)], IIC_REP_STOS>, REP,
+ Requires<[In64BitMode]>;
+
+ let Uses = [RAX,RCX,RDI] in
+ def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
+ [(X86rep_stos i64)], IIC_REP_STOS>, REP,
+ Requires<[In64BitMode]>;
+}
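The _32/_64 split above exists because the string instructions implicitly use the counter and pointer registers of the current address size (ECX/EDI/ESI versus RCX/RDI/RSI), so each variant needs its own implicit Defs/Uses lists plus a Requires<[In32BitMode]> or Requires<[In64BitMode]> predicate. As a rough behavioural model of what the REP STOSB form does, here is a hedged C++ sketch (it ignores the direction flag; the function name is only illustrative):

#include <cstddef>
#include <cstdint>

// Rough model of "rep stosb": store the byte in AL to [(E|R)DI], advance the
// pointer, and decrement the count register until it reaches zero.
static void rep_stosb(uint8_t *Dst /* (E|R)DI */, uint8_t Val /* AL */,
                      size_t Count /* (E|R)CX */) {
  while (Count--)
    *Dst++ = Val;
}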
//===----------------------------------------------------------------------===//
// Thread Local Storage Instructions
@@ -1134,12 +1167,10 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
- unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
- APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt KnownZero0, KnownOne0;
- CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+ CurDAG->ComputeMaskedBits(N->getOperand(0), KnownZero0, KnownOne0, 0);
APInt KnownZero1, KnownOne1;
- CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+ CurDAG->ComputeMaskedBits(N->getOperand(1), KnownZero1, KnownOne1, 0);
return (~KnownZero0 & ~KnownZero1) == 0;
}]>;
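The updated or_is_add fragment keeps the same test under the new ComputeMaskedBits signature: the 'or' may be selected as an 'add' only when no bit position can be set in both operands, since disjoint operands never generate a carry. A minimal standalone sketch of that invariant (plain C++, not LLVM code; the 64-bit width and names are only illustrative):

#include <cassert>
#include <cstdint>

// OR and ADD agree whenever the operands share no set bits: with no common
// bit positions there is nothing to carry.
static bool orIsAdd(uint64_t A, uint64_t B) { return (A & B) == 0; }

int main() {
  uint64_t A = 0xF0, B = 0x0F;               // disjoint bit patterns
  assert(orIsAdd(A, B) && (A | B) == (A + B));

  // KnownZero-style reasoning: a bit can only be one where KnownZero is 0,
  // so (~KnownZero0 & ~KnownZero1) == 0 proves the operands are disjoint.
  uint64_t KnownZero0 = ~A, KnownZero1 = ~B;  // here the values are fully known
  assert((~KnownZero0 & ~KnownZero1) == 0);
  return 0;
}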
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index ba86098..bf11fde 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -21,20 +21,25 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret",
[(X86retflag 0)], IIC_RET>;
+ def RETW : I <0xC3, RawFrm, (outs), (ins variable_ops),
+ "ret{w}",
+ [], IIC_RET>, OpSize;
def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret\t$amt",
[(X86retflag timm:$amt)], IIC_RET_IMM>;
def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
- "retw\t$amt",
+ "ret{w}\t$amt",
[], IIC_RET_IMM>, OpSize;
def LRETL : I <0xCB, RawFrm, (outs), (ins),
- "lretl", [], IIC_RET>;
+ "{l}ret{l|f}", [], IIC_RET>;
+ def LRETW : I <0xCB, RawFrm, (outs), (ins),
+ "{l}ret{w|f}", [], IIC_RET>, OpSize;
def LRETQ : RI <0xCB, RawFrm, (outs), (ins),
- "lretq", [], IIC_RET>;
+ "{l}ret{q|f}", [], IIC_RET>;
def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
- "lret\t$amt", [], IIC_RET>;
+ "{l}ret{l|f}\t$amt", [], IIC_RET>;
def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
- "lretw\t$amt", [], IIC_RET>, OpSize;
+ "{l}ret{w|f}\t$amt", [], IIC_RET>, OpSize;
}
// Unconditional branches.
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index ae3ed1b..35801e4 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -126,6 +126,8 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
+def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+                                    SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
@@ -153,11 +155,17 @@ def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
+def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>;
+def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>;
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
+def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
+def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
+def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
+
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 307c96b..b12c1db 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1049,9 +1049,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, TB_ALIGN_32 },
{ X86::VPERM2I128rr, X86::VPERM2I128rm, TB_ALIGN_32 },
{ X86::VPERMDYrr, X86::VPERMDYrm, TB_ALIGN_32 },
- { X86::VPERMPDYrr, X86::VPERMPDYrm, TB_ALIGN_32 },
+ { X86::VPERMPDYri, X86::VPERMPDYmi, TB_ALIGN_32 },
{ X86::VPERMPSYrr, X86::VPERMPSYrm, TB_ALIGN_32 },
- { X86::VPERMQYrr, X86::VPERMQYrm, TB_ALIGN_32 },
+ { X86::VPERMQYri, X86::VPERMQYmi, TB_ALIGN_32 },
{ X86::VPHADDDYrr, X86::VPHADDDYrm, TB_ALIGN_32 },
{ X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_32 },
{ X86::VPHADDWYrr, X86::VPHADDWYrm, TB_ALIGN_32 },
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index dd7cf50..6a25312 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -374,6 +374,11 @@ def SSECC : Operand<i8> {
let OperandType = "OPERAND_IMMEDIATE";
}
+def AVXCC : Operand<i8> {
+ let PrintMethod = "printSSECC";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
class ImmSExtAsmOperandClass : AsmOperandClass {
let SuperClasses = [ImmAsmOperand];
let RenderMethod = "addImmOperands";
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index df42627..65e3c1e 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2162,15 +2162,15 @@ def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
- SDNode OpNode, ValueType VT, PatFrag ld_frag,
- string asm, string asm_alt,
+ Operand CC, SDNode OpNode, ValueType VT,
+ PatFrag ld_frag, string asm, string asm_alt,
OpndItins itins> {
def rr : SIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
itins.rr>;
def rm : SIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
(ld_frag addr:$src2), imm:$cc))],
itins.rm>;
@@ -2187,57 +2187,57 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
}
}
-defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
+defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmpss, f32, loadf32,
"cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSE_ALU_F32S>,
XS, VEX_4V, VEX_LIG;
-defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
+defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmpsd, f64, loadf64,
"cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSE_ALU_F32S>, // same latency as 32 bit compare
XD, VEX_4V, VEX_LIG;
let Constraints = "$src1 = $dst" in {
- defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
+ defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmpss, f32, loadf32,
"cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
"cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S>,
XS;
- defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
+ defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmpsd, f64, loadf64,
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
"cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SSE_ALU_F32S>, // same latency as 32 bit compare
XD;
}
-multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
+multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
Intrinsic Int, string asm, OpndItins itins> {
def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src, SSECC:$cc), asm,
+ (ins VR128:$src1, VR128:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
VR128:$src, imm:$cc))],
itins.rr>;
def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, x86memop:$src, SSECC:$cc), asm,
+ (ins VR128:$src1, x86memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
(load addr:$src), imm:$cc))],
itins.rm>;
}
// Aliases to match intrinsics which expect XMM operand(s).
-defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+defm Int_VCMPSS : sse12_cmp_scalar_int<f32mem, AVXCC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
SSE_ALU_F32S>,
XS, VEX_4V;
-defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+defm Int_VCMPSD : sse12_cmp_scalar_int<f64mem, AVXCC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
SSE_ALU_F32S>, // same latency as f32
XD, VEX_4V;
let Constraints = "$src1 = $dst" in {
- defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+ defm Int_CMPSS : sse12_cmp_scalar_int<f32mem, SSECC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
SSE_ALU_F32S>, XS;
- defm Int_CMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+ defm Int_CMPSD : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
SSE_ALU_F32S>, // same latency as f32
XD;
@@ -2308,50 +2308,50 @@ let Defs = [EFLAGS] in {
// sse12_cmp_packed - sse 1 & 2 compare packed instructions
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
- Intrinsic Int, string asm, string asm_alt,
- Domain d> {
- let isAsmParserOnly = 1 in {
- def rri : PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
- IIC_SSE_CMPP_RR, d>;
- def rmi : PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
- IIC_SSE_CMPP_RM, d>;
- }
+ Operand CC, Intrinsic Int, string asm,
+ string asm_alt, Domain d> {
+ def rri : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
+ IIC_SSE_CMPP_RR, d>;
+ def rmi : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
+ IIC_SSE_CMPP_RM, d>;
// Accept explicit immediate argument form instead of comparison code.
- def rri_alt : PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_CMPP_RR, d>;
- def rmi_alt : PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_CMPP_RM, d>;
+ let neverHasSideEffects = 1 in {
+ def rri_alt : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
+ asm_alt, [], IIC_SSE_CMPP_RR, d>;
+ def rmi_alt : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
+ asm_alt, [], IIC_SSE_CMPP_RM, d>;
+ }
}
-defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSEPackedSingle>, TB, VEX_4V;
-defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSEPackedDouble>, TB, OpSize, VEX_4V;
-defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
+defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSEPackedSingle>, TB, VEX_4V;
-defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
+defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSEPackedDouble>, TB, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
- defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+ defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SSEPackedSingle>, TB;
- defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+ defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
"cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SSEPackedDouble>, TB, OpSize;
@@ -6331,11 +6331,11 @@ def : Pat<(f64 (ftrunc FR64:$src)),
let Defs = [EFLAGS], Predicates = [HasAVX] in {
def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
OpSize, VEX;
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ [(set EFLAGS,(X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
OpSize, VEX;
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
@@ -6351,11 +6351,11 @@ def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"ptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
OpSize;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"ptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
OpSize;
}
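The PTEST patterns above move from v4f32 to v2i64 because the instruction is a pure bit test with no floating-point interpretation: it sets ZF when the AND of the two operands is all zeros and CF when the second operand ANDed with the complement of the first is all zeros. A hedged scalar model over two 64-bit lanes (illustrative only; the struct and function names are made up):

#include <cstdint>

struct V128 { uint64_t Lo, Hi; };   // a 128-bit value as two 64-bit lanes
struct PTestFlags { bool ZF, CF; };

// Scalar model of (V)PTEST's flag computation.
static PTestFlags ptest(V128 Dst, V128 Src) {
  PTestFlags F;
  F.ZF = ((Src.Lo & Dst.Lo)  | (Src.Hi & Dst.Hi))  == 0;
  F.CF = ((Src.Lo & ~Dst.Lo) | (Src.Hi & ~Dst.Hi)) == 0;
  return F;
}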
@@ -6735,12 +6735,32 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
(v4f64 VR256:$src2))),
(VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+
+ def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2),
+ (imm:$mask))),
+ (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>;
+ def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2),
+ (imm:$mask))),
+ (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>;
+
+ def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
+ (imm:$mask))),
+ (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
+ def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
+ (imm:$mask))),
+ (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
+ def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+ (imm:$mask))),
+ (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
}
let Predicates = [HasAVX2] in {
def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
(v32i8 VR256:$src2))),
(VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+ def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2),
+ (imm:$mask))),
+ (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>;
}
/// SS41I_ternary_int - SSE 4.1 ternary operator
@@ -6789,6 +6809,17 @@ let Predicates = [HasSSE41] in {
def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
(v2f64 VR128:$src2))),
(BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
+
+ def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
+ (imm:$mask))),
+ (PBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
+ def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
+ (imm:$mask))),
+ (BLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
+ def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+ (imm:$mask))),
+ (BLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
+
}
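The X86Blend* patterns in this and the preceding blocks map the DAG node onto BLENDPS/BLENDPD/PBLENDW with the two sources swapped, so the node's first operand becomes the instruction's second source. For reference, a per-lane model of the immediate blend encoding itself (a hedged C++ sketch, not the lowering code): a set immediate bit takes the lane from the second source, a clear bit keeps the first.

#include <array>
#include <cstdint>

// Per-lane model of BLENDPS dst, src, imm8: bit I of the immediate selects
// lane I from the second source; a clear bit keeps the first source's lane.
static std::array<float, 4> blendps(std::array<float, 4> Src1,
                                    std::array<float, 4> Src2, uint8_t Imm) {
  std::array<float, 4> Dst;
  for (int I = 0; I < 4; ++I)
    Dst[I] = ((Imm >> I) & 1) ? Src2[I] : Src1[I];
  return Dst;
}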
let Predicates = [HasAVX] in
@@ -7294,6 +7325,46 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
[]>, VEX_4V;
}
+let Predicates = [HasAVX] in {
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
+ (i32 imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
+ (i32 imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
+ (i32 imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+}
+
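These AVX1 vinsertf128 patterns, relocated within the file by this patch (the old copy is deleted further down), all select the same VINSERTF128rr/rm forms; only the element type changes. Behaviourally, the immediate picks which 128-bit half of the 256-bit destination is replaced. A rough lane model, as a hedged sketch with illustrative type and function names:

#include <cstdint>

struct Vec128 { uint64_t Q[2]; };   // 128-bit lane
struct Vec256 { Vec128 Lane[2]; };  // 256-bit register as two lanes

// Lane model of VINSERTF128: the low bit of the immediate selects which
// 128-bit half of the 256-bit destination the source lane replaces.
static Vec256 vinsertf128(Vec256 Dst, Vec128 Src, uint8_t Imm) {
  Dst.Lane[Imm & 1] = Src;
  return Dst;
}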
//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
@@ -7664,45 +7735,47 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
//
multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
- Intrinsic Int> {
+ ValueType OpVT> {
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (Int VR256:$src1, VR256:$src2))]>, VEX_4V;
+ [(set VR256:$dst,
+ (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>, VEX_4V;
def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (Int VR256:$src1,
- (bitconvert (mem_frag addr:$src2))))]>,
+ [(set VR256:$dst,
+ (OpVT (X86VPermv VR256:$src1,
+ (bitconvert (mem_frag addr:$src2)))))]>,
VEX_4V;
}
-defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, int_x86_avx2_permd>;
+defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, v8i32>;
let ExeDomain = SSEPackedSingle in
-defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>;
+defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, v8f32>;
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
- Intrinsic Int> {
- def Yrr : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
+ ValueType OpVT> {
+ def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (Int VR256:$src1, imm:$src2))]>, VEX;
- def Yrm : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
+ [(set VR256:$dst,
+ (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>, VEX;
+ def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (Int (mem_frag addr:$src1), imm:$src2))]>,
- VEX;
+ [(set VR256:$dst,
+ (OpVT (X86VPermi (mem_frag addr:$src1),
+ (i8 imm:$src2))))]>, VEX;
}
-defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, int_x86_avx2_permq>,
- VEX_W;
+defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, v4i64>, VEX_W;
let ExeDomain = SSEPackedDouble in
-defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>,
- VEX_W;
+defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, v4f64>, VEX_W;
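avx2_perm_imm now selects on the X86VPermi node with a plain value type instead of the AVX2 intrinsics, and the defs gain ri/mi suffixes for the register/immediate and memory/immediate forms, matching the fold-table rename in X86InstrInfo.cpp earlier in this patch. Functionally, VPERMQ/VPERMPD pick each destination element with a 2-bit field of the immediate; a hedged scalar model of the quadword form (names are illustrative):

#include <array>
#include <cstdint>

// Scalar model of VPERMQ ymm, ymm/m256, imm8: destination qword I comes from
// source qword number ((Imm >> 2*I) & 3).
static std::array<uint64_t, 4> vpermq(std::array<uint64_t, 4> Src, uint8_t Imm) {
  std::array<uint64_t, 4> Dst;
  for (int I = 0; I < 4; ++I)
    Dst[I] = Src[(Imm >> (2 * I)) & 3];
  return Dst;
}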
//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks
@@ -7743,18 +7816,17 @@ def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)),
//===----------------------------------------------------------------------===//
// VINSERTI128 - Insert packed integer values
//
+let neverHasSideEffects = 1 in {
def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR128:$src2, i8imm:$src3),
"vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- [(set VR256:$dst,
- (int_x86_avx2_vinserti128 VR256:$src1, VR128:$src2, imm:$src3))]>,
+ []>,
VEX_4V;
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i128mem:$src2, i8imm:$src3),
"vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- [(set VR256:$dst,
- (int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2),
- imm:$src3))]>, VEX_4V;
+ []>, VEX_4V;
+}
let Predicates = [HasAVX2], AddedComplexity = 1 in {
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
@@ -7775,47 +7847,6 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
(INSERT_get_vinsertf128_imm VR256:$ins))>;
}
-// AVX1 patterns
-let Predicates = [HasAVX] in {
-def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-
-def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
- (i32 imm)),
- (VINSERTF128rm VR256:$src1, addr:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
- (i32 imm)),
- (VINSERTF128rm VR256:$src1, addr:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
- (i32 imm)),
- (VINSERTF128rm VR256:$src1, addr:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-}
-
//===----------------------------------------------------------------------===//
// VEXTRACTI128 - Extract packed integer values
//
@@ -7830,7 +7861,7 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
(ins i128mem:$dst, VR256:$src1, i8imm:$src2),
"vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX;
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2], AddedComplexity = 1 in {
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
(v2i64 (VEXTRACTI128rr
(v4i64 VR256:$src1),
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 3eb9441..ed1a409 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -21,7 +21,6 @@
#include "llvm/Support/Host.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/SmallVector.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
@@ -425,7 +424,9 @@ bool X86Subtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const {
- Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+  // TODO: Change back to ANTIDEP_CRITICAL when the
+  // X86 subtarget properly sets up post-RA liveness.
+ Mode = TargetSubtargetInfo::ANTIDEP_NONE;
CriticalPathRCs.clear();
return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
}
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index c0d2a9c..718f35e 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -14,7 +14,6 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;
using namespace dwarf;