Diffstat (limited to 'lib/Target/X86')
28 files changed, 750 insertions, 421 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 9e88472..08c732c 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -17,7 +17,6 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" @@ -951,20 +950,21 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && (PatchedName.endswith("ss") || PatchedName.endswith("sd") || PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { - bool IsVCMP = PatchedName.startswith("vcmp"); + bool IsVCMP = PatchedName[0] == 'v'; unsigned SSECCIdx = IsVCMP ? 4 : 3; unsigned SSEComparisonCode = StringSwitch<unsigned>( PatchedName.slice(SSECCIdx, PatchedName.size() - 2)) - .Case("eq", 0) - .Case("lt", 1) - .Case("le", 2) - .Case("unord", 3) - .Case("neq", 4) - .Case("nlt", 5) - .Case("nle", 6) - .Case("ord", 7) - .Case("eq_uq", 8) - .Case("nge", 9) + .Case("eq", 0x00) + .Case("lt", 0x01) + .Case("le", 0x02) + .Case("unord", 0x03) + .Case("neq", 0x04) + .Case("nlt", 0x05) + .Case("nle", 0x06) + .Case("ord", 0x07) + /* AVX only from here */ + .Case("eq_uq", 0x08) + .Case("nge", 0x09) .Case("ngt", 0x0A) .Case("false", 0x0B) .Case("neq_oq", 0x0C) @@ -988,7 +988,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, .Case("gt_oq", 0x1E) .Case("true_us", 0x1F) .Default(~0U); - if (SSEComparisonCode != ~0U) { + if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) { ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, getParser().getContext()); if (PatchedName.endswith("ss")) { diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 8278bde..b13a006 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -322,7 +322,12 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, OperandType type = (OperandType)operand.type; + bool isBranch = false; + uint64_t pcrel = 0; if (type == TYPE_RELv) { + isBranch = true; + pcrel = insn.startLocation + + insn.displacementOffset + insn.displacementSize; switch (insn.displacementSize) { default: break; @@ -373,8 +378,6 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, } } - bool isBranch = false; - uint64_t pcrel = 0; switch (type) { case TYPE_XMM128: mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index fbd81d2..6020877 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1527,6 +1527,9 @@ static int readOperands(struct InternalInstruction* insn) { if (insn->spec->operands[index].type == TYPE_IMM3 && insn->immediates[insn->numImmediatesConsumed - 1] > 7) return -1; + if (insn->spec->operands[index].type == TYPE_IMM5 && + insn->immediates[insn->numImmediatesConsumed - 1] > 31) + return -1; if (insn->spec->operands[index].type == TYPE_XMM128 || insn->spec->operands[index].type == TYPE_XMM256) sawRegImm = 1; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index d2e30f1..13e1136 
100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -273,6 +273,7 @@ struct ContextDecision { ENUM_ENTRY(TYPE_IMM32, "4-byte") \ ENUM_ENTRY(TYPE_IMM64, "8-byte") \ ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \ + ENUM_ENTRY(TYPE_IMM5, "1-byte immediate operand between 0 and 31") \ ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \ ENUM_ENTRY(TYPE_RM16, "2-byte") \ ENUM_ENTRY(TYPE_RM32, "4-byte") \ diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index b7ccb4c..5118e4c 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -19,6 +19,8 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" @@ -26,7 +28,6 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. -#define GET_INSTRUCTION_NAME #define PRINT_ALIAS_INSTR #include "X86GenAsmWriter.inc" @@ -49,10 +50,6 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); } -StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} - void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O) { switch (MI->getOperand(Op).getImm()) { diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index ff94301..2e00bff 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -22,12 +22,12 @@ class MCOperand; class X86ATTInstPrinter : public MCInstPrinter { public: - X86ATTInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI) - : MCInstPrinter(MAI, MRI) {} + X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot); - virtual StringRef getOpcodeName(unsigned Opcode) const; // Autogenerated by tblgen, returns true if we successfully printed an // alias. @@ -36,7 +36,6 @@ public: // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &OS); static const char *getRegisterName(unsigned RegNo); - static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS); void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS); diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 46a96d2..4ea662c 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -17,15 +17,13 @@ #include "X86InstComments.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include <cctype> using namespace llvm; -// Include the auto-generated portion of the assembly writer. 
-#define GET_INSTRUCTION_NAME
 #include "X86GenAsmWriter1.inc"
 
 void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
@@ -43,9 +41,6 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
   if (CommentStream)
     EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
 }
-StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const {
-  return getInstructionName(Opcode);
-}
 
 void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
                                      raw_ostream &O) {
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index ea1d38a..4f5938d 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -23,17 +23,16 @@ class MCOperand;
 
 class X86IntelInstPrinter : public MCInstPrinter {
 public:
-  X86IntelInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI)
-    : MCInstPrinter(MAI, MRI) {}
+  X86IntelInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                      const MCRegisterInfo &MRI)
+    : MCInstPrinter(MAI, MII, MRI) {}
 
   virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
   virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot);
-  virtual StringRef getOpcodeName(unsigned Opcode) const;
 
   // Autogenerated by tblgen.
   void printInstruction(const MCInst *MI, raw_ostream &O);
   static const char *getRegisterName(unsigned RegNo);
-  static const char *getInstructionName(unsigned Opcode);
 
   void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 3f770f7..32e40fe 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -9,7 +9,6 @@
 
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "MCTargetDesc/X86FixupKinds.h"
-#include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCELFObjectWriter.h"
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 003a14a..afa545c 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -155,4 +155,7 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
   AssemblerDialect = AsmWriterFlavor;
 
   TextAlignFillValue = 0x90;
+
+  // Exception handling
+  ExceptionsType = ExceptionHandling::DwarfCFI;
 }
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index efd18c7..3482363 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -474,12 +474,13 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
 
 static MCInstPrinter *createX86MCInstPrinter(const Target &T,
                                              unsigned SyntaxVariant,
                                              const MCAsmInfo &MAI,
+                                             const MCInstrInfo &MII,
                                              const MCRegisterInfo &MRI,
                                              const MCSubtargetInfo &STI) {
   if (SyntaxVariant == 0)
-    return new X86ATTInstPrinter(MAI, MRI);
+    return new X86ATTInstPrinter(MAI, MII, MRI);
   if (SyntaxVariant == 1)
-    return new X86IntelInstPrinter(MAI, MRI);
+    return new X86IntelInstPrinter(MAI, MII, MRI);
   return 0;
 }
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index f9c1d35..6a8a4fd 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -2060,3 +2060,21 @@ Instead we could generate:
 The trick is to match "fetch_and_add(X, -C) == C".
 
 //===---------------------------------------------------------------------===//
+
+unsigned t(unsigned a, unsigned b) {
+  return a <= b ? 5 : -5;
+}
+
+We generate:
+	movl	$5, %ecx
+	cmpl	%esi, %edi
+	movl	$-5, %eax
+	cmovbel	%ecx, %eax
+
+GCC:
+	cmpl	%edi, %esi
+	sbbl	%eax, %eax
+	andl	$-10, %eax
+	addl	$5, %eax
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 32c722a..a802333 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -169,6 +169,9 @@ void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
 
 void DecodeVPERM2X128Mask(EVT VT, unsigned Imm,
                           SmallVectorImpl<int> &ShuffleMask) {
+  if (Imm & 0x88)
+    return; // Not a shuffle
+
   unsigned HalfSize = VT.getVectorNumElements()/2;
   unsigned FstHalfBegin = (Imm & 0x3) * HalfSize;
   unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index f1cedf3..7db7ccb 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -19,7 +19,6 @@
 #include "X86MachineFunctionInfo.h"
 #include "X86TargetMachine.h"
 #include "InstPrinter/X86ATTInstPrinter.h"
-#include "InstPrinter/X86IntelInstPrinter.h"
 #include "llvm/CallingConv.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
@@ -265,8 +264,8 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
 void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op,
                                raw_ostream &O) {
   unsigned char value = MI->getOperand(Op).getImm();
-  assert(value <= 7 && "Invalid ssecc argument!");
   switch (value) {
+  default: llvm_unreachable("Invalid ssecc argument!");
   case 0: O << "eq"; break;
   case 1: O << "lt"; break;
   case 2: O << "le"; break;
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 3d63b7e..69752c5 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -2179,7 +2179,7 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
 
 namespace llvm {
-  llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
+  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
     return new X86FastISel(funcInfo);
   }
 }
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 936df27..ed1707d 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -28,7 +28,6 @@
 #include "X86InstrInfo.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -219,7 +218,7 @@ namespace {
     /// getSTReg - Return the X86::ST(i) register which contains the specified
     /// FP<RegNo> register.
     unsigned getSTReg(unsigned RegNo) const {
-      return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
+      return StackTop - 1 - getSlot(RegNo) + X86::ST0;
    }
 
     // pushReg - Push the specified FP<n> register onto the stack.
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 9405c2f..8e2b1d6 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -36,7 +36,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -621,14 +620,14 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { // Handle X86-64 rip-relative addresses. We check this before checking direct // folding because RIP is preferable to non-RIP accesses. - if (Subtarget->is64Bit() && + if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP && // Under X86-64 non-small code model, GV (and friends) are 64-bits, so // they cannot be folded into immediate fields. // FIXME: This can be improved for kernel and other models? - (M == CodeModel::Small || M == CodeModel::Kernel) && - // Base and index reg must be 0 in order to use %rip as base and lowering - // must allow RIP. - !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) { + (M == CodeModel::Small || M == CodeModel::Kernel)) { + // Base and index reg must be 0 in order to use %rip as base. + if (AM.hasBaseOrIndexReg()) + return true; if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { X86ISelAddressMode Backup = AM; AM.GV = G->getGlobal(); @@ -663,11 +662,12 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { } // Handle the case when globals fit in our immediate field: This is true for - // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit - // mode, this results in a non-RIP-relative computation. + // X86-32 always and X86-64 when in -mcmodel=small mode. In 64-bit + // mode, this only applies to a non-RIP-relative computation. if (!Subtarget->is64Bit() || - ((M == CodeModel::Small || M == CodeModel::Kernel) && - TM.getRelocationModel() == Reloc::Static)) { + M == CodeModel::Small || M == CodeModel::Kernel) { + assert(N.getOpcode() != X86ISD::WrapperRIP && + "RIP-relative addressing already handled"); if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { AM.GV = G->getGlobal(); AM.Disp += G->getOffset(); @@ -897,7 +897,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, APInt MaskedHighBits = APInt::getHighBitsSet(X.getValueSizeInBits(), MaskLZ); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(X, MaskedHighBits, KnownZero, KnownOne); + DAG.ComputeMaskedBits(X, KnownZero, KnownOne); if (MaskedHighBits != KnownZero) return true; // We've identified a pattern that can be transformed into a single shift @@ -1848,6 +1848,96 @@ static bool HasNoSignedComparisonUses(SDNode *N) { return true; } +/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode +/// is suitable for doing the {load; increment or decrement; store} to modify +/// transformation. +static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, + SDValue StoredVal, SelectionDAG *CurDAG, + LoadSDNode* &LoadNode, SDValue &InputChain) { + + // is the value stored the result of a DEC or INC? + if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false; + + // is the stored value result 0 of the load? + if (StoredVal.getResNo() != 0) return false; + + // are there other uses of the loaded value than the inc or dec? + if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false; + + // is the store non-extending and non-indexed? 
+ if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal()) + return false; + + SDValue Load = StoredVal->getOperand(0); + // Is the stored value a non-extending and non-indexed load? + if (!ISD::isNormalLoad(Load.getNode())) return false; + + // Return LoadNode by reference. + LoadNode = cast<LoadSDNode>(Load); + // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8) + EVT LdVT = LoadNode->getMemoryVT(); + if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 && + LdVT != MVT::i8) + return false; + + // Is store the only read of the loaded value? + if (!Load.hasOneUse()) + return false; + + // Is the address of the store the same as the load? + if (LoadNode->getBasePtr() != StoreNode->getBasePtr() || + LoadNode->getOffset() != StoreNode->getOffset()) + return false; + + // Check if the chain is produced by the load or is a TokenFactor with + // the load output chain as an operand. Return InputChain by reference. + SDValue Chain = StoreNode->getChain(); + + bool ChainCheck = false; + if (Chain == Load.getValue(1)) { + ChainCheck = true; + InputChain = LoadNode->getChain(); + } else if (Chain.getOpcode() == ISD::TokenFactor) { + SmallVector<SDValue, 4> ChainOps; + for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) { + SDValue Op = Chain.getOperand(i); + if (Op == Load.getValue(1)) { + ChainCheck = true; + continue; + } + ChainOps.push_back(Op); + } + + if (ChainCheck) + // Make a new TokenFactor with all the other input chains except + // for the load. + InputChain = CurDAG->getNode(ISD::TokenFactor, Chain.getDebugLoc(), + MVT::Other, &ChainOps[0], ChainOps.size()); + } + if (!ChainCheck) + return false; + + return true; +} + +/// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory +/// increment or decrement. Opc should be X86ISD::DEC or X86ISD::INC. +static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) { + if (Opc == X86ISD::DEC) { + if (LdVT == MVT::i64) return X86::DEC64m; + if (LdVT == MVT::i32) return X86::DEC32m; + if (LdVT == MVT::i16) return X86::DEC16m; + if (LdVT == MVT::i8) return X86::DEC8m; + } else { + assert(Opc == X86ISD::INC && "unrecognized opcode"); + if (LdVT == MVT::i64) return X86::INC64m; + if (LdVT == MVT::i32) return X86::INC32m; + if (LdVT == MVT::i16) return X86::INC16m; + if (LdVT == MVT::i8) return X86::INC8m; + } + llvm_unreachable("unrecognized size for LdVT"); +} + SDNode *X86DAGToDAGISel::Select(SDNode *Node) { EVT NVT = Node->getValueType(0); unsigned Opc, MOpc; @@ -2355,9 +2445,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { break; } case ISD::STORE: { + // Change a chain of {load; incr or dec; store} of the same value into + // a simple increment or decrement through memory of that value, if the + // uses of the modified value and its address are suitable. // The DEC64m tablegen pattern is currently not able to match the case where - // the EFLAGS on the original DEC are used. - // we'll need to improve tablegen to allow flags to be transferred from a + // the EFLAGS on the original DEC are used. (This also applies to + // {INC,DEC}X{64,32,16,8}.) + // We'll need to improve tablegen to allow flags to be transferred from a // node in the pattern to the result node. 
probably with a new keyword // for example, we have this // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", @@ -2367,44 +2461,17 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", // [(store (add (loadi64 addr:$dst), -1), addr:$dst), // (transferrable EFLAGS)]>; + StoreSDNode *StoreNode = cast<StoreSDNode>(Node); - SDValue Chain = StoreNode->getOperand(0); SDValue StoredVal = StoreNode->getOperand(1); - SDValue Address = StoreNode->getOperand(2); - SDValue Undef = StoreNode->getOperand(3); - - if (StoreNode->getMemOperand()->getSize() != 8 || - Undef->getOpcode() != ISD::UNDEF || - Chain->getOpcode() != ISD::LOAD || - StoredVal->getOpcode() != X86ISD::DEC || - StoredVal.getResNo() != 0 || - !StoredVal.getNode()->hasNUsesOfValue(1, 0) || - !Chain.getNode()->hasNUsesOfValue(1, 0) || - StoredVal->getOperand(0).getNode() != Chain.getNode()) - break; + unsigned Opc = StoredVal->getOpcode(); - //OPC_CheckPredicate, 1, // Predicate_nontemporalstore - if (StoreNode->isNonTemporal()) + LoadSDNode *LoadNode = 0; + SDValue InputChain; + if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG, + LoadNode, InputChain)) break; - LoadSDNode *LoadNode = cast<LoadSDNode>(Chain.getNode()); - if (LoadNode->getOperand(1) != Address || - LoadNode->getOperand(2) != Undef) - break; - - if (!ISD::isNormalLoad(LoadNode)) - break; - - if (!ISD::isNormalStore(StoreNode)) - break; - - // check load chain has only one use (from the store) - if (!Chain.hasOneUse()) - break; - - // Merge the input chains if they are not intra-pattern references. - SDValue InputChain = LoadNode->getOperand(0); - SDValue Base, Scale, Index, Disp, Segment; if (!SelectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp, Segment)) @@ -2414,7 +2481,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MemOp[0] = StoreNode->getMemOperand(); MemOp[1] = LoadNode->getMemOperand(); const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain }; - MachineSDNode *Result = CurDAG->getMachineNode(X86::DEC64m, + EVT LdVT = LoadNode->getMemoryVT(); + unsigned newOpc = getFusedLdStOpcode(LdVT, Opc); + MachineSDNode *Result = CurDAG->getMachineNode(newOpc, Node->getDebugLoc(), MVT::i32, MVT::Other, Ops, array_lengthof(Ops)); @@ -2465,6 +2534,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, /// X86-specific DAG, ready for instruction scheduling. /// FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, - llvm::CodeGenOpt::Level OptLevel) { + CodeGenOpt::Level OptLevel) { return new X86DAGToDAGISel(TM, OptLevel); } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 88f3829..04299f3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1578,18 +1578,20 @@ X86TargetLowering::LowerReturn(SDValue Chain, MVT::Other, &RetOps[0], RetOps.size()); } -bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const { +bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { if (N->getNumValues() != 1) return false; if (!N->hasNUsesOfValue(1, 0)) return false; + SDValue TCChain = Chain; SDNode *Copy = *N->use_begin(); if (Copy->getOpcode() == ISD::CopyToReg) { // If the copy has a glue operand, we conservatively assume it isn't safe to // perform a tail call. 
if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) return false; + TCChain = Copy->getOperand(0); } else if (Copy->getOpcode() != ISD::FP_EXTEND) return false; @@ -1601,7 +1603,11 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const { HasRet = true; } - return HasRet; + if (!HasRet) + return false; + + Chain = TCChain; + return true; } EVT @@ -2929,6 +2935,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: case X86ISD::VPERMILP: + case X86ISD::VPERMI: return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8)); } } @@ -3970,6 +3977,27 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) { return Index / NumElemsPerChunk; } +/// getShuffleCLImmediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions. +/// Handles 256-bit. +static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) { + EVT VT = N->getValueType(0); + + unsigned NumElts = VT.getVectorNumElements(); + + assert((VT.is256BitVector() && NumElts == 4) && + "Unsupported vector type for VPERMQ/VPERMPD"); + + unsigned Mask = 0; + for (unsigned i = 0; i != NumElts; ++i) { + int Elt = N->getMaskElt(i); + if (Elt < 0) + continue; + Mask |= Elt << (i*2); + } + + return Mask; +} /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. bool X86::isZeroNode(SDValue Elt) { @@ -4402,6 +4430,7 @@ static bool getTargetShuffleMask(SDNode *N, EVT VT, case X86ISD::VPERM2X128: ImmN = N->getOperand(N->getNumOperands()-1); DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + if (Mask.empty()) return false; break; case X86ISD::MOVDDUP: case X86ISD::MOVLHPD: @@ -4852,41 +4881,42 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, return SDValue(); } -/// isVectorBroadcast - Check if the node chain is suitable to be xformed to -/// a vbroadcast node. We support two patterns: -/// 1. A splat BUILD_VECTOR which uses a single scalar load. +/// LowerVectorBroadcast - Attempt to use the vbroadcast instruction +/// to generate a splat value for the following cases: +/// 1. A splat BUILD_VECTOR which uses a single scalar load, or a constant. /// 2. A splat shuffle which uses a scalar_to_vector node which comes from -/// a scalar load. -/// The scalar load node is returned when a pattern is found, +/// a scalar load, or a constant. +/// The VBROADCAST node is returned when a pattern is found, /// or SDValue() otherwise. -static SDValue isVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget) { +SDValue +X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const { if (!Subtarget->hasAVX()) return SDValue(); EVT VT = Op.getValueType(); - SDValue V = Op; - - if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) - V = V.getOperand(0); + DebugLoc dl = Op.getDebugLoc(); - //A suspected load to be broadcasted. SDValue Ld; + bool ConstSplatVal; - switch (V.getOpcode()) { + switch (Op.getOpcode()) { default: // Unknown pattern found. return SDValue(); case ISD::BUILD_VECTOR: { // The BUILD_VECTOR node must be a splat. - if (!isSplatVector(V.getNode())) + if (!isSplatVector(Op.getNode())) return SDValue(); - Ld = V.getOperand(0); + Ld = Op.getOperand(0); + ConstSplatVal = (Ld.getOpcode() == ISD::Constant || + Ld.getOpcode() == ISD::ConstantFP); // The suspected load node has several users. Make sure that all // of its users are from the BUILD_VECTOR node. 
- if (!Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0)) + // Constants may have multiple users. + if (!ConstSplatVal && !Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0)) return SDValue(); break; } @@ -4904,15 +4934,50 @@ static SDValue isVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget) { return SDValue(); Ld = Sc.getOperand(0); + ConstSplatVal = (Ld.getOpcode() == ISD::Constant || + Ld.getOpcode() == ISD::ConstantFP); // The scalar_to_vector node and the suspected // load node must have exactly one user. - if (!Sc.hasOneUse() || !Ld.hasOneUse()) + // Constants may have multiple users. + if (!ConstSplatVal && (!Sc.hasOneUse() || !Ld.hasOneUse())) return SDValue(); break; } } + bool Is256 = VT.getSizeInBits() == 256; + bool Is128 = VT.getSizeInBits() == 128; + + // Handle the broadcasting a single constant scalar from the constant pool + // into a vector. On Sandybridge it is still better to load a constant vector + // from the constant pool and not to broadcast it from a scalar. + if (ConstSplatVal && Subtarget->hasAVX2()) { + EVT CVT = Ld.getValueType(); + assert(!CVT.isVector() && "Must not broadcast a vector type"); + unsigned ScalarSize = CVT.getSizeInBits(); + + if ((Is256 && (ScalarSize == 32 || ScalarSize == 64)) || + (Is128 && (ScalarSize == 32))) { + + const Constant *C = 0; + if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld)) + C = CI->getConstantIntValue(); + else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld)) + C = CF->getConstantFPValue(); + + assert(C && "Invalid constant type"); + + SDValue CP = DAG.getConstantPool(C, getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment(); + Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP, + MachinePointerInfo::getConstantPool(), + false, false, false, Alignment); + + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); + } + } + // The scalar source must be a normal load. if (!ISD::isNormalLoad(Ld.getNode())) return SDValue(); @@ -4921,28 +4986,26 @@ static SDValue isVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget) { if (Ld->hasAnyUseOfValue(1)) return SDValue(); - bool Is256 = VT.getSizeInBits() == 256; - bool Is128 = VT.getSizeInBits() == 128; unsigned ScalarSize = Ld.getValueType().getSizeInBits(); // VBroadcast to YMM if (Is256 && (ScalarSize == 32 || ScalarSize == 64)) - return Ld; + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); // VBroadcast to XMM if (Is128 && (ScalarSize == 32)) - return Ld; + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); // The integer check is needed for the 64-bit into 128-bit so it doesn't match // double since there is vbroadcastsd xmm if (Subtarget->hasAVX2() && Ld.getValueType().isInteger()) { // VBroadcast to YMM if (Is256 && (ScalarSize == 8 || ScalarSize == 16)) - return Ld; + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); // VBroadcast to XMM if (Is128 && (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)) - return Ld; + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); } // Unsupported broadcast. 
@@ -4977,9 +5040,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return getOnesVector(VT, Subtarget->hasAVX2(), DAG, dl); } - SDValue LD = isVectorBroadcast(Op, Subtarget); - if (LD.getNode()) - return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD); + SDValue Broadcast = LowerVectorBroadcast(Op, DAG); + if (Broadcast.getNode()) + return Broadcast; unsigned EVTBits = ExtVT.getSizeInBits(); @@ -5343,6 +5406,85 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { return LowerAVXCONCAT_VECTORS(Op, DAG); } +// Try to lower a shuffle node into a simple blend instruction. +static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); + EVT VT = Op.getValueType(); + EVT InVT = V1.getValueType(); + int MaskSize = VT.getVectorNumElements(); + int InSize = InVT.getVectorNumElements(); + + if (!Subtarget->hasSSE41()) + return SDValue(); + + if (MaskSize != InSize) + return SDValue(); + + int ISDNo = 0; + MVT OpTy; + + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v8i16: + ISDNo = X86ISD::BLENDPW; + OpTy = MVT::v8i16; + break; + case MVT::v4i32: + case MVT::v4f32: + ISDNo = X86ISD::BLENDPS; + OpTy = MVT::v4f32; + break; + case MVT::v2i64: + case MVT::v2f64: + ISDNo = X86ISD::BLENDPD; + OpTy = MVT::v2f64; + break; + case MVT::v8i32: + case MVT::v8f32: + if (!Subtarget->hasAVX()) + return SDValue(); + ISDNo = X86ISD::BLENDPS; + OpTy = MVT::v8f32; + break; + case MVT::v4i64: + case MVT::v4f64: + if (!Subtarget->hasAVX()) + return SDValue(); + ISDNo = X86ISD::BLENDPD; + OpTy = MVT::v4f64; + break; + case MVT::v16i16: + if (!Subtarget->hasAVX2()) + return SDValue(); + ISDNo = X86ISD::BLENDPW; + OpTy = MVT::v16i16; + break; + } + assert(ISDNo && "Invalid Op Number"); + + unsigned MaskVals = 0; + + for (int i = 0; i < MaskSize; ++i) { + int EltIdx = SVOp->getMaskElt(i); + if (EltIdx == i || EltIdx == -1) + MaskVals |= (1<<i); + else if (EltIdx == (i + MaskSize)) + continue; // Bit is set to zero; + else return SDValue(); + } + + V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1); + V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2); + SDValue Ret = DAG.getNode(ISDNo, dl, OpTy, V1, V2, + DAG.getConstant(MaskVals, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Ret); +} + // v8i16 shuffles - Prefer shuffles in the following order: // 1. [all] pshuflw, pshufhw, optional move // 2. [ssse3] 1 x pshufb @@ -5836,96 +5978,79 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { unsigned NumElems = VT.getVectorNumElements(); unsigned NumLaneElems = NumElems / 2; - int MinRange[2][2] = { { static_cast<int>(NumElems), - static_cast<int>(NumElems) }, - { static_cast<int>(NumElems), - static_cast<int>(NumElems) } }; - int MaxRange[2][2] = { { -1, -1 }, { -1, -1 } }; + DebugLoc dl = SVOp->getDebugLoc(); + MVT EltVT = VT.getVectorElementType().getSimpleVT(); + EVT NVT = MVT::getVectorVT(EltVT, NumLaneElems); + SDValue Shufs[2]; - // Collect used ranges for each source in each lane + SmallVector<int, 16> Mask; for (unsigned l = 0; l < 2; ++l) { - unsigned LaneStart = l*NumLaneElems; + // Build a shuffle mask for the output, discovering on the fly which + // input vectors to use as shuffle operands (recorded in InputUsed). 
+ // If building a suitable shuffle vector proves too hard, then bail + // out with useBuildVector set. + int InputUsed[2] = { -1, -1 }; // Not yet discovered. + unsigned LaneStart = l * NumLaneElems; for (unsigned i = 0; i != NumLaneElems; ++i) { + // The mask element. This indexes into the input. int Idx = SVOp->getMaskElt(i+LaneStart); - if (Idx < 0) + if (Idx < 0) { + // the mask element does not index into any input vector. + Mask.push_back(-1); continue; - - int Input = 0; - if (Idx >= (int)NumElems) { - Idx -= NumElems; - Input = 1; } - if (Idx > MaxRange[l][Input]) - MaxRange[l][Input] = Idx; - if (Idx < MinRange[l][Input]) - MinRange[l][Input] = Idx; - } - } + // The input vector this mask element indexes into. + int Input = Idx / NumLaneElems; - // Make sure each range is 128-bits - int ExtractIdx[2][2] = { { -1, -1 }, { -1, -1 } }; - for (unsigned l = 0; l < 2; ++l) { - for (unsigned Input = 0; Input < 2; ++Input) { - if (MinRange[l][Input] == (int)NumElems && MaxRange[l][Input] < 0) - continue; + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NumLaneElems; - if (MinRange[l][Input] >= 0 && MaxRange[l][Input] < (int)NumLaneElems) - ExtractIdx[l][Input] = 0; - else if (MinRange[l][Input] >= (int)NumLaneElems && - MaxRange[l][Input] < (int)NumElems) - ExtractIdx[l][Input] = NumLaneElems; - else - return SDValue(); - } - } + // Find or create a shuffle vector operand to hold this input. + unsigned OpNo; + for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) { + if (InputUsed[OpNo] == Input) + // This input vector is already an operand. + break; + if (InputUsed[OpNo] < 0) { + // Create a new operand for this input vector. + InputUsed[OpNo] = Input; + break; + } + } - DebugLoc dl = SVOp->getDebugLoc(); - MVT EltVT = VT.getVectorElementType().getSimpleVT(); - EVT NVT = MVT::getVectorVT(EltVT, NumElems/2); + if (OpNo >= array_lengthof(InputUsed)) { + // More than two input vectors used! Give up. + return SDValue(); + } - SDValue Ops[2][2]; - for (unsigned l = 0; l < 2; ++l) { - for (unsigned Input = 0; Input < 2; ++Input) { - if (ExtractIdx[l][Input] >= 0) - Ops[l][Input] = Extract128BitVector(SVOp->getOperand(Input), - DAG.getConstant(ExtractIdx[l][Input], MVT::i32), - DAG, dl); - else - Ops[l][Input] = DAG.getUNDEF(NVT); + // Add the mask index for the new shuffle vector. + Mask.push_back(Idx + OpNo * NumLaneElems); } - } - // Generate 128-bit shuffles - SmallVector<int, 16> Mask1, Mask2; - for (unsigned i = 0; i != NumLaneElems; ++i) { - int Elt = SVOp->getMaskElt(i); - if (Elt >= (int)NumElems) { - Elt %= NumLaneElems; - Elt += NumLaneElems; - } else if (Elt >= 0) { - Elt %= NumLaneElems; - } - Mask1.push_back(Elt); - } - for (unsigned i = NumLaneElems; i != NumElems; ++i) { - int Elt = SVOp->getMaskElt(i); - if (Elt >= (int)NumElems) { - Elt %= NumLaneElems; - Elt += NumLaneElems; - } else if (Elt >= 0) { - Elt %= NumLaneElems; + if (InputUsed[0] < 0) { + // No input vectors were used! The result is undefined. + Shufs[l] = DAG.getUNDEF(NVT); + } else { + SDValue Op0 = Extract128BitVector(SVOp->getOperand(InputUsed[0] / 2), + DAG.getConstant((InputUsed[0] % 2) * NumLaneElems, MVT::i32), + DAG, dl); + // If only one input was used, use an undefined vector for the other. + SDValue Op1 = (InputUsed[1] < 0) ? DAG.getUNDEF(NVT) : + Extract128BitVector(SVOp->getOperand(InputUsed[1] / 2), + DAG.getConstant((InputUsed[1] % 2) * NumLaneElems, MVT::i32), + DAG, dl); + // At least one input vector was used. Create a new shuffle vector. 
+ Shufs[l] = DAG.getVectorShuffle(NVT, dl, Op0, Op1, &Mask[0]); } - Mask2.push_back(Elt); - } - SDValue Shuf1 = DAG.getVectorShuffle(NVT, dl, Ops[0][0], Ops[0][1], &Mask1[0]); - SDValue Shuf2 = DAG.getVectorShuffle(NVT, dl, Ops[1][0], Ops[1][1], &Mask2[0]); + Mask.clear(); + } // Concatenate the result back - SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shuf1, + SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shufs[0], DAG.getConstant(0, MVT::i32), DAG, dl); - return Insert128BitVector(V, Shuf2, DAG.getConstant(NumElems/2, MVT::i32), + return Insert128BitVector(V, Shufs[1],DAG.getConstant(NumLaneElems, MVT::i32), DAG, dl); } @@ -6203,10 +6328,8 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { getShuffleSHUFImmediate(SVOp), DAG); } -static -SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI, - const X86Subtarget *Subtarget) { +SDValue +X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); @@ -6222,9 +6345,9 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, int Size = VT.getSizeInBits(); // Use vbroadcast whenever the splat comes from a foldable load - SDValue LD = isVectorBroadcast(Op, Subtarget); - if (LD.getNode()) - return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD); + SDValue Broadcast = LowerVectorBroadcast(Op, DAG); + if (Broadcast.getNode()) + return Broadcast; // Handle splats by matching through known shuffle masks if ((Size == 128 && NumElem <= 4) || @@ -6309,7 +6432,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // Normalize the input vectors. Here splats, zeroed vectors, profitable // narrowing and commutation of operands should be handled. The actual code // doesn't include all of those, work in progress... - SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget); + SDValue NewOp = NormalizeVectorShuffle(Op, DAG); if (NewOp.getNode()) return NewOp; @@ -6524,6 +6647,27 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1, V2, getShuffleVPERM2X128Immediate(SVOp), DAG); + SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(Op, Subtarget, DAG); + if (BlendOp.getNode()) + return BlendOp; + + if (V2IsUndef && HasAVX2 && (VT == MVT::v8i32 || VT == MVT::v8f32)) { + SmallVector<SDValue, 8> permclMask; + for (unsigned i = 0; i != 8; ++i) { + permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MVT::i32)); + } + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, + &permclMask[0], 8); + // Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32 + return DAG.getNode(X86ISD::VPERMV, dl, VT, + DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1); + } + + if (V2IsUndef && HasAVX2 && (VT == MVT::v4i64 || VT == MVT::v4f64)) + return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1, + getShuffleCLImmediate(SVOp), DAG); + + //===--------------------------------------------------------------------===// // Since no target specific shuffle was selected for this generic one, // lower it into other known shuffles. 
FIXME: this isn't true yet, but @@ -7182,8 +7326,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) GV = GA->resolveAliasedGlobal(false); - TLSModel::Model model - = getTLSModel(GV, getTargetMachine().getRelocationModel()); + TLSModel::Model model = getTargetMachine().getTLSModel(GV); switch (model) { case TLSModel::GeneralDynamic: @@ -8099,8 +8242,8 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, unsigned BitWidth = Op0.getValueSizeInBits(); unsigned AndBitWidth = And.getValueSizeInBits(); if (BitWidth > AndBitWidth) { - APInt Mask = APInt::getAllOnesValue(BitWidth), Zeros, Ones; - DAG.ComputeMaskedBits(Op0, Mask, Zeros, Ones); + APInt Zeros, Ones; + DAG.ComputeMaskedBits(Op0, Zeros, Ones); if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth) return SDValue(); } @@ -9449,12 +9592,12 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx2_vperm2i128: return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); - case Intrinsic::x86_avx_vpermil_ps: - case Intrinsic::x86_avx_vpermil_pd: - case Intrinsic::x86_avx_vpermil_ps_256: - case Intrinsic::x86_avx_vpermil_pd_256: - return DAG.getNode(X86ISD::VPERMILP, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_avx2_permd: + case Intrinsic::x86_avx2_permps: + // Operands intentionally swapped. Mask is last operand to intrinsic, + // but second operand for node/intruction. + return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(1)); // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest @@ -10963,6 +11106,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::ANDNP: return "X86ISD::ANDNP"; case X86ISD::PSIGN: return "X86ISD::PSIGN"; case X86ISD::BLENDV: return "X86ISD::BLENDV"; + case X86ISD::BLENDPW: return "X86ISD::BLENDPW"; + case X86ISD::BLENDPS: return "X86ISD::BLENDPS"; + case X86ISD::BLENDPD: return "X86ISD::BLENDPD"; case X86ISD::HADD: return "X86ISD::HADD"; case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; @@ -11035,6 +11181,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; case X86ISD::VPERMILP: return "X86ISD::VPERMILP"; case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128"; + case X86ISD::VPERMV: return "X86ISD::VPERMV"; + case X86ISD::VPERMI: return "X86ISD::VPERMI"; case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; @@ -11192,14 +11340,15 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, unsigned notOpc, unsigned EAXreg, const TargetRegisterClass *RC, - bool invSrc) const { + bool Invert) const { // For the atomic bitwise operator, we generate // thisMBB: // newMBB: // ld t1 = [bitinstr.addr] // op t2 = t1, [bitinstr.val] + // not t3 = t2 (if Invert) // mov EAX = t1 - // lcs dest = [bitinstr.addr], t2 [EAX is implicit] + // lcs dest = [bitinstr.addr], t3 [EAX is implicit] // bz newMBB // fallthrough -->nextMBB const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); @@ -11247,13 +11396,6 @@ 
X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, for (int i=0; i <= lastAddrIndx; ++i) (*MIB).addOperand(*argOpers[i]); - unsigned tt = F->getRegInfo().createVirtualRegister(RC); - if (invSrc) { - MIB = BuildMI(newMBB, dl, TII->get(notOpc), tt).addReg(t1); - } - else - tt = t1; - unsigned t2 = F->getRegInfo().createVirtualRegister(RC); assert((argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm()) && @@ -11262,16 +11404,23 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2); else MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2); - MIB.addReg(tt); + MIB.addReg(t1); (*MIB).addOperand(*argOpers[valArgIndx]); + unsigned t3 = F->getRegInfo().createVirtualRegister(RC); + if (Invert) { + MIB = BuildMI(newMBB, dl, TII->get(notOpc), t3).addReg(t2); + } + else + t3 = t2; + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg); MIB.addReg(t1); MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc)); for (int i=0; i <= lastAddrIndx; ++i) (*MIB).addOperand(*argOpers[i]); - MIB.addReg(t2); + MIB.addReg(t3); assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand"); (*MIB).setMemRefs(bInstr->memoperands_begin(), bInstr->memoperands_end()); @@ -11294,7 +11443,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, unsigned regOpcH, unsigned immOpcL, unsigned immOpcH, - bool invSrc) const { + bool Invert) const { // For the atomic bitwise operator, we generate // thisMBB (instructions are in pairs, except cmpxchg8b) // ld t1,t2 = [bitinstr.addr] @@ -11302,6 +11451,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4) // op t5, t6 <- out1, out2, [bitinstr.val] // (for SWAP, substitute: mov t5, t6 <- [bitinstr.val]) + // neg t7, t8 < t5, t6 (if Invert) // mov ECX, EBX <- t5, t6 // mov EAX, EDX <- t1, t2 // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit] @@ -11385,16 +11535,9 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB); // The subsequent operations should be using the destination registers of - //the PHI instructions. - if (invSrc) { - t1 = F->getRegInfo().createVirtualRegister(RC); - t2 = F->getRegInfo().createVirtualRegister(RC); - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t1).addReg(dest1Oper.getReg()); - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t2).addReg(dest2Oper.getReg()); - } else { - t1 = dest1Oper.getReg(); - t2 = dest2Oper.getReg(); - } + // the PHI instructions. 
+ t1 = dest1Oper.getReg(); + t2 = dest2Oper.getReg(); int valArgIndx = lastAddrIndx + 1; assert((argOpers[valArgIndx]->isReg() || @@ -11421,15 +11564,26 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, MIB.addReg(t2); (*MIB).addOperand(*argOpers[valArgIndx + 1]); + unsigned t7, t8; + if (Invert) { + t7 = F->getRegInfo().createVirtualRegister(RC); + t8 = F->getRegInfo().createVirtualRegister(RC); + MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t7).addReg(t5); + MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t8).addReg(t6); + } else { + t7 = t5; + t8 = t6; + } + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX); MIB.addReg(t1); MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX); MIB.addReg(t2); MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX); - MIB.addReg(t5); + MIB.addReg(t7); MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX); - MIB.addReg(t6); + MIB.addReg(t8); MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B)); for (int i=0; i <= lastAddrIndx; ++i) @@ -12620,11 +12774,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, //===----------------------------------------------------------------------===// void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { + unsigned BitWidth = KnownZero.getBitWidth(); unsigned Opc = Op.getOpcode(); assert((Opc >= ISD::BUILTIN_OP_END || Opc == ISD::INTRINSIC_WO_CHAIN || @@ -12633,7 +12787,7 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, "Should use MaskedValueIsZero if you don't know whether Op" " is a target node!"); - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything. + KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. switch (Opc) { default: break; case X86ISD::ADD: @@ -12652,8 +12806,7 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, break; // Fallthrough case X86ISD::SETCC: - KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(), - Mask.getBitWidth() - 1); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); break; case ISD::INTRINSIC_WO_CHAIN: { unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); @@ -12678,8 +12831,7 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break; case Intrinsic::x86_avx2_pmovmskb: NumLoBits = 32; break; } - KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(), - Mask.getBitWidth() - NumLoBits); + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits); break; } } @@ -14000,13 +14152,14 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them. - if (Mask.getOpcode() != ISD::BITCAST || - X.getOpcode() != ISD::BITCAST || - Y.getOpcode() != ISD::BITCAST) - return SDValue(); - // Look through mask bitcast. - Mask = Mask.getOperand(0); + if (Mask.getOpcode() == ISD::BITCAST) + Mask = Mask.getOperand(0); + if (X.getOpcode() == ISD::BITCAST) + X = X.getOperand(0); + if (Y.getOpcode() == ISD::BITCAST) + Y = Y.getOperand(0); + EVT MaskVT = Mask.getValueType(); // Validate that the Mask operand is a vector sra node. @@ -14027,8 +14180,6 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, // Now we know we at least have a plendvb with the mask val. See if // we can form a psignb/w/d. 
// psign = x.type == y.type == mask.type && y = sub(0, x); - X = X.getOperand(0); - Y = Y.getOperand(0); if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X && ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) && X.getValueType() == MaskVT && Y.getValueType() == MaskVT) { diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 0327b1f..09116e8 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -175,9 +175,14 @@ namespace llvm { /// PSIGN - Copy integer sign. PSIGN, - /// BLEND family of opcodes + /// BLENDV - Blend where the selector is an XMM. BLENDV, + /// BLENDxx - Blend where the selector is an immediate. + BLENDPW, + BLENDPS, + BLENDPD, + /// HADD - Integer horizontal add. HADD, @@ -280,6 +285,8 @@ namespace llvm { UNPCKL, UNPCKH, VPERMILP, + VPERMV, + VPERMI, VPERM2X128, VBROADCAST, @@ -504,7 +511,6 @@ namespace llvm { /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. virtual void computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, @@ -781,6 +787,8 @@ namespace llvm { // Utility functions to help LowerVECTOR_SHUFFLE SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const; + SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, @@ -804,7 +812,7 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; - virtual bool isUsedByReturnOnly(SDNode *N) const; + virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const; virtual bool mayBeEmittedAsTailCall(CallInst *CI) const; @@ -849,7 +857,7 @@ namespace llvm { unsigned notOpc, unsigned EAXreg, const TargetRegisterClass *RC, - bool invSrc = false) const; + bool Invert = false) const; MachineBasicBlock *EmitAtomicBit6432WithCustomInserter( MachineInstr *BInstr, @@ -858,7 +866,7 @@ namespace llvm { unsigned regOpcH, unsigned immOpcL, unsigned immOpcH, - bool invSrc = false) const; + bool Invert = false) const; /// Utility function to emit atomic min and max. It takes the min/max /// instruction to expand, the associated basic block, and the associated diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 7fa7499..0eee083 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -53,7 +53,7 @@ def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. 
[(set AL, (mul AL, GR8:$src)), - (implicit EFLAGS)]>; // AL,AH = AL*GR8 + (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8 let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), @@ -97,31 +97,32 @@ def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), let neverHasSideEffects = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>; - // AL,AH = AL*GR8 +def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [], + IIC_IMUL8>; // AL,AH = AL*GR8 let Defs = [AX,DX,EFLAGS], Uses = [AX] in -def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>, - OpSize; // AX,DX = AX*GR16 +def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [], + IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in -def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>; - // EAX,EDX = EAX*GR32 +def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [], + IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in -def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>; - // RAX,RDX = RAX*GR64 +def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [], + IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64 let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), - "imul{b}\t$src", []>; // AL,AH = AL*[mem8] + "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8] let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src), - "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16] + "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize; + // AX,DX = AX*[mem16] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), - "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32] + "imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), - "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64] + "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64] } } // neverHasSideEffects @@ -639,10 +640,11 @@ class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins, // BinOpRR - Instructions like "add reg, reg, reg". class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - dag outlist, list<dag> pattern, Format f = MRMDestReg> + dag outlist, list<dag> pattern, InstrItinClass itin, + Format f = MRMDestReg> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>; // BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has // just a regclass (no eflags) as a result. @@ -650,7 +652,8 @@ class BinOpRR_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode> : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), [(set typeinfo.RegClass:$dst, - (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>; + (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))], + IIC_BIN_NONMEM>; // BinOpRR_F - Instructions like "cmp reg, Reg", where the pattern has // just a EFLAGS as a result. 
@@ -659,7 +662,7 @@ class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpRR<opcode, mnemonic, typeinfo, (outs), [(set EFLAGS, (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))], - f>; + IIC_BIN_NONMEM, f>; // BinOpRR_RF - Instructions like "add reg, reg, reg", where the pattern has // both a regclass and EFLAGS as a result. @@ -667,7 +670,8 @@ class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode> : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), [(set typeinfo.RegClass:$dst, EFLAGS, - (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>; + (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))], + IIC_BIN_NONMEM>; // BinOpRR_RFF - Instructions like "adc reg, reg, reg", where the pattern has // both a regclass and EFLAGS as a result, and has EFLAGS as input. @@ -676,14 +680,14 @@ class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), [(set typeinfo.RegClass:$dst, EFLAGS, (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2, - EFLAGS))]>; + EFLAGS))], IIC_BIN_NONMEM>; // BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding). class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> : ITy<opcode, MRMSrcReg, typeinfo, (outs typeinfo.RegClass:$dst), (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $dst|$dst, $src2}", []> { + mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; } @@ -692,7 +696,7 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> : ITy<opcode, MRMSrcReg, typeinfo, (outs), (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", []> { + mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; } @@ -702,7 +706,7 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, dag outlist, list<dag> pattern> : ITy<opcode, MRMSrcMem, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_MEM>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>; // BinOpRM_R - Instructions like "add reg, reg, [mem]". class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -738,7 +742,7 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f, dag outlist, list<dag> pattern> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern> { + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> { let ImmT = typeinfo.ImmEncoding; } @@ -762,7 +766,6 @@ class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), [(set typeinfo.RegClass:$dst, EFLAGS, (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>; - // BinOpRI_RFF - Instructions like "adc reg, reg, imm". 
 class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
                   SDNode opnode, Format f>
@@ -776,7 +779,7 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
                Format f, dag outlist, list<dag> pattern>
   : ITy<opcode, f, typeinfo, outlist,
         (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
-        mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+        mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
   let ImmT = Imm8; // Always 8-bit immediate.
 }
@@ -853,7 +856,6 @@ class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo,
             [(store (opnode (typeinfo.VT (load addr:$dst)),
                             typeinfo.ImmOperator:$src), addr:$dst),
              (implicit EFLAGS)]>;
-
 // BinOpMI_RMW_FF - Instructions like "adc [mem], imm".
 class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
                      SDNode opnode, Format f>
@@ -1219,12 +1221,12 @@ let neverHasSideEffects = 1 in {
   let isCommutable = 1 in
   def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
              !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
-             []>, T8XD, VEX_4V;
+             [], IIC_MUL8>, T8XD, VEX_4V;

   let mayLoad = 1 in
   def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
              !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
-             []>, T8XD, VEX_4V;
+             [], IIC_MUL8>, T8XD, VEX_4V;
 }
 }
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 42a5014..6f9e849 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -301,34 +301,67 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
 // String Pseudo Instructions
 //
 let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
-def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
-                  [(X86rep_movs i8)], IIC_REP_MOVS>, REP;
-def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
-                  [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize;
-def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
-                  [(X86rep_movs i32)], IIC_REP_MOVS>, REP;
+def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+                     [(X86rep_movs i8)], IIC_REP_MOVS>, REP,
+                   Requires<[In32BitMode]>;
+def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+                     [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize,
+                   Requires<[In32BitMode]>;
+def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+                     [(X86rep_movs i32)], IIC_REP_MOVS>, REP,
+                   Requires<[In32BitMode]>;
 }
-let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
-def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
-                   [(X86rep_movs i64)], IIC_REP_MOVS>, REP;
-
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {
+def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+                     [(X86rep_movs i8)], IIC_REP_MOVS>, REP,
+                   Requires<[In64BitMode]>;
+def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+                     [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize,
+                   Requires<[In64BitMode]>;
+def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+                     [(X86rep_movs i32)], IIC_REP_MOVS>, REP,
+                   Requires<[In64BitMode]>;
+def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
+                      [(X86rep_movs i64)], IIC_REP_MOVS>, REP,
+                   Requires<[In64BitMode]>;
+}
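For readers less familiar with the string instructions: rep movs copies ECX (or
RCX) elements from [ESI]/[RSI] to [EDI]/[RDI], so the 32-bit and 64-bit pseudo
instructions clobber different register sets and have to be gated on the CPU
mode with Requires<[...]>; the REP_STOS definitions below get the same
treatment. A minimal scalar model of rep movsb, as an illustration only (not
LLVM code; the helper name is ours):

#include <cstddef>
#include <cstdint>

// Scalar model of "rep movsb" with the direction flag clear: copy
// `count` (ECX/RCX) bytes from src (ESI/RSI) to dst (EDI/RDI),
// advancing all three registers as the hardware does.
void rep_movsb(uint8_t *&dst, const uint8_t *&src, size_t &count) {
  while (count != 0) {
    *dst++ = *src++;
    --count;
  }
}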
 // FIXME: Should use "(X86rep_stos AL)" as the pattern.
-let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
-                  [(X86rep_stos i8)], IIC_REP_STOS>, REP;
-let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
-                  [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize;
-let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
-                  [(X86rep_stos i32)], IIC_REP_STOS>, REP;
-
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
-def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
-                   [(X86rep_stos i64)], IIC_REP_STOS>, REP;
+let Defs = [ECX,EDI], isCodeGenOnly = 1 in {
+  let Uses = [AL,ECX,EDI] in
+  def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+                       [(X86rep_stos i8)], IIC_REP_STOS>, REP,
+                     Requires<[In32BitMode]>;
+  let Uses = [AX,ECX,EDI] in
+  def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+                       [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize,
+                     Requires<[In32BitMode]>;
+  let Uses = [EAX,ECX,EDI] in
+  def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+                       [(X86rep_stos i32)], IIC_REP_STOS>, REP,
+                     Requires<[In32BitMode]>;
+}
+let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
+  let Uses = [AL,RCX,RDI] in
+  def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+                       [(X86rep_stos i8)], IIC_REP_STOS>, REP,
+                     Requires<[In64BitMode]>;
+  let Uses = [AX,RCX,RDI] in
+  def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+                       [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize,
+                     Requires<[In64BitMode]>;
+  let Uses = [RAX,RCX,RDI] in
+  def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+                       [(X86rep_stos i32)], IIC_REP_STOS>, REP,
+                     Requires<[In64BitMode]>;
+
+  let Uses = [RAX,RCX,RDI] in
+  def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
+                        [(X86rep_stos i64)], IIC_REP_STOS>, REP,
+                      Requires<[In64BitMode]>;
+}

 //===----------------------------------------------------------------------===//
 // Thread Local Storage Instructions
@@ -1134,12 +1167,10 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
     return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());

-  unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
-  APInt Mask = APInt::getAllOnesValue(BitWidth);
   APInt KnownZero0, KnownOne0;
-  CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+  CurDAG->ComputeMaskedBits(N->getOperand(0), KnownZero0, KnownOne0, 0);
   APInt KnownZero1, KnownOne1;
-  CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+  CurDAG->ComputeMaskedBits(N->getOperand(1), KnownZero1, KnownOne1, 0);
   return (~KnownZero0 & ~KnownZero1) == 0;
 }]>;
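The ComputeMaskedBits change above only drops the now-redundant mask argument;
the or_is_add logic is unchanged. The idea behind it: if at every bit position
at least one operand is known to be zero, no position can generate a carry, so
x | y and x + y agree and the OR may be selected as an ADD. A tiny
self-contained check of that reasoning, with hypothetical known-bits masks and
plain integers standing in for LLVM's APInt:

#include <cassert>
#include <cstdint>

int main() {
  // Suppose analysis proved the bits that may be one in x are 0x0F and
  // the bits that may be one in y are 0xF0 (maybeOne == ~KnownZero).
  uint32_t maybeOneX = 0x0F, maybeOneY = 0xF0;
  assert((maybeOneX & maybeOneY) == 0); // the (~KZ0 & ~KZ1) == 0 test

  // Then OR and ADD agree for every pair drawn from those sets:
  uint32_t x = 0x0A, y = 0x50;
  assert((x | y) == (x + y));
  return 0;
}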
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index ba86098..bf11fde 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -21,20 +21,25 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
   def RET    : I   <0xC3, RawFrm, (outs), (ins variable_ops),
                     "ret",
                     [(X86retflag 0)], IIC_RET>;
+  def RETW   : I   <0xC3, RawFrm, (outs), (ins variable_ops),
+                    "ret{w}",
+                    [], IIC_RET>, OpSize;
   def RETI   : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
                     "ret\t$amt",
                     [(X86retflag timm:$amt)], IIC_RET_IMM>;
   def RETIW  : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
-                    "retw\t$amt",
+                    "ret{w}\t$amt",
                     [], IIC_RET_IMM>, OpSize;
   def LRETL  : I   <0xCB, RawFrm, (outs), (ins),
-                    "lretl", [], IIC_RET>;
+                    "{l}ret{l|f}", [], IIC_RET>;
+  def LRETW  : I   <0xCB, RawFrm, (outs), (ins),
+                    "{l}ret{w|f}", [], IIC_RET>, OpSize;
   def LRETQ  : RI  <0xCB, RawFrm, (outs), (ins),
-                    "lretq", [], IIC_RET>;
+                    "{l}ret{q|f}", [], IIC_RET>;
   def LRETI  : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
-                    "lret\t$amt", [], IIC_RET>;
+                    "{l}ret{l|f}\t$amt", [], IIC_RET>;
   def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
-                    "lretw\t$amt", [], IIC_RET>, OpSize;
+                    "{l}ret{w|f}\t$amt", [], IIC_RET>, OpSize;
 }

 // Unconditional branches.
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index ae3ed1b..35801e4 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -126,6 +126,8 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                  SDTCisSameAs<0,2>, SDTCisInt<3>]>;

 def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
+def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;

 def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
@@ -153,11 +155,17 @@ def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
 def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;

 def X86VPermilp  : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
+def X86VPermv    : SDNode<"X86ISD::VPERMV",   SDTShuff2Op>;
+def X86VPermi    : SDNode<"X86ISD::VPERMI",   SDTShuff2OpI>;

 def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;

 def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;

+def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
+def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
+def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
+
 //===----------------------------------------------------------------------===//
 // SSE Complex Patterns
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 307c96b..b12c1db 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1049,9 +1049,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::VPCMPGTWYrr,       X86::VPCMPGTWYrm,        TB_ALIGN_32 },
     { X86::VPERM2I128rr,      X86::VPERM2I128rm,       TB_ALIGN_32 },
     { X86::VPERMDYrr,         X86::VPERMDYrm,          TB_ALIGN_32 },
-    { X86::VPERMPDYrr,        X86::VPERMPDYrm,         TB_ALIGN_32 },
+    { X86::VPERMPDYri,        X86::VPERMPDYmi,         TB_ALIGN_32 },
     { X86::VPERMPSYrr,        X86::VPERMPSYrm,         TB_ALIGN_32 },
-    { X86::VPERMQYrr,         X86::VPERMQYrm,          TB_ALIGN_32 },
+    { X86::VPERMQYri,         X86::VPERMQYmi,          TB_ALIGN_32 },
     { X86::VPHADDDYrr,        X86::VPHADDDYrm,         TB_ALIGN_32 },
     { X86::VPHADDSWrr256,     X86::VPHADDSWrm256,      TB_ALIGN_32 },
     { X86::VPHADDWYrr,        X86::VPHADDWYrm,         TB_ALIGN_32 },
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index dd7cf50..6a25312 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -374,6 +374,11 @@ def SSECC : Operand<i8> {
   let OperandType = "OPERAND_IMMEDIATE";
 }

+def AVXCC : Operand<i8> {
+  let PrintMethod = "printSSECC";
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+
 class ImmSExtAsmOperandClass : AsmOperandClass {
   let SuperClasses = [ImmAsmOperand];
   let RenderMethod = "addImmOperands";
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index df42627..65e3c1e 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2162,15 +2162,15 @@ def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),

 // sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
 multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
-                            SDNode OpNode, ValueType VT, PatFrag ld_frag,
-                            string asm, string asm_alt,
+                            Operand CC, SDNode OpNode, ValueType VT,
+                            PatFrag ld_frag, string asm, string asm_alt,
                             OpndItins itins> {
   def rr : SIi8<0xC2, MRMSrcReg,
-                (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
+                (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
                 [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
                 itins.rr>;
   def rm : SIi8<0xC2, MRMSrcMem,
-                (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm,
+                (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
                 [(set RC:$dst, (OpNode (VT RC:$src1),
                                        (ld_frag addr:$src2), imm:$cc))],
                 itins.rm>;
@@ -2187,57 +2187,57 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
   }
 }

-defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
+defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmpss, f32, loadf32,
                  "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                  SSE_ALU_F32S>,
                  XS, VEX_4V, VEX_LIG;
-defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
+defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmpsd, f64, loadf64,
                  "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                  SSE_ALU_F32S>, // same latency as 32 bit compare
                  XD, VEX_4V, VEX_LIG;

 let Constraints = "$src1 = $dst" in {
-  defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
+  defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmpss, f32, loadf32,
                   "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
                   "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                   SSE_ALU_F32S>,
                   XS;
-  defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
+  defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmpsd, f64, loadf64,
                   "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
                   "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                   SSE_ALU_F32S>, // same latency as 32 bit compare
                   XD;
 }

-multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
+multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
                          Intrinsic Int, string asm, OpndItins itins> {
   def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
-                      (ins VR128:$src1, VR128:$src, SSECC:$cc), asm,
+                      (ins VR128:$src1, VR128:$src, CC:$cc), asm,
                       [(set VR128:$dst, (Int VR128:$src1,
                                                VR128:$src, imm:$cc))],
                       itins.rr>;
   def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
-                      (ins VR128:$src1, x86memop:$src, SSECC:$cc), asm,
+                      (ins VR128:$src1, x86memop:$src, CC:$cc), asm,
                       [(set VR128:$dst, (Int VR128:$src1,
                                          (load addr:$src), imm:$cc))],
                       itins.rm>;
 }
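Both forms above take the comparison predicate as the $cc immediate and write
an all-ones or all-zero mask into the destination element; the separate AVXCC
operand exists because the VEX-encoded variants accept a wider predicate range
than the legacy SSE encodings. A rough scalar model of cmpss for a few of the
classic predicate encodings (general x86 semantics, not anything from this
patch; the helper name is ours):

#include <cmath>
#include <cstdint>

// Scalar model of CMPSS: compare two floats under predicate `cc` and
// return the 32-bit all-ones/all-zero mask written to the low element.
// Only three predicate encodings are shown (0 = eq, 1 = lt, 3 = unord);
// the remaining ones are omitted from this sketch.
uint32_t cmpss(float a, float b, unsigned cc) {
  bool r = false;
  switch (cc) {
  case 0: r = (a == b); break;
  case 1: r = (a < b);  break;
  case 3: r = std::isnan(a) || std::isnan(b); break;
  default: break;
  }
  return r ? 0xFFFFFFFFu : 0u;
}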
 // Aliases to match intrinsics which expect XMM operand(s).
-defm Int_VCMPSS  : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+defm Int_VCMPSS  : sse12_cmp_scalar_int<f32mem, AVXCC, int_x86_sse_cmp_ss,
                      "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                      SSE_ALU_F32S>,
                      XS, VEX_4V;
-defm Int_VCMPSD  : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+defm Int_VCMPSD  : sse12_cmp_scalar_int<f64mem, AVXCC, int_x86_sse2_cmp_sd,
                      "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                      SSE_ALU_F32S>, // same latency as f32
                      XD, VEX_4V;
 let Constraints = "$src1 = $dst" in {
-  defm Int_CMPSS  : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+  defm Int_CMPSS  : sse12_cmp_scalar_int<f32mem, SSECC, int_x86_sse_cmp_ss,
                        "cmp${cc}ss\t{$src, $dst|$dst, $src}",
                        SSE_ALU_F32S>, XS;
-  defm Int_CMPSD  : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+  defm Int_CMPSD  : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd,
                        "cmp${cc}sd\t{$src, $dst|$dst, $src}",
                        SSE_ALU_F32S>, // same latency as f32
                        XD;
 }
@@ -2308,50 +2308,50 @@ let Defs = [EFLAGS] in {

 // sse12_cmp_packed - sse 1 & 2 compare packed instructions
 multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
-                            Intrinsic Int, string asm, string asm_alt,
-                            Domain d> {
-  let isAsmParserOnly = 1 in {
-    def rri : PIi8<0xC2, MRMSrcReg,
-               (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
-               [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
-               IIC_SSE_CMPP_RR, d>;
-    def rmi : PIi8<0xC2, MRMSrcMem,
-               (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm,
-               [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
-               IIC_SSE_CMPP_RM, d>;
-  }
+                            Operand CC, Intrinsic Int, string asm,
+                            string asm_alt, Domain d> {
+  def rri : PIi8<0xC2, MRMSrcReg,
+             (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+             [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
+             IIC_SSE_CMPP_RR, d>;
+  def rmi : PIi8<0xC2, MRMSrcMem,
+             (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+             [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
+             IIC_SSE_CMPP_RM, d>;
   // Accept explicit immediate argument form instead of comparison code.
-  def rri_alt : PIi8<0xC2, MRMSrcReg,
-             (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
-             asm_alt, [], IIC_SSE_CMPP_RR, d>;
-  def rmi_alt : PIi8<0xC2, MRMSrcMem,
-             (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
-             asm_alt, [], IIC_SSE_CMPP_RM, d>;
+  let neverHasSideEffects = 1 in {
+    def rri_alt : PIi8<0xC2, MRMSrcReg,
+               (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
+               asm_alt, [], IIC_SSE_CMPP_RR, d>;
+    def rmi_alt : PIi8<0xC2, MRMSrcMem,
+               (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
+               asm_alt, [], IIC_SSE_CMPP_RM, d>;
+  }
 }

-defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse_cmp_ps,
                "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                SSEPackedSingle>, TB, VEX_4V;
-defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd,
                "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                SSEPackedDouble>, TB, OpSize, VEX_4V;
-defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
+defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256,
                "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                SSEPackedSingle>, TB, VEX_4V;
-defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
+defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256,
                "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                SSEPackedDouble>, TB, OpSize, VEX_4V;
 let Constraints = "$src1 = $dst" in {
-  defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+  defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
                  "cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
                  "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                  SSEPackedSingle>, TB;
-  defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+  defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
                  "cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
                  "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                  SSEPackedDouble>, TB, OpSize;
@@ -6331,11 +6331,11 @@ def : Pat<(f64 (ftrunc FR64:$src)),
 let Defs = [EFLAGS], Predicates = [HasAVX] in {
 def VPTESTrr  : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
                 "vptest\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+                [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
                 OpSize, VEX;
 def VPTESTrm  : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
                 "vptest\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+                [(set EFLAGS,(X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
                 OpSize, VEX;
 def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
@@ -6351,11 +6351,11 @@ def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
 let Defs = [EFLAGS] in {
 def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
               "ptest\t{$src2, $src1|$src1, $src2}",
-              [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+              [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
               OpSize;
 def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
               "ptest\t{$src2, $src1|$src1, $src2}",
-              [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+              [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
               OpSize;
 }
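The retyping of the PTEST patterns from v4f32/memopv4f32 to v2i64/memopv2i64
matches what the instruction actually does: it treats both operands as 128 raw
bits and only sets flags, producing no data result. A minimal scalar model
over two 64-bit lanes (illustration only; the struct and helper names are
ours):

#include <cstdint>

struct Flags { bool ZF, CF; };

// Scalar model of PTEST: ZF = ((a AND b) == 0), CF = ((NOT a AND b) == 0),
// computed over all 128 bits; no register result is written.
Flags ptest(const uint64_t a[2], const uint64_t b[2]) {
  Flags f;
  f.ZF = ((a[0] & b[0]) | (a[1] & b[1])) == 0;
  f.CF = ((~a[0] & b[0]) | (~a[1] & b[1])) == 0;
  return f;
}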
@@ -6735,12 +6735,32 @@ let Predicates = [HasAVX] in {
   def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
                             (v4f64 VR256:$src2))),
             (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+
+  def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2),
+                               (imm:$mask))),
+            (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>;
+  def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2),
+                               (imm:$mask))),
+            (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>;
+
+  def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
+                               (imm:$mask))),
+            (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
+  def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
+                               (imm:$mask))),
+            (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
+  def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+                               (imm:$mask))),
+            (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
 }

 let Predicates = [HasAVX2] in {
   def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
                             (v32i8 VR256:$src2))),
             (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+
+  def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2),
+                                (imm:$mask))),
+            (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>;
 }

 /// SS41I_ternary_int - SSE 4.1 ternary operator
@@ -6789,6 +6809,17 @@ let Predicates = [HasSSE41] in {
   def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
                             (v2f64 VR128:$src2))),
             (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
+
+  def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
+                               (imm:$mask))),
+            (PBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
+  def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
+                               (imm:$mask))),
+            (BLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
+  def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+                               (imm:$mask))),
+            (BLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
+
 }
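Every new X86Blendp* pattern above maps to an immediate-controlled blend: bit
i of the immediate picks element i from one source rather than the other,
which is also why the patterns swap $src1 and $src2 to line up with the
instruction's operand order. A rough element-wise model of a four-lane blend
such as BLENDPS (general SSE4.1 semantics, not LLVM code; the helper name is
ours):

#include <cstdint>

// Scalar model of BLENDPS dst, a, b, imm: for each of the four lanes,
// bit i of `imm` selects b[i], otherwise a[i] is kept.
void blendps(float dst[4], const float a[4], const float b[4], uint8_t imm) {
  for (int i = 0; i < 4; ++i)
    dst[i] = ((imm >> i) & 1) ? b[i] : a[i];
}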
 let Predicates = [HasAVX] in
@@ -7294,6 +7325,46 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
            []>, VEX_4V;
 }

+let Predicates = [HasAVX] in {
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rr VR256:$src1, VR128:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rr VR256:$src1, VR128:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rr VR256:$src1, VR128:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rr VR256:$src1, VR128:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rr VR256:$src1, VR128:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rr VR256:$src1, VR128:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rm VR256:$src1, addr:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rm VR256:$src1, addr:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rm VR256:$src1, addr:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+}
+
 //===----------------------------------------------------------------------===//
 // VEXTRACTF128 - Extract packed floating-point values
 //
@@ -7664,45 +7735,47 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
 //
 multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
-                     Intrinsic Int> {
+                     ValueType OpVT> {
   def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
                    (ins VR256:$src1, VR256:$src2),
                    !strconcat(OpcodeStr,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                   [(set VR256:$dst, (Int VR256:$src1, VR256:$src2))]>, VEX_4V;
+                   [(set VR256:$dst,
+                     (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>, VEX_4V;
   def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
                    (ins VR256:$src1, i256mem:$src2),
                    !strconcat(OpcodeStr,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                   [(set VR256:$dst, (Int VR256:$src1,
-                                      (bitconvert (mem_frag addr:$src2))))]>,
+                   [(set VR256:$dst,
+                     (OpVT (X86VPermv VR256:$src1,
+                            (bitconvert (mem_frag addr:$src2)))))]>,
                    VEX_4V;
 }

-defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, int_x86_avx2_permd>;
+defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, v8i32>;
 let ExeDomain = SSEPackedSingle in
-defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>;
+defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, v8f32>;

 multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
-                         Intrinsic Int> {
-  def Yrr : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
+                         ValueType OpVT> {
+  def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
                      (ins VR256:$src1, i8imm:$src2),
                      !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                     [(set VR256:$dst, (Int VR256:$src1, imm:$src2))]>, VEX;
-  def Yrm : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
+                     [(set VR256:$dst,
+                       (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>, VEX;
+  def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
                      (ins i256mem:$src1, i8imm:$src2),
                      !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                     [(set VR256:$dst, (Int (mem_frag addr:$src1), imm:$src2))]>,
-                     VEX;
+                     [(set VR256:$dst,
+                       (OpVT (X86VPermi (mem_frag addr:$src1),
+                              (i8 imm:$src2))))]>, VEX;
 }

-defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, int_x86_avx2_permq>,
-                            VEX_W;
+defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, v4i64>, VEX_W;
 let ExeDomain = SSEPackedDouble in
-defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>,
-                             VEX_W;
+defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, v4f64>, VEX_W;
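The rename from Yrr/Yrm to Yri/Ymi here is what the earlier X86InstrInfo.cpp
folding-table hunk (VPERMPDYri/VPERMQYri) keeps in sync. Semantically, vpermq
selects each destination lane with a two-bit field of its immediate; a scalar
model (general AVX2 semantics, not LLVM code; the helper name is ours):

#include <cstdint>

// Scalar model of VPERMQ ymm, ymm/m256, imm8: destination lane i takes
// source lane ((imm >> 2*i) & 3), over four 64-bit lanes.
void vpermq(uint64_t dst[4], const uint64_t src[4], uint8_t imm) {
  for (int i = 0; i < 4; ++i)
    dst[i] = src[(imm >> (2 * i)) & 3];
}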
 //===----------------------------------------------------------------------===//
 // VPERM2I128 - Permute Floating-Point Values in 128-bit chunks
@@ -7743,18 +7816,17 @@ def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)),
 //===----------------------------------------------------------------------===//
 // VINSERTI128 - Insert packed integer values
 //
+let neverHasSideEffects = 1 in {
 def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
                              (ins VR256:$src1, VR128:$src2, i8imm:$src3),
                              "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-                             [(set VR256:$dst,
-                               (int_x86_avx2_vinserti128 VR256:$src1, VR128:$src2, imm:$src3))]>,
+                             []>,
                              VEX_4V;
 def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
                              (ins VR256:$src1, i128mem:$src2, i8imm:$src3),
                              "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-                             [(set VR256:$dst,
-                               (int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2),
-                                imm:$src3))]>, VEX_4V;
+                             []>, VEX_4V;
+}

 let Predicates = [HasAVX2], AddedComplexity = 1 in {
 def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
@@ -7775,47 +7847,6 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
                          (INSERT_get_vinsertf128_imm VR256:$ins))>;
 }

-// AVX1 patterns
-let Predicates = [HasAVX] in {
-def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
-                                   (i32 imm)),
-          (VINSERTF128rr VR256:$src1, VR128:$src2,
-                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
-                                   (i32 imm)),
-          (VINSERTF128rr VR256:$src1, VR128:$src2,
-                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
-                                   (i32 imm)),
-          (VINSERTF128rr VR256:$src1, VR128:$src2,
-                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
-                                   (i32 imm)),
-          (VINSERTF128rr VR256:$src1, VR128:$src2,
-                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
-                                   (i32 imm)),
-          (VINSERTF128rr VR256:$src1, VR128:$src2,
-                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
-                                   (i32 imm)),
-          (VINSERTF128rr VR256:$src1, VR128:$src2,
-                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
-
-def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
-                                   (i32 imm)),
-          (VINSERTF128rm VR256:$src1, addr:$src2,
-                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
-                                   (i32 imm)),
-          (VINSERTF128rm VR256:$src1, addr:$src2,
-                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
-                                   (i32 imm)),
-          (VINSERTF128rm VR256:$src1, addr:$src2,
-                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
-}
-
 //===----------------------------------------------------------------------===//
 // VEXTRACTI128 - Extract packed integer values
 //
@@ -7830,7 +7861,7 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
                               (ins i128mem:$dst, VR256:$src1, i8imm:$src2),
                               "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                               []>, VEX;
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2], AddedComplexity = 1 in {
 def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
           (v2i64 (VEXTRACTI128rr
                     (v4i64 VR256:$src1),
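For orientation on the insert/extract patterns above: vinsertf128/vinserti128
overwrite one 128-bit half of a 256-bit register and vextractf128/vextracti128
read one half, a single immediate bit choosing which. A scalar model of the
pair (general AVX semantics, not LLVM code; the type and helper names are
ours):

#include <cstdint>

// A 256-bit register modeled as two 128-bit lanes of two 64-bit words.
struct YMM { uint64_t lane[2][2]; };

// Model of VINSERTF128/VINSERTI128: replace lane (imm & 1) with src.
YMM insert128(YMM dst, const uint64_t src[2], uint8_t imm) {
  dst.lane[imm & 1][0] = src[0];
  dst.lane[imm & 1][1] = src[1];
  return dst;
}

// Model of VEXTRACTF128/VEXTRACTI128: read lane (imm & 1) into out.
void extract128(uint64_t out[2], const YMM &src, uint8_t imm) {
  out[0] = src.lane[imm & 1][0];
  out[1] = src.lane[imm & 1][1];
}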
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 3eb9441..ed1a409 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -21,7 +21,6 @@
 #include "llvm/Support/Host.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/SmallVector.h"

 #define GET_SUBTARGETINFO_TARGET_DESC
 #define GET_SUBTARGETINFO_CTOR
@@ -425,7 +424,9 @@ bool X86Subtarget::enablePostRAScheduler(
            CodeGenOpt::Level OptLevel,
            TargetSubtargetInfo::AntiDepBreakMode& Mode,
            RegClassVector& CriticalPathRCs) const {
-  Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+  //TODO: change back to ANTIDEP_CRITICAL when the
+  // X86 subtarget properly sets up post RA liveness.
+  Mode = TargetSubtargetInfo::ANTIDEP_NONE;
   CriticalPathRCs.clear();
   return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
 }
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index c0d2a9c..718f35e 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -14,7 +14,6 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/Support/Dwarf.h"
 using namespace llvm;
 using namespace dwarf;