Diffstat (limited to 'lib/Target/X86')
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 31
-rw-r--r--  lib/Target/X86/AsmPrinter/Android.mk | 57
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 722
-rw-r--r--  lib/Target/X86/CMakeLists.txt | 15
-rw-r--r--  lib/Target/X86/Disassembler/CMakeLists.txt | 2
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 8
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 13
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 2
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 2
-rw-r--r--  lib/Target/X86/InstPrinter/X86InstComments.cpp | 42
-rw-r--r--  lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 2
-rw-r--r--  lib/Target/X86/Makefile | 2
-rw-r--r--  lib/Target/X86/README-SSE.txt | 11
-rw-r--r--  lib/Target/X86/README-X86-64.txt | 44
-rw-r--r--  lib/Target/X86/README.txt | 367
-rw-r--r--  lib/Target/X86/Utils/CMakeLists.txt | 6
-rw-r--r--  lib/Target/X86/Utils/Makefile | 15
-rw-r--r--  lib/Target/X86/Utils/X86ShuffleDecode.cpp | 190
-rw-r--r--  lib/Target/X86/Utils/X86ShuffleDecode.h | 87
-rw-r--r--  lib/Target/X86/X86.h | 10
-rw-r--r--  lib/Target/X86/X86.td | 20
-rw-r--r--  lib/Target/X86/X86AsmBackend.cpp | 188
-rw-r--r--  lib/Target/X86/X86CallingConv.td | 23
-rw-r--r--  lib/Target/X86/X86ELFWriterInfo.cpp | 51
-rw-r--r--  lib/Target/X86/X86ELFWriterInfo.h | 17
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 184
-rw-r--r--  lib/Target/X86/X86FixupKinds.h | 10
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 104
-rw-r--r--  lib/Target/X86/X86FrameInfo.h | 46
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp (renamed from lib/Target/X86/X86FrameInfo.cpp) | 392
-rw-r--r--  lib/Target/X86/X86FrameLowering.h | 65
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 96
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 1677
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 75
-rw-r--r--  lib/Target/X86/X86InstrArithmetic.td | 143
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td | 94
-rw-r--r--  lib/Target/X86/X86InstrControl.td | 76
-rw-r--r--  lib/Target/X86/X86InstrFPStack.td | 44
-rw-r--r--  lib/Target/X86/X86InstrFormats.td | 17
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 42
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 181
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 119
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 107
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 196
-rw-r--r--  lib/Target/X86/X86InstrSystem.td | 18
-rw-r--r--  lib/Target/X86/X86JITInfo.cpp | 14
-rw-r--r--  lib/Target/X86/X86MCAsmInfo.cpp | 13
-rw-r--r--  lib/Target/X86/X86MCCodeEmitter.cpp | 70
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 68
-rw-r--r--  lib/Target/X86/X86MachObjectWriter.cpp | 32
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 249
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 10
-rw-r--r--  lib/Target/X86/X86RegisterInfo.td | 73
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.cpp | 6
-rw-r--r--  lib/Target/X86/X86ShuffleDecode.h | 155
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 19
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 24
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 37
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 10
59 files changed, 3472 insertions(+), 2921 deletions(-)
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 655eeb5..8fe549b 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -44,8 +44,6 @@ private:
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
- bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
-
X86Operand *ParseOperand();
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
@@ -71,6 +69,7 @@ public:
setAvailableFeatures(ComputeAvailableFeatures(
&TM.getSubtarget<X86Subtarget>()));
}
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -622,6 +621,11 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
StringRef PatchedName = Name;
+ // FIXME: Hack to recognize setneb as setne.
+ if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
+ PatchedName != "setb" && PatchedName != "setnb")
+ PatchedName = PatchedName.substr(0, Name.size()-1);
+
// FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
const MCExpr *ExtraImmOp = 0;
if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
@@ -707,7 +711,8 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
bool isPrefix =
Name == "lock" || Name == "rep" ||
Name == "repe" || Name == "repz" ||
- Name == "repne" || Name == "repnz";
+ Name == "repne" || Name == "repnz" ||
+ Name == "rex64" || Name == "data16";
// This does the actual operand parsing. Don't parse any more if we have a
@@ -744,13 +749,16 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
Parser.EatToEndOfStatement();
- return TokError("unexpected token in argument list");
+ return Error(Loc, "unexpected token in argument list");
}
}
if (getLexer().is(AsmToken::EndOfStatement))
Parser.Lex(); // Consume the EndOfStatement
+ else if (isPrefix && getLexer().is(AsmToken::Slash))
+ Parser.Lex(); // Consume the prefix separator Slash
// This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
// "outb %al, %dx". Out doesn't take a memory form, but this is a widely
@@ -767,6 +775,19 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
delete &Op;
}
}
+ // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
+ if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
+ Operands.size() == 3) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ if (Op.isMem() && Op.Mem.SegReg == 0 &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
+ SMLoc Loc = Op.getEndLoc();
+ Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
+ delete &Op;
+ }
+ }
// FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
// "shift <op>".
@@ -834,6 +855,8 @@ MatchAndEmitInstruction(SMLoc IDLoc,
case Match_MissingFeature:
Error(IDLoc, "instruction requires a CPU feature not currently enabled");
return true;
+ case Match_ConversionFail:
+ return Error(IDLoc, "unable to convert operands to instruction");
case Match_InvalidOperand:
WasOriginallyInvalidOperand = true;
break;
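
Note on the mnemonic hacks in this file: both work by rewriting the instruction name before table lookup. "setneb" loses its trailing "b" so it matches the "setne" entry, while "setb" and "setnb" (below / not-below) keep theirs, since there the "b" is a condition code rather than a size suffix. A minimal standalone sketch of that check, with an illustrative helper name that is not part of the patch:

#include "llvm/ADT/StringRef.h"
using llvm::StringRef;

// Mirrors the suffix-trimming hack in ParseInstruction (sketch only).
static StringRef patchSetCC(StringRef Name) {
  if (Name.startswith("set") && Name.endswith("b") &&
      Name != "setb" && Name != "setnb")
    return Name.substr(0, Name.size() - 1);  // "setneb" -> "setne"
  return Name;                               // "setb" is left alone
}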
diff --git a/lib/Target/X86/AsmPrinter/Android.mk b/lib/Target/X86/AsmPrinter/Android.mk
deleted file mode 100644
index 3f5538a..0000000
--- a/lib/Target/X86/AsmPrinter/Android.mk
+++ /dev/null
@@ -1,57 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-
-x86_asm_printer_TBLGEN_TABLES := \
- X86GenAsmWriter.inc \
- X86GenAsmWriter1.inc \
- X86GenInstrNames.inc \
- X86GenRegisterNames.inc \
- X86GenRegisterInfo.h.inc
-
-x86_asm_printer_SRC_FILES := \
- X86AsmPrinter.cpp
-
-# For the host
-# =====================================================
-include $(CLEAR_VARS)
-include $(CLEAR_TBLGEN_VARS)
-
-TBLGEN_TABLES := $(x86_asm_printer_TBLGEN_TABLES)
-
-TBLGEN_TD_DIR := $(LOCAL_PATH)/..
-
-LOCAL_SRC_FILES := $(x86_asm_printer_SRC_FILES)
-
-LOCAL_C_INCLUDES += \
- $(LOCAL_PATH)/..
-
-LOCAL_MODULE:= libLLVMX86AsmPrinter
-
-LOCAL_MODULE_TAGS := optional
-
-include $(LLVM_HOST_BUILD_MK)
-include $(LLVM_TBLGEN_RULES_MK)
-include $(BUILD_HOST_STATIC_LIBRARY)
-
-# For the device
-# =====================================================
-ifeq ($(TARGET_ARCH),x86)
-include $(CLEAR_VARS)
-include $(CLEAR_TBLGEN_VARS)
-
-TBLGEN_TABLES := $(x86_asm_printer_TBLGEN_TABLES)
-
-TBLGEN_TD_DIR := $(LOCAL_PATH)/..
-
-LOCAL_SRC_FILES := $(x86_asm_printer_SRC_FILES)
-
-LOCAL_C_INCLUDES += \
- $(LOCAL_PATH)/..
-
-LOCAL_MODULE:= libLLVMX86AsmPrinter
-
-LOCAL_MODULE_TAGS := optional
-
-include $(LLVM_DEVICE_BUILD_MK)
-include $(LLVM_TBLGEN_RULES_MK)
-include $(BUILD_STATIC_LIBRARY)
-endif
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
deleted file mode 100644
index d0aa290..0000000
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ /dev/null
@@ -1,722 +0,0 @@
-//===-- X86AsmPrinter.cpp - Convert X86 LLVM code to AT&T assembly --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to X86 machine code.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86AsmPrinter.h"
-#include "InstPrinter/X86ATTInstPrinter.h"
-#include "InstPrinter/X86IntelInstPrinter.h"
-#include "X86MCInstLower.h"
-#include "X86.h"
-#include "X86COFFMachineModuleInfo.h"
-#include "X86MachineFunctionInfo.h"
-#include "X86TargetMachine.h"
-#include "llvm/CallingConv.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineModuleInfoImpls.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Support/COFF.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/ADT/SmallString.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Primitive Helper Functions.
-//===----------------------------------------------------------------------===//
-
-/// runOnMachineFunction - Emit the function body.
-///
-bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
- SetupMachineFunction(MF);
-
- if (Subtarget->isTargetCOFF()) {
- bool Intrn = MF.getFunction()->hasInternalLinkage();
- OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
- OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC
- : COFF::IMAGE_SYM_CLASS_EXTERNAL);
- OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
- << COFF::SCT_COMPLEX_TYPE_SHIFT);
- OutStreamer.EndCOFFSymbolDef();
- }
-
- // Have common code print out the function header with linkage info etc.
- EmitFunctionHeader();
-
- // Emit the rest of the function body.
- EmitFunctionBody();
-
- // We didn't modify anything.
- return false;
-}
-
-/// printSymbolOperand - Print a raw symbol reference operand. This handles
-/// jump tables, constant pools, global address and external symbols, all of
-/// which print to a label with various suffixes for relocation types etc.
-void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
- raw_ostream &O) {
- switch (MO.getType()) {
- default: llvm_unreachable("unknown symbol type!");
- case MachineOperand::MO_JumpTableIndex:
- O << *GetJTISymbol(MO.getIndex());
- break;
- case MachineOperand::MO_ConstantPoolIndex:
- O << *GetCPISymbol(MO.getIndex());
- printOffset(MO.getOffset(), O);
- break;
- case MachineOperand::MO_GlobalAddress: {
- const GlobalValue *GV = MO.getGlobal();
-
- MCSymbol *GVSym;
- if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB)
- GVSym = GetSymbolWithGlobalValueBase(GV, "$stub");
- else if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
- MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE ||
- MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
- GVSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
- else
- GVSym = Mang->getSymbol(GV);
-
- // Handle dllimport linkage.
- if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
- GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName());
-
- if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
- MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
- MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
- if (StubSym.getPointer() == 0)
- StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
- } else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){
- MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym);
- if (StubSym.getPointer() == 0)
- StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
- } else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
- MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub");
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
- if (StubSym.getPointer() == 0)
- StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
- }
-
- // If the name begins with a dollar-sign, enclose it in parens. We do this
- // to avoid having it look like an integer immediate to the assembler.
- if (GVSym->getName()[0] != '$')
- O << *GVSym;
- else
- O << '(' << *GVSym << ')';
- printOffset(MO.getOffset(), O);
- break;
- }
- case MachineOperand::MO_ExternalSymbol: {
- const MCSymbol *SymToPrint;
- if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
- SmallString<128> TempNameStr;
- TempNameStr += StringRef(MO.getSymbolName());
- TempNameStr += StringRef("$stub");
-
- MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str());
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
- if (StubSym.getPointer() == 0) {
- TempNameStr.erase(TempNameStr.end()-5, TempNameStr.end());
- StubSym = MachineModuleInfoImpl::
- StubValueTy(OutContext.GetOrCreateSymbol(TempNameStr.str()),
- true);
- }
- SymToPrint = StubSym.getPointer();
- } else {
- SymToPrint = GetExternalSymbolSymbol(MO.getSymbolName());
- }
-
- // If the name begins with a dollar-sign, enclose it in parens. We do this
- // to avoid having it look like an integer immediate to the assembler.
- if (SymToPrint->getName()[0] != '$')
- O << *SymToPrint;
- else
- O << '(' << *SymToPrint << '(';
- break;
- }
- }
-
- switch (MO.getTargetFlags()) {
- default:
- llvm_unreachable("Unknown target flag on GV operand");
- case X86II::MO_NO_FLAG: // No flag.
- break;
- case X86II::MO_DARWIN_NONLAZY:
- case X86II::MO_DLLIMPORT:
- case X86II::MO_DARWIN_STUB:
- // These affect the name of the symbol, not any suffix.
- break;
- case X86II::MO_GOT_ABSOLUTE_ADDRESS:
- O << " + [.-" << *MF->getPICBaseSymbol() << ']';
- break;
- case X86II::MO_PIC_BASE_OFFSET:
- case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
- case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
- O << '-' << *MF->getPICBaseSymbol();
- break;
- case X86II::MO_TLSGD: O << "@TLSGD"; break;
- case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break;
- case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
- case X86II::MO_TPOFF: O << "@TPOFF"; break;
- case X86II::MO_NTPOFF: O << "@NTPOFF"; break;
- case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break;
- case X86II::MO_GOT: O << "@GOT"; break;
- case X86II::MO_GOTOFF: O << "@GOTOFF"; break;
- case X86II::MO_PLT: O << "@PLT"; break;
- case X86II::MO_TLVP: O << "@TLVP"; break;
- case X86II::MO_TLVP_PIC_BASE:
- O << "@TLVP" << '-' << *MF->getPICBaseSymbol();
- break;
- }
-}
-
-/// print_pcrel_imm - This is used to print an immediate value that ends up
-/// being encoded as a pc-relative value. These print slightly differently, for
-/// example, a $ is not emitted.
-void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(OpNo);
- switch (MO.getType()) {
- default: llvm_unreachable("Unknown pcrel immediate operand");
- case MachineOperand::MO_Register:
- // pc-relativeness was handled when computing the value in the reg.
- printOperand(MI, OpNo, O);
- return;
- case MachineOperand::MO_Immediate:
- O << MO.getImm();
- return;
- case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
- return;
- case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_ExternalSymbol:
- printSymbolOperand(MO, O);
- return;
- }
-}
-
-
-void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O, const char *Modifier) {
- const MachineOperand &MO = MI->getOperand(OpNo);
- switch (MO.getType()) {
- default: llvm_unreachable("unknown operand type!");
- case MachineOperand::MO_Register: {
- O << '%';
- unsigned Reg = MO.getReg();
- if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
- EVT VT = (strcmp(Modifier+6,"64") == 0) ?
- MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
- ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
- Reg = getX86SubSuperRegister(Reg, VT);
- }
- O << X86ATTInstPrinter::getRegisterName(Reg);
- return;
- }
-
- case MachineOperand::MO_Immediate:
- O << '$' << MO.getImm();
- return;
-
- case MachineOperand::MO_JumpTableIndex:
- case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_ExternalSymbol: {
- O << '$';
- printSymbolOperand(MO, O);
- break;
- }
- }
-}
-
-void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op,
- raw_ostream &O) {
- unsigned char value = MI->getOperand(Op).getImm();
- assert(value <= 7 && "Invalid ssecc argument!");
- switch (value) {
- case 0: O << "eq"; break;
- case 1: O << "lt"; break;
- case 2: O << "le"; break;
- case 3: O << "unord"; break;
- case 4: O << "neq"; break;
- case 5: O << "nlt"; break;
- case 6: O << "nle"; break;
- case 7: O << "ord"; break;
- }
-}
-
-void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
- raw_ostream &O, const char *Modifier) {
- const MachineOperand &BaseReg = MI->getOperand(Op);
- const MachineOperand &IndexReg = MI->getOperand(Op+2);
- const MachineOperand &DispSpec = MI->getOperand(Op+3);
-
- // If we really don't want to print out (rip), don't.
- bool HasBaseReg = BaseReg.getReg() != 0;
- if (HasBaseReg && Modifier && !strcmp(Modifier, "no-rip") &&
- BaseReg.getReg() == X86::RIP)
- HasBaseReg = false;
-
- // HasParenPart - True if we will print out the () part of the mem ref.
- bool HasParenPart = IndexReg.getReg() || HasBaseReg;
-
- if (DispSpec.isImm()) {
- int DispVal = DispSpec.getImm();
- if (DispVal || !HasParenPart)
- O << DispVal;
- } else {
- assert(DispSpec.isGlobal() || DispSpec.isCPI() ||
- DispSpec.isJTI() || DispSpec.isSymbol());
- printSymbolOperand(MI->getOperand(Op+3), O);
- }
-
- if (HasParenPart) {
- assert(IndexReg.getReg() != X86::ESP &&
- "X86 doesn't allow scaling by ESP");
-
- O << '(';
- if (HasBaseReg)
- printOperand(MI, Op, O, Modifier);
-
- if (IndexReg.getReg()) {
- O << ',';
- printOperand(MI, Op+2, O, Modifier);
- unsigned ScaleVal = MI->getOperand(Op+1).getImm();
- if (ScaleVal != 1)
- O << ',' << ScaleVal;
- }
- O << ')';
- }
-}
-
-void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
- raw_ostream &O, const char *Modifier) {
- assert(isMem(MI, Op) && "Invalid memory reference!");
- const MachineOperand &Segment = MI->getOperand(Op+4);
- if (Segment.getReg()) {
- printOperand(MI, Op+4, O, Modifier);
- O << ':';
- }
- printLeaMemReference(MI, Op, O, Modifier);
-}
-
-void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op,
- raw_ostream &O) {
- O << *MF->getPICBaseSymbol() << '\n';
- O << *MF->getPICBaseSymbol() << ':';
-}
-
-bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
- raw_ostream &O) {
- unsigned Reg = MO.getReg();
- switch (Mode) {
- default: return true; // Unknown mode.
- case 'b': // Print QImode register
- Reg = getX86SubSuperRegister(Reg, MVT::i8);
- break;
- case 'h': // Print QImode high register
- Reg = getX86SubSuperRegister(Reg, MVT::i8, true);
- break;
- case 'w': // Print HImode register
- Reg = getX86SubSuperRegister(Reg, MVT::i16);
- break;
- case 'k': // Print SImode register
- Reg = getX86SubSuperRegister(Reg, MVT::i32);
- break;
- case 'q': // Print DImode register
- Reg = getX86SubSuperRegister(Reg, MVT::i64);
- break;
- }
-
- O << '%' << X86ATTInstPrinter::getRegisterName(Reg);
- return false;
-}
-
-/// PrintAsmOperand - Print out an operand for an inline asm expression.
-///
-bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
- const char *ExtraCode, raw_ostream &O) {
- // Does this asm operand have a single letter operand modifier?
- if (ExtraCode && ExtraCode[0]) {
- if (ExtraCode[1] != 0) return true; // Unknown modifier.
-
- const MachineOperand &MO = MI->getOperand(OpNo);
-
- switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
- case 'a': // This is an address. Currently only 'i' and 'r' are expected.
- if (MO.isImm()) {
- O << MO.getImm();
- return false;
- }
- if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) {
- printSymbolOperand(MO, O);
- if (Subtarget->isPICStyleRIPRel())
- O << "(%rip)";
- return false;
- }
- if (MO.isReg()) {
- O << '(';
- printOperand(MI, OpNo, O);
- O << ')';
- return false;
- }
- return true;
-
- case 'c': // Don't print "$" before a global var name or constant.
- if (MO.isImm())
- O << MO.getImm();
- else if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol())
- printSymbolOperand(MO, O);
- else
- printOperand(MI, OpNo, O);
- return false;
-
- case 'A': // Print '*' before a register (it must be a register)
- if (MO.isReg()) {
- O << '*';
- printOperand(MI, OpNo, O);
- return false;
- }
- return true;
-
- case 'b': // Print QImode register
- case 'h': // Print QImode high register
- case 'w': // Print HImode register
- case 'k': // Print SImode register
- case 'q': // Print DImode register
- if (MO.isReg())
- return printAsmMRegister(MO, ExtraCode[0], O);
- printOperand(MI, OpNo, O);
- return false;
-
- case 'P': // This is the operand of a call, treat specially.
- print_pcrel_imm(MI, OpNo, O);
- return false;
-
- case 'n': // Negate the immediate or print a '-' before the operand.
- // Note: this is a temporary solution. It should be handled target
- // independently as part of the 'MC' work.
- if (MO.isImm()) {
- O << -MO.getImm();
- return false;
- }
- O << '-';
- }
- }
-
- printOperand(MI, OpNo, O);
- return false;
-}
-
-bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo, unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
- if (ExtraCode && ExtraCode[0]) {
- if (ExtraCode[1] != 0) return true; // Unknown modifier.
-
- switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
- case 'b': // Print QImode register
- case 'h': // Print QImode high register
- case 'w': // Print HImode register
- case 'k': // Print SImode register
- case 'q': // Print SImode register
- // These only apply to registers, ignore on mem.
- break;
- case 'P': // Don't print @PLT, but do print as memory.
- printMemReference(MI, OpNo, O, "no-rip");
- return false;
- }
- }
- printMemReference(MI, OpNo, O);
- return false;
-}
-
-void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
- if (Subtarget->isTargetDarwin())
- OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
-}
-
-
-void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
- if (Subtarget->isTargetDarwin()) {
- // All darwin targets use mach-o.
- MachineModuleInfoMachO &MMIMacho =
- MMI->getObjFileInfo<MachineModuleInfoMachO>();
-
- // Output stubs for dynamically-linked functions.
- MachineModuleInfoMachO::SymbolListTy Stubs;
-
- Stubs = MMIMacho.GetFnStubList();
- if (!Stubs.empty()) {
- const MCSection *TheSection =
- OutContext.getMachOSection("__IMPORT", "__jump_table",
- MCSectionMachO::S_SYMBOL_STUBS |
- MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE |
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- 5, SectionKind::getMetadata());
- OutStreamer.SwitchSection(TheSection);
-
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- // L_foo$stub:
- OutStreamer.EmitLabel(Stubs[i].first);
- // .indirect_symbol _foo
- OutStreamer.EmitSymbolAttribute(Stubs[i].second.getPointer(),
- MCSA_IndirectSymbol);
- // hlt; hlt; hlt; hlt; hlt hlt = 0xf4 = -12.
- const char HltInsts[] = { -12, -12, -12, -12, -12 };
- OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/);
- }
-
- Stubs.clear();
- OutStreamer.AddBlankLine();
- }
-
- // Output stubs for external and common global variables.
- Stubs = MMIMacho.GetGVStubList();
- if (!Stubs.empty()) {
- const MCSection *TheSection =
- OutContext.getMachOSection("__IMPORT", "__pointers",
- MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
- SectionKind::getMetadata());
- OutStreamer.SwitchSection(TheSection);
-
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- // L_foo$non_lazy_ptr:
- OutStreamer.EmitLabel(Stubs[i].first);
- // .indirect_symbol _foo
- MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
- OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),
- MCSA_IndirectSymbol);
- // .long 0
- if (MCSym.getInt())
- // External to current translation unit.
- OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
- else
- // Internal to current translation unit.
- //
- // When we place the LSDA into the TEXT section, the type info
- // pointers need to be indirect and pc-rel. We accomplish this by
- // using NLPs. However, sometimes the types are local to the file. So
- // we need to fill in the value for the NLP in those cases.
- OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
- OutContext),
- 4/*size*/, 0/*addrspace*/);
- }
- Stubs.clear();
- OutStreamer.AddBlankLine();
- }
-
- Stubs = MMIMacho.GetHiddenGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
- EmitAlignment(2);
-
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- // L_foo$non_lazy_ptr:
- OutStreamer.EmitLabel(Stubs[i].first);
- // .long _foo
- OutStreamer.EmitValue(MCSymbolRefExpr::
- Create(Stubs[i].second.getPointer(),
- OutContext),
- 4/*size*/, 0/*addrspace*/);
- }
- Stubs.clear();
- OutStreamer.AddBlankLine();
- }
-
- // Funny Darwin hack: This flag tells the linker that no global symbols
- // contain code that falls through to other global symbols (e.g. the obvious
- // implementation of multiple entry points). If this doesn't occur, the
- // linker can safely perform dead code stripping. Since LLVM never
- // generates code that does this, it is always safe to set.
- OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
- }
-
- if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing() &&
- MMI->callsExternalVAFunctionWithFloatingPointArguments()) {
- StringRef SymbolName = Subtarget->is64Bit() ? "_fltused" : "__fltused";
- MCSymbol *S = MMI->getContext().GetOrCreateSymbol(SymbolName);
- OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
- }
-
- if (Subtarget->isTargetCOFF()) {
- X86COFFMachineModuleInfo &COFFMMI =
- MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
-
- // Emit type information for external functions
- typedef X86COFFMachineModuleInfo::externals_iterator externals_iterator;
- for (externals_iterator I = COFFMMI.externals_begin(),
- E = COFFMMI.externals_end();
- I != E; ++I) {
- OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
- OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL);
- OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
- << COFF::SCT_COMPLEX_TYPE_SHIFT);
- OutStreamer.EndCOFFSymbolDef();
- }
-
- // Necessary for dllexport support
- std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
-
- const TargetLoweringObjectFileCOFF &TLOFCOFF =
- static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
-
- for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (I->hasDLLExportLinkage())
- DLLExportedFns.push_back(Mang->getSymbol(I));
-
- for (Module::const_global_iterator I = M.global_begin(),
- E = M.global_end(); I != E; ++I)
- if (I->hasDLLExportLinkage())
- DLLExportedGlobals.push_back(Mang->getSymbol(I));
-
- // Output linker support code for dllexported globals on windows.
- if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
- OutStreamer.SwitchSection(TLOFCOFF.getDrectveSection());
- SmallString<128> name;
- for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) {
- if (Subtarget->isTargetWindows())
- name = " /EXPORT:";
- else
- name = " -export:";
- name += DLLExportedGlobals[i]->getName();
- if (Subtarget->isTargetWindows())
- name += ",DATA";
- else
- name += ",data";
- OutStreamer.EmitBytes(name, 0);
- }
-
- for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) {
- if (Subtarget->isTargetWindows())
- name = " /EXPORT:";
- else
- name = " -export:";
- name += DLLExportedFns[i]->getName();
- OutStreamer.EmitBytes(name, 0);
- }
- }
- }
-
- if (Subtarget->isTargetELF()) {
- const TargetLoweringObjectFileELF &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
-
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- // Output stubs for external and common global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const TargetData *TD = TM.getTargetData();
-
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- OutStreamer.EmitLabel(Stubs[i].first);
- OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
- TD->getPointerSize(), 0);
- }
- Stubs.clear();
- }
- }
-}
-
-MachineLocation
-X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
- MachineLocation Location;
- assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!");
- // Frame address. Currently handles register +- offset only.
-
- if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm())
- Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
- else {
- DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
- }
- return Location;
-}
-
-void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
- raw_ostream &O) {
- // Only the target-dependent form of DBG_VALUE should get here.
- // Referencing the offset and metadata as NOps-2 and NOps-1 is
- // probably portable to other targets; frame pointer location is not.
- unsigned NOps = MI->getNumOperands();
- assert(NOps==7);
- O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
- // cast away const; DIetc do not take const operands for some reason.
- DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
- if (V.getContext().isSubprogram())
- O << DISubprogram(V.getContext()).getDisplayName() << ":";
- O << V.getName();
- O << " <- ";
- // Frame address. Currently handles register +- offset only.
- O << '[';
- if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
- printOperand(MI, 0, O);
- else
- O << "undef";
- O << '+'; printOperand(MI, 3, O);
- O << ']';
- O << "+";
- printOperand(MI, NOps-2, O);
-}
-
-
-
-//===----------------------------------------------------------------------===//
-// Target Registry Stuff
-//===----------------------------------------------------------------------===//
-
-static MCInstPrinter *createX86MCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI) {
- if (SyntaxVariant == 0)
- return new X86ATTInstPrinter(MAI);
- if (SyntaxVariant == 1)
- return new X86IntelInstPrinter(MAI);
- return 0;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeX86AsmPrinter() {
- RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target);
- RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target);
-
- TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,createX86MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,createX86MCInstPrinter);
-}
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index f5f9cfa..b5fa94f 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -24,11 +24,12 @@ set(sources
X86ELFWriterInfo.cpp
X86FastISel.cpp
X86FloatingPoint.cpp
- X86FrameInfo.cpp
+ X86FrameLowering.cpp
X86ISelDAGToDAG.cpp
X86ISelLowering.cpp
X86InstrInfo.cpp
X86JITInfo.cpp
+ X86MachObjectWriter.cpp
X86MCAsmInfo.cpp
X86MCCodeEmitter.cpp
X86MCInstLower.cpp
@@ -40,14 +41,24 @@ set(sources
)
if( CMAKE_CL_64 )
+ # A workaround for a bug in cmake 2.8.3. See PR 8885.
+ if( CMAKE_VERSION STREQUAL "2.8.3" )
+ include(CMakeDetermineCompilerId)
+ endif()
+ # end of workaround.
enable_language(ASM_MASM)
ADD_CUSTOM_COMMAND(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj
+ MAIN_DEPENDENCY X86CompilationCallback_Win64.asm
COMMAND ${CMAKE_ASM_MASM_COMPILER} /Fo ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj /c ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm
- DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm
)
set(sources ${sources} ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj)
endif()
add_llvm_target(X86CodeGen ${sources})
+add_subdirectory(AsmParser)
+add_subdirectory(Disassembler)
+add_subdirectory(InstPrinter)
+add_subdirectory(TargetInfo)
+add_subdirectory(Utils)
diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt
index 97589c0..972a0d9 100644
--- a/lib/Target/X86/Disassembler/CMakeLists.txt
+++ b/lib/Target/X86/Disassembler/CMakeLists.txt
@@ -5,7 +5,7 @@ add_llvm_library(LLVMX86Disassembler
X86DisassemblerDecoder.c
)
# workaround for hanging compilation on MSVC9 and 10
-if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
+if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
set_property(
SOURCE X86Disassembler.cpp
PROPERTY COMPILE_FLAGS "/Od"
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 691e2d7..f777756 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -168,16 +168,16 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
switch (insn.displacementSize) {
default:
break;
- case 8:
+ case 1:
type = TYPE_MOFFS8;
break;
- case 16:
+ case 2:
type = TYPE_MOFFS16;
break;
- case 32:
+ case 4:
type = TYPE_MOFFS32;
break;
- case 64:
+ case 8:
type = TYPE_MOFFS64;
break;
}
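
The fix above is a unit mismatch: the decoder records displacementSize as a byte count (the header comment is corrected to say so later in this patch), but the switch compared it against bit widths, so the TYPE_MOFFS* cases could never fire. Restated as a sketch, with a concrete instance in the comment:

// displacementSize is in bytes: "movl 0x12345678, %eax" carries a
// 4-byte moffs displacement, so it must select TYPE_MOFFS32.
switch (insn.displacementSize) {
default: break;
case 1: type = TYPE_MOFFS8;  break;
case 2: type = TYPE_MOFFS16; break;
case 4: type = TYPE_MOFFS32; break;
case 8: type = TYPE_MOFFS64; break;
}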
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index 1fd6685..a9d28c9 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -290,7 +290,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
BOOL isPrefix = TRUE;
BOOL prefixGroups[4] = { FALSE };
uint64_t prefixLocation;
- uint8_t byte;
+ uint8_t byte = 0;
BOOL hasAdSize = FALSE;
BOOL hasOpSize = FALSE;
@@ -388,6 +388,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
}
} else {
unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
if (insn->mode == MODE_16BIT) {
@@ -511,7 +512,8 @@ static int getIDWithAttrMask(uint16_t* instructionID,
insn->opcode);
if (hasModRMExtension) {
- readModRM(insn);
+ if (readModRM(insn))
+ return -1;
*instructionID = decode(insn->opcodeType,
instructionClass,
@@ -747,7 +749,7 @@ static int readSIB(struct InternalInstruction* insn) {
insn->sibIndex = SIB_INDEX_NONE;
break;
default:
- insn->sibIndex = (EABase)(sibIndexBase + index);
+ insn->sibIndex = (SIBIndex)(sibIndexBase + index);
if (insn->sibIndex == SIB_INDEX_sib ||
insn->sibIndex == SIB_INDEX_sib64)
insn->sibIndex = SIB_INDEX_NONE;
@@ -794,7 +796,7 @@ static int readSIB(struct InternalInstruction* insn) {
}
break;
default:
- insn->sibBase = (EABase)(sibBaseBase + base);
+ insn->sibBase = (SIBBase)(sibBaseBase + base);
break;
}
@@ -860,7 +862,8 @@ static int readModRM(struct InternalInstruction* insn) {
if (insn->consumedModRM)
return 0;
- consumeByte(insn, &insn->modRM);
+ if (consumeByte(insn, &insn->modRM))
+ return -1;
insn->consumedModRM = TRUE;
mod = modFromModRM(insn->modRM);
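
The error-handling hunks in this file all follow the decoder's existing convention: reader helpers such as consumeByte() and readModRM() return 0 on success and nonzero on failure, so the fix is simply to stop ignoring those return values when the byte stream runs out mid-instruction. An abbreviated sketch of the pattern (not the full function):

/* Convention: return 0 on success, -1 when input is exhausted. */
static int readModRM(struct InternalInstruction *insn) {
  if (insn->consumedModRM)
    return 0;
  if (consumeByte(insn, &insn->modRM))  /* propagate truncated input */
    return -1;
  insn->consumedModRM = TRUE;
  /* ... decode the mod, reg, and rm fields ... */
  return 0;
}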
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 4f4fbcd..d0dc8b5 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -399,7 +399,7 @@ struct InternalInstruction {
/* The segment override type */
SegmentOverride segmentOverride;
- /* Sizes of various critical pieces of data */
+ /* Sizes of various critical pieces of data, in bytes */
uint8_t registerSize;
uint8_t addressSize;
uint8_t displacementSize;
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index abcb716..1425b86 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -22,7 +22,7 @@
#ifndef X86DISASSEMBLERDECODERCOMMON_H
#define X86DISASSEMBLERDECODERCOMMON_H
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
#define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers
#define CONTEXTS_SYM x86DisassemblerContexts
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index da9d5a3..c642acc 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -16,7 +16,7 @@
#include "X86GenInstrNames.inc"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/raw_ostream.h"
-#include "../X86ShuffleDecode.h"
+#include "../Utils/X86ShuffleDecode.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -111,28 +111,28 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::PUNPCKLBWrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLMask(16, ShuffleMask);
+ DecodePUNPCKLBWMask(16, ShuffleMask);
break;
case X86::PUNPCKLWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLWDrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLMask(8, ShuffleMask);
+ DecodePUNPCKLWDMask(8, ShuffleMask);
break;
case X86::PUNPCKLDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLMask(4, ShuffleMask);
+ DecodePUNPCKLDQMask(4, ShuffleMask);
break;
case X86::PUNPCKLQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLQDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLMask(2, ShuffleMask);
+ DecodePUNPCKLQDQMask(2, ShuffleMask);
break;
case X86::SHUFPDrri:
@@ -153,16 +153,44 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPDrm:
- DecodeUNPCKLPMask(2, ShuffleMask);
+ DecodeUNPCKLPDMask(2, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VUNPCKLPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPDrm:
+ DecodeUNPCKLPDMask(2, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ case X86::VUNPCKLPDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPDYrm:
+ DecodeUNPCKLPDMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
case X86::UNPCKLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPSrm:
- DecodeUNPCKLPMask(4, ShuffleMask);
+ DecodeUNPCKLPSMask(4, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VUNPCKLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPSrm:
+ DecodeUNPCKLPSMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ case X86::VUNPCKLPSYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPSYrm:
+ DecodeUNPCKLPSMask(8, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
case X86::UNPCKHPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
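
A note on the operand indices above: the SSE forms are two-address, so the first shuffle source doubles as the destination (operand 0), while the new VEX-encoded AVX forms are three-operand and take their first source from operand 1. With that, the comment printer can annotate, for example (output shape illustrative):

vunpcklps %xmm2, %xmm1, %xmm0  ## xmm0 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]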
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index a92a651..0484529 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -21,7 +21,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "X86GenInstrNames.inc"
-#include <ctype.h>
+#include <cctype>
using namespace llvm;
// Include the auto-generated portion of the assembly writer.
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
index 9c6415d..12fb090 100644
--- a/lib/Target/X86/Makefile
+++ b/lib/Target/X86/Makefile
@@ -20,6 +20,6 @@ BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \
X86GenCallingConv.inc X86GenSubtarget.inc \
X86GenEDInfo.inc
-DIRS = InstPrinter AsmParser Disassembler TargetInfo
+DIRS = InstPrinter AsmParser Disassembler TargetInfo Utils
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index b2116e0..f16ec02 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -923,4 +923,15 @@ The insertps's of $0 are pointless complex copies.
//===---------------------------------------------------------------------===//
+If SSE4.1 is available we should inline rounding functions instead of emitting
+a libcall.
+floor: roundsd $0x01, %xmm, %xmm
+ceil: roundsd $0x02, %xmm, %xmm
+
+and likewise for the single precision versions.
+
+Currently, SelectionDAGBuilder doesn't turn calls to these functions into the
+corresponding nodes and some targets (including X86) aren't ready for them.
+
+//===---------------------------------------------------------------------===//
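
For reference, the proposed expansion is what the SSE4.1 intrinsics already produce; a hedged C sketch of the floor case (illustration only, not code from this patch — _MM_FROUND_FLOOR encodes the $0x01 immediate quoted above):

#include <smmintrin.h>

double floor_sse41(double x) {
  __m128d v = _mm_set_sd(x);
  v = _mm_round_sd(v, v, _MM_FROUND_FLOOR);  /* roundsd $0x1, %xmm, %xmm */
  return _mm_cvtsd_f64(v);
}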
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt
index 78c4dc0..e21d69a 100644
--- a/lib/Target/X86/README-X86-64.txt
+++ b/lib/Target/X86/README-X86-64.txt
@@ -41,50 +41,6 @@ saved a few instructions.
//===---------------------------------------------------------------------===//
-Poor codegen:
-
-int X[2];
-int b;
-void test(void) {
- memset(X, b, 2*sizeof(X[0]));
-}
-
-llc:
- movq _b@GOTPCREL(%rip), %rax
- movzbq (%rax), %rax
- movq %rax, %rcx
- shlq $8, %rcx
- orq %rax, %rcx
- movq %rcx, %rax
- shlq $16, %rax
- orq %rcx, %rax
- movq %rax, %rcx
- shlq $32, %rcx
- movq _X@GOTPCREL(%rip), %rdx
- orq %rax, %rcx
- movq %rcx, (%rdx)
- ret
-
-gcc:
- movq _b@GOTPCREL(%rip), %rax
- movabsq $72340172838076673, %rdx
- movzbq (%rax), %rax
- imulq %rdx, %rax
- movq _X@GOTPCREL(%rip), %rdx
- movq %rax, (%rdx)
- ret
-
-And the codegen is even worse for the following
-(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103):
- void fill1(char *s, int a)
- {
- __builtin_memset(s, a, 15);
- }
-
-For this version, we duplicate the computation of the constant to store.
-
-//===---------------------------------------------------------------------===//
-
It's not possible to reference AH, BH, CH, and DH registers in an instruction
requiring REX prefix. However, divb and mulb both produce results in AH. If isel
emits a CopyFromReg which gets turned into a movb and that can be allocated a
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index a305ae6..abd1515 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -67,19 +67,6 @@ cmovs, we should expand to a conditional branch like GCC produces.
//===---------------------------------------------------------------------===//
-Compile this:
-_Bool f(_Bool a) { return a!=1; }
-
-into:
- movzbl %dil, %eax
- xorl $1, %eax
- ret
-
-(Although note that this isn't a legal way to express the code that llvm-gcc
-currently generates for that function.)
-
-//===---------------------------------------------------------------------===//
-
Some isel ideas:
1. Dynamic programming based approach when compile time if not an
@@ -109,6 +96,37 @@ It appears icc use push for parameter passing. Need to investigate.
//===---------------------------------------------------------------------===//
+This:
+
+void foo(void);
+void bar(int x, int *P) {
+ x >>= 2;
+ if (x)
+ foo();
+ *P = x;
+}
+
+compiles into:
+
+ movq %rsi, %rbx
+ movl %edi, %r14d
+ sarl $2, %r14d
+ testl %r14d, %r14d
+ je LBB0_2
+
+Instead of doing an explicit test, we can use the flags off the sar. This
+occurs in a bigger testcase like this, which is pretty common:
+
+#include <vector>
+int test1(std::vector<int> &X) {
+ int Sum = 0;
+ for (long i = 0, e = X.size(); i != e; ++i)
+ X[i] = 0;
+ return Sum;
+}
+
+//===---------------------------------------------------------------------===//
+
Only use inc/neg/not instructions on processors where they are faster than
add/sub/xor. They are slower on the P4 due to only updating some processor
flags.
@@ -394,72 +412,8 @@ boundary to improve performance.
//===---------------------------------------------------------------------===//
-Codegen:
-
-int f(int a, int b) {
- if (a == 4 || a == 6)
- b++;
- return b;
-}
-
-
-as:
-
-or eax, 2
-cmp eax, 6
-jz label
-
-//===---------------------------------------------------------------------===//
-
GCC's ix86_expand_int_movcc function (in i386.c) has a ton of interesting
-simplifications for integer "x cmp y ? a : b". For example, instead of:
-
-int G;
-void f(int X, int Y) {
- G = X < 0 ? 14 : 13;
-}
-
-compiling to:
-
-_f:
- movl $14, %eax
- movl $13, %ecx
- movl 4(%esp), %edx
- testl %edx, %edx
- cmovl %eax, %ecx
- movl %ecx, _G
- ret
-
-it could be:
-_f:
- movl 4(%esp), %eax
- sarl $31, %eax
- notl %eax
- addl $14, %eax
- movl %eax, _G
- ret
-
-etc.
-
-Another is:
-int usesbb(unsigned int a, unsigned int b) {
- return (a < b ? -1 : 0);
-}
-to:
-_usesbb:
- movl 8(%esp), %eax
- cmpl %eax, 4(%esp)
- sbbl %eax, %eax
- ret
-
-instead of:
-_usesbb:
- xorl %eax, %eax
- movl 8(%esp), %ecx
- cmpl %ecx, 4(%esp)
- movl $4294967295, %ecx
- cmovb %ecx, %eax
- ret
+simplifications for integer "x cmp y ? a : b".
//===---------------------------------------------------------------------===//
@@ -756,23 +710,17 @@ This:
{ return !full_add(a, b).second; }
Should compile to:
+ addl %esi, %edi
+ setae %al
+ movzbl %al, %eax
+ ret
-
- _Z11no_overflowjj:
- addl %edi, %esi
- setae %al
- ret
-
-FIXME: That code looks wrong; bool return is normally defined as zext.
-
-on x86-64, not:
-
-__Z11no_overflowjj:
- addl %edi, %esi
- cmpl %edi, %esi
- setae %al
- movzbl %al, %eax
- ret
+on x86-64, instead of the rather stupid-looking:
+ addl %esi, %edi
+ setb %al
+ xorb $1, %al
+ movzbl %al, %eax
+ ret
//===---------------------------------------------------------------------===//
@@ -1040,10 +988,10 @@ _foo:
instead of:
_foo:
- movl $255, %eax
- orl 4(%esp), %eax
- andl $65535, %eax
- ret
+ movl $65280, %eax
+ andl 4(%esp), %eax
+ orl $255, %eax
+ ret
//===---------------------------------------------------------------------===//
@@ -1165,58 +1113,6 @@ abs:
//===---------------------------------------------------------------------===//
-Consider:
-int test(unsigned long a, unsigned long b) { return -(a < b); }
-
-We currently compile this to:
-
-define i32 @test(i32 %a, i32 %b) nounwind {
- %tmp3 = icmp ult i32 %a, %b ; <i1> [#uses=1]
- %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
- %tmp5 = sub i32 0, %tmp34 ; <i32> [#uses=1]
- ret i32 %tmp5
-}
-
-and
-
-_test:
- movl 8(%esp), %eax
- cmpl %eax, 4(%esp)
- setb %al
- movzbl %al, %eax
- negl %eax
- ret
-
-Several deficiencies here. First, we should instcombine zext+neg into sext:
-
-define i32 @test2(i32 %a, i32 %b) nounwind {
- %tmp3 = icmp ult i32 %a, %b ; <i1> [#uses=1]
- %tmp34 = sext i1 %tmp3 to i32 ; <i32> [#uses=1]
- ret i32 %tmp34
-}
-
-However, before we can do that, we have to fix the bad codegen that we get for
-sext from bool:
-
-_test2:
- movl 8(%esp), %eax
- cmpl %eax, 4(%esp)
- setb %al
- movzbl %al, %eax
- shll $31, %eax
- sarl $31, %eax
- ret
-
-This code should be at least as good as the code above. Once this is fixed, we
-can optimize this specific case even more to:
-
- movl 8(%esp), %eax
- xorl %ecx, %ecx
- cmpl %eax, 4(%esp)
- sbbl %ecx, %ecx
-
-//===---------------------------------------------------------------------===//
-
Take the following code (from
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16541):
@@ -1605,6 +1501,8 @@ loop, the value comes into the loop as two values, and
RegsForValue::getCopyFromRegs doesn't know how to put an AssertSext on the
constructed BUILD_PAIR which represents the cast value.
+This can be handled by making CodeGenPrepare sink the cast.
+
//===---------------------------------------------------------------------===//
Test instructions can be eliminated by using EFLAGS values from arithmetic
@@ -1736,46 +1634,6 @@ Ideal output:
//===---------------------------------------------------------------------===//
-Testcase:
-int x(int a) { return (a & 0x80) ? 0x100 : 0; }
-int y(int a) { return (a & 0x80) *2; }
-
-Current:
- testl $128, 4(%esp)
- setne %al
- movzbl %al, %eax
- shll $8, %eax
- ret
-
-Better:
- movl 4(%esp), %eax
- addl %eax, %eax
- andl $256, %eax
- ret
-
-This is another general instcombine transformation that is profitable on all
-targets. In LLVM IR, these functions look like this:
-
-define i32 @x(i32 %a) nounwind readnone {
-entry:
- %0 = and i32 %a, 128
- %1 = icmp eq i32 %0, 0
- %iftmp.0.0 = select i1 %1, i32 0, i32 256
- ret i32 %iftmp.0.0
-}
-
-define i32 @y(i32 %a) nounwind readnone {
-entry:
- %0 = shl i32 %a, 1
- %1 = and i32 %0, 256
- ret i32 %1
-}
-
-Replacing an icmp+select with a shift should always be considered profitable in
-instcombine.
-
-//===---------------------------------------------------------------------===//
-
Re-implement atomic builtins __sync_add_and_fetch() and __sync_sub_and_fetch
properly.
@@ -1960,3 +1818,132 @@ load, making it non-trivial to determine if there's anything between
the load and the store which would prohibit narrowing.
//===---------------------------------------------------------------------===//
+
+This code:
+void foo(unsigned x) {
+ if (x == 0) bar();
+ else if (x == 1) qux();
+}
+
+currently compiles into:
+_foo:
+ movl 4(%esp), %eax
+ cmpl $1, %eax
+ je LBB0_3
+ testl %eax, %eax
+ jne LBB0_4
+
+the testl could be removed:
+_foo:
+ movl 4(%esp), %eax
+ cmpl $1, %eax
+ je LBB0_3
+ jb LBB0_4
+
+0 is the only unsigned number < 1.
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+%0 = type { i32, i1 }
+
+define i32 @add32carry(i32 %sum, i32 %x) nounwind readnone ssp {
+entry:
+ %uadd = tail call %0 @llvm.uadd.with.overflow.i32(i32 %sum, i32 %x)
+ %cmp = extractvalue %0 %uadd, 1
+ %inc = zext i1 %cmp to i32
+ %add = add i32 %x, %sum
+ %z.0 = add i32 %add, %inc
+ ret i32 %z.0
+}
+
+declare %0 @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+
+compiles to:
+
+_add32carry: ## @add32carry
+ addl %esi, %edi
+ sbbl %ecx, %ecx
+ movl %edi, %eax
+ subl %ecx, %eax
+ ret
+
+But it could be:
+
+_add32carry:
+ leal (%rsi,%rdi), %eax
+ cmpl %esi, %eax
+ adcl $0, %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+The hot loop of 256.bzip2 contains code that looks a bit like this:
+
+int foo(char *P, char *Q, int x, int y) {
+ if (P[0] != Q[0])
+ return P[0] < Q[0];
+ if (P[1] != Q[1])
+ return P[1] < Q[1];
+ if (P[2] != Q[2])
+ return P[2] < Q[2];
+ return P[3] < Q[3];
+}
+
+In the real code, we get a lot more wrong than this. However, even in this
+code we generate:
+
+_foo: ## @foo
+## BB#0: ## %entry
+ movb (%rsi), %al
+ movb (%rdi), %cl
+ cmpb %al, %cl
+ je LBB0_2
+LBB0_1: ## %if.then
+ cmpb %al, %cl
+ jmp LBB0_5
+LBB0_2: ## %if.end
+ movb 1(%rsi), %al
+ movb 1(%rdi), %cl
+ cmpb %al, %cl
+ jne LBB0_1
+## BB#3: ## %if.end38
+ movb 2(%rsi), %al
+ movb 2(%rdi), %cl
+ cmpb %al, %cl
+ jne LBB0_1
+## BB#4: ## %if.end60
+ movb 3(%rdi), %al
+ cmpb 3(%rsi), %al
+LBB0_5: ## %if.end60
+ setl %al
+ movzbl %al, %eax
+ ret
+
+Note that we generate jumps to LBB0_1 which does a redundant compare. The
+redundant compare also forces the register values to be live, which prevents
+folding one of the loads into the compare. In contrast, GCC 4.2 produces:
+
+_foo:
+ movzbl (%rsi), %eax
+ cmpb %al, (%rdi)
+ jne L10
+L12:
+ movzbl 1(%rsi), %eax
+ cmpb %al, 1(%rdi)
+ jne L10
+ movzbl 2(%rsi), %eax
+ cmpb %al, 2(%rdi)
+ jne L10
+ movzbl 3(%rdi), %eax
+ cmpb 3(%rsi), %al
+L10:
+ setl %al
+ movzbl %al, %eax
+ ret
+
+which is "perfect".
+
+//===---------------------------------------------------------------------===//
+
diff --git a/lib/Target/X86/Utils/CMakeLists.txt b/lib/Target/X86/Utils/CMakeLists.txt
new file mode 100644
index 0000000..3ad5f99
--- /dev/null
+++ b/lib/Target/X86/Utils/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMX86Utils
+ X86ShuffleDecode.cpp
+ )
+add_dependencies(LLVMX86Utils X86CodeGenTable_gen)
diff --git a/lib/Target/X86/Utils/Makefile b/lib/Target/X86/Utils/Makefile
new file mode 100644
index 0000000..1df6f0f
--- /dev/null
+++ b/lib/Target/X86/Utils/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/X86/Utils/Makefile -----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86Utils
+
+# Hack: we need to include 'main' x86 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
new file mode 100644
index 0000000..cd06060
--- /dev/null
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -0,0 +1,190 @@
+//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ShuffleDecode.h"
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
+ // Default to copying the dest value.
+ ShuffleMask.push_back(0);
+ ShuffleMask.push_back(1);
+ ShuffleMask.push_back(2);
+ ShuffleMask.push_back(3);
+
+ // Decode the immediate.
+ unsigned ZMask = Imm & 15;
+ unsigned CountD = (Imm >> 4) & 3;
+ unsigned CountS = (Imm >> 6) & 3;
+
+ // CountS selects which input element to use.
+ unsigned InVal = 4+CountS;
+ // CountD specifies which element of destination to update.
+ ShuffleMask[CountD] = InVal;
+ // ZMask zaps values, potentially overriding the CountD elt.
+ if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
+ if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
+ if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
+ if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
+}
+
+// <3,1> or <6,7,2,3>
+void DecodeMOVHLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = NElts/2; i != NElts; ++i)
+ ShuffleMask.push_back(NElts+i);
+
+ for (unsigned i = NElts/2; i != NElts; ++i)
+ ShuffleMask.push_back(i);
+}
+
+// <0,2> or <0,1,4,5>
+void DecodeMOVLHPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i)
+ ShuffleMask.push_back(i);
+
+ for (unsigned i = 0; i != NElts/2; ++i)
+ ShuffleMask.push_back(NElts+i);
+}
+
+void DecodePSHUFMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts; ++i) {
+ ShuffleMask.push_back(Imm % NElts);
+ Imm /= NElts;
+ }
+}
+
+void DecodePSHUFHWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ ShuffleMask.push_back(0);
+ ShuffleMask.push_back(1);
+ ShuffleMask.push_back(2);
+ ShuffleMask.push_back(3);
+ for (unsigned i = 0; i != 4; ++i) {
+ ShuffleMask.push_back(4+(Imm & 3));
+ Imm >>= 2;
+ }
+}
+
+void DecodePSHUFLWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != 4; ++i) {
+ ShuffleMask.push_back((Imm & 3));
+ Imm >>= 2;
+ }
+ ShuffleMask.push_back(4);
+ ShuffleMask.push_back(5);
+ ShuffleMask.push_back(6);
+ ShuffleMask.push_back(7);
+}
+
+void DecodePUNPCKLBWMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLWDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLQDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLMask(EVT VT,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(VT, ShuffleMask);
+}
+
+void DecodePUNPCKHMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i+NElts/2);
+ ShuffleMask.push_back(i+NElts+NElts/2);
+ }
+}
+
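Worked with an illustrative NElts = 4, the interleave above yields:

  i = 0: push 0+2 = 2 and 0+4+2 = 6
  i = 1: push 1+2 = 3 and 1+4+2 = 7
  ShuffleMask = <2,6,3,7>, the high halves of the two inputs interleaved.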
+void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ // Part that reads from dest.
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(Imm % NElts);
+ Imm /= NElts;
+ }
+ // Part that reads from src.
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(Imm % NElts + NElts);
+ Imm /= NElts;
+ }
+}
+
+void DecodeUNPCKHPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i+NElts/2); // Reads from dest
+ ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src
+ }
+}
+
+void DecodeUNPCKLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
+}
+
+void DecodeUNPCKLPDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
+}
+
+/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// etc. VT indicates the vector type, allowing it to handle different data
+/// types and vector widths.
+void DecodeUNPCKLPMask(EVT VT,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections.
+ unsigned NumSections = VT.getSizeInBits() / 128;
+ if (NumSections == 0) NumSections = 1; // Handle MMX
+ unsigned NumSectionElts = NumElts / NumSections;
+
+ unsigned Start = 0;
+ unsigned End = NumSectionElts / 2;
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = Start; i != End; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i+NumSectionElts); // Reads from src/src2
+ }
+ // Process the next 128 bits.
+ Start += NumSectionElts;
+ End += NumSectionElts;
+ }
+}
+
+} // llvm namespace
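To make the per-section loop concrete, here is a trace with illustrative types (the values follow directly from the code above):

  v4f32 (128 bits): NumSections = 1, NumSectionElts = 4
    i = 0 pushes 0,4; i = 1 pushes 1,5          ->  <0,4,1,5>
  v8f32 (256 bits): NumSections = 2, NumSectionElts = 4
    section 0 pushes <0,4,1,5>; section 1 pushes <4,8,5,9>
                                                ->  <0,4,1,5,4,8,5,9>

Within each section the second-operand index is i + NumSectionElts rather than i + NumElts, so each 128-bit section interleaves independently.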
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
new file mode 100644
index 0000000..b18f670
--- /dev/null
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -0,0 +1,87 @@
+//===-- X86ShuffleDecode.h - X86 shuffle decode logic ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_SHUFFLE_DECODE_H
+#define X86_SHUFFLE_DECODE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ValueTypes.h"
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+enum {
+ SM_SentinelZero = ~0U
+};
+
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask);
+
+// <3,1> or <6,7,2,3>
+void DecodeMOVHLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+// <0,2> or <0,1,4,5>
+void DecodeMOVLHPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFHWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFLWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLBWMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLWDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLQDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLMask(EVT VT,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKHMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKHPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKLPDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// etc. VT indicates the vector type, allowing it to handle different data
+/// types and vector widths.
+void DecodeUNPCKLPMask(EVT VT,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+} // llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 27e8850..0ca4366 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -15,6 +15,7 @@
#ifndef TARGET_X86_H
#define TARGET_X86_H
+#include "llvm/Support/DataTypes.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -23,11 +24,13 @@ class FunctionPass;
class JITCodeEmitter;
class MCCodeEmitter;
class MCContext;
+class MCObjectWriter;
class MachineCodeEmitter;
class Target;
class TargetAsmBackend;
class X86TargetMachine;
class formatted_raw_ostream;
+class raw_ostream;
/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
@@ -74,6 +77,13 @@ FunctionPass *createEmitX86CodeToMemory();
///
FunctionPass *createX86MaxStackAlignmentHeuristicPass();
+
+/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
+MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
extern Target TheX86_32Target, TheX86_64Target;
} // End llvm namespace
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 923f894..efb6c8c 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -23,6 +23,9 @@ include "llvm/Target/Target.td"
def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
"Enable conditional move instructions">;
+def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
+ "Support POPCNT instruction">;
+
def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
"Enable MMX instructions">;
@@ -45,7 +48,7 @@ def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41",
[FeatureSSSE3]>;
def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
- [FeatureSSE41]>;
+ [FeatureSSE41, FeaturePOPCNT]>;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
"Enable 3DNow! instructions">;
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
@@ -63,7 +66,8 @@ def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
"IsUAMemFast", "true",
"Fast unaligned memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
- "Support SSE 4a instructions">;
+ "Support SSE 4a instructions",
+ [FeaturePOPCNT]>;
def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true",
"Enable AVX instructions">;
@@ -112,11 +116,13 @@ def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
FeatureFastUAMem]>;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
-def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
- FeatureFastUAMem, FeatureAES]>;
-// Sandy Bridge does not have FMA
-// FIXME: Wikipedia says it does... it should have AES as well.
-def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>;
+def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
+ FeatureFastUAMem, FeatureAES, FeatureCLMUL]>;
+// SSE is not listed here since LLVM treats AVX as a reimplementation of SSE,
+// rather than a superset.
+// FIXME: Disabling AVX for now since it's not ready.
+def : Proc<"sandybridge", [FeatureSSE42, Feature64Bit,
+ FeatureAES, FeatureCLMUL]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index 8910c42..da5f5b1 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -12,52 +12,82 @@
#include "X86FixupKinds.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCObjectFormat.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ELF.h"
-#include "llvm/Support/MachO.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
-
static unsigned getFixupKindLog2Size(unsigned Kind) {
switch (Kind) {
default: assert(0 && "invalid fixup kind!");
- case X86::reloc_pcrel_1byte:
+ case FK_PCRel_1:
case FK_Data_1: return 0;
- case X86::reloc_pcrel_2byte:
+ case FK_PCRel_2:
case FK_Data_2: return 1;
- case X86::reloc_pcrel_4byte:
+ case FK_PCRel_4:
case X86::reloc_riprel_4byte:
case X86::reloc_riprel_4byte_movq_load:
case X86::reloc_signed_4byte:
case X86::reloc_global_offset_table:
case FK_Data_4: return 2;
+ case FK_PCRel_8:
case FK_Data_8: return 3;
}
}
namespace {
+
+class X86ELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ X86ELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine,
+ bool HasRelocationAddend)
+ : MCELFObjectTargetWriter(is64Bit, OSType, EMachine, HasRelocationAddend) {}
+};
+
class X86AsmBackend : public TargetAsmBackend {
public:
X86AsmBackend(const Target &T)
- : TargetAsmBackend(T) {}
+ : TargetAsmBackend() {}
+
+ unsigned getNumFixupKinds() const {
+ return X86::NumTargetFixupKinds;
+ }
- void ApplyFixup(const MCFixup &Fixup, MCDataFragment &DF,
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
+ { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
+ { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel},
+ { "reloc_signed_4byte", 0, 4 * 8, 0},
+ { "reloc_global_offset_table", 0, 4 * 8, 0}
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return TargetAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const {
unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
- assert(Fixup.getOffset() + Size <= DF.getContents().size() &&
+ assert(Fixup.getOffset() + Size <= DataSize &&
"Invalid fixup offset!");
for (unsigned i = 0; i != Size; ++i)
- DF.getContents()[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
+ Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}
bool MayNeedRelaxation(const MCInst &Inst) const;
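A minimal standalone sketch of the ApplyFixup byte loop above (the function and buffer names are ours, not LLVM's). The size comes from getFixupKindLog2Size, e.g. FK_Data_4 yields log2 size 2, so Size = 1 << 2 = 4 bytes, spliced in little-endian order:

#include <cstdint>
#include <cstdio>

// Illustrative stand-in for X86AsmBackend::ApplyFixup's inner loop.
static void patchLE(char *Data, unsigned Offset, unsigned Size,
                    uint64_t Value) {
  for (unsigned i = 0; i != Size; ++i)
    Data[Offset + i] = char(uint8_t(Value >> (i * 8)));
}

int main() {
  char Frag[8] = {};
  patchLE(Frag, 2, 4, 0x11223344);  // 4-byte fixup at offset 2
  for (unsigned char C : Frag) std::printf("%02x ", C);
  std::printf("\n");  // 00 00 44 33 22 11 00 00
  return 0;
}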
@@ -153,6 +183,9 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) {
case X86::CMP32mi8: return X86::CMP32mi;
case X86::CMP64ri8: return X86::CMP64ri32;
case X86::CMP64mi8: return X86::CMP64mi32;
+
+ // PUSH
+ case X86::PUSHi8: return X86::PUSHi32;
}
}
@@ -211,10 +244,8 @@ void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
/// WriteNopData - Write optimal nops to the output file for the \arg Count
/// bytes. Up to the first 15 bytes use one optimal (possibly 0x66-prefixed)
/// multi-byte nop; any remainder is filled with single-byte nops.
-///
-/// FIXME this is X86 32-bit specific and should move to a better place.
bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
- static const uint8_t Nops[16][16] = {
+ static const uint8_t Nops[10][10] = {
// nop
{0x90},
// xchg %ax,%ax
@@ -235,30 +266,16 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
{0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
// nopw %cs:0L(%[re]ax,%[re]ax,1)
{0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
- // nopw %cs:0L(%[re]ax,%[re]ax,1)
- {0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
- // nopw 0(%[re]ax,%[re]ax,1)
- // nopw 0(%[re]ax,%[re]ax,1)
- {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
- 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
- // nopw 0(%[re]ax,%[re]ax,1)
- // nopl 0L(%[re]ax) */
- {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
- 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
- // nopl 0L(%[re]ax)
- // nopl 0L(%[re]ax)
- {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
- 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
- // nopl 0L(%[re]ax)
- // nopl 0L(%[re]ax,%[re]ax,1)
- {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
- 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
};
// Write an optimal sequence for the first 15 bytes.
- uint64_t OptimalCount = (Count < 16) ? Count : 15;
- for (uint64_t i = 0, e = OptimalCount; i != e; i++)
- OW->Write8(Nops[OptimalCount - 1][i]);
+ const uint64_t OptimalCount = (Count < 16) ? Count : 15;
+ const uint64_t Prefixes = OptimalCount <= 10 ? 0 : OptimalCount - 10;
+ for (uint64_t i = 0, e = Prefixes; i != e; i++)
+ OW->Write8(0x66);
+ const uint64_t Rest = OptimalCount - Prefixes;
+ for (uint64_t i = 0, e = Rest; i != e; i++)
+ OW->Write8(Nops[Rest - 1][i]);
// Finish with single byte nops.
for (uint64_t i = OptimalCount, e = Count; i != e; ++i)
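Tracing the new prefix-padding scheme with illustrative counts (derived from the code above):

  Count = 14: OptimalCount = 14, Prefixes = 4, Rest = 10
    emits 66 66 66 66 followed by the 10-byte nopw %cs:0L(%[re]ax,%[re]ax,1)
  Count = 7:  OptimalCount = 7, Prefixes = 0, Rest = 7
    emits Nops[6] unchanged
  Count > 15: 5 prefixes plus the 10-byte nop cover the first 15 bytes and
    the remainder is filled with single-byte nops.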
@@ -271,28 +288,16 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
namespace {
class ELFX86AsmBackend : public X86AsmBackend {
- MCELFObjectFormat Format;
-
public:
Triple::OSType OSType;
ELFX86AsmBackend(const Target &T, Triple::OSType _OSType)
: X86AsmBackend(T), OSType(_OSType) {
- HasScatteredSymbols = true;
HasReliableSymbolDifference = true;
}
- virtual const MCObjectFormat &getObjectFormat() const {
- return Format;
- }
-
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
const MCSectionELF &ES = static_cast<const MCSectionELF&>(Section);
- return ES.getFlags() & MCSectionELF::SHF_MERGE;
- }
-
- bool isVirtualSection(const MCSection &Section) const {
- const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section);
- return SE.getType() == MCSectionELF::SHT_NOBITS;
+ return ES.getFlags() & ELF::SHF_MERGE;
}
};
@@ -301,15 +306,10 @@ public:
ELFX86_32AsmBackend(const Target &T, Triple::OSType OSType)
: ELFX86AsmBackend(T, OSType) {}
- unsigned getPointerSize() const {
- return 4;
- }
-
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(OS, /*Is64Bit=*/false,
- OSType, ELF::EM_386,
- /*IsLittleEndian=*/true,
- /*HasRelocationAddend=*/false);
+ return createELFObjectWriter(new X86ELFObjectWriter(false, OSType,
+ ELF::EM_386, false),
+ OS, /*IsLittleEndian*/ true);
}
};
@@ -318,69 +318,31 @@ public:
ELFX86_64AsmBackend(const Target &T, Triple::OSType OSType)
: ELFX86AsmBackend(T, OSType) {}
- unsigned getPointerSize() const {
- return 8;
- }
-
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(OS, /*Is64Bit=*/true,
- OSType, ELF::EM_X86_64,
- /*IsLittleEndian=*/true,
- /*HasRelocationAddend=*/true);
+ return createELFObjectWriter(new X86ELFObjectWriter(true, OSType,
+ ELF::EM_X86_64, true),
+ OS, /*IsLittleEndian*/ true);
}
};
class WindowsX86AsmBackend : public X86AsmBackend {
bool Is64Bit;
- MCCOFFObjectFormat Format;
public:
WindowsX86AsmBackend(const Target &T, bool is64Bit)
: X86AsmBackend(T)
, Is64Bit(is64Bit) {
- HasScatteredSymbols = true;
- }
-
- virtual const MCObjectFormat &getObjectFormat() const {
- return Format;
- }
-
- unsigned getPointerSize() const {
- if (Is64Bit)
- return 8;
- else
- return 4;
}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
return createWinCOFFObjectWriter(OS, Is64Bit);
}
-
- bool isVirtualSection(const MCSection &Section) const {
- const MCSectionCOFF &SE = static_cast<const MCSectionCOFF&>(Section);
- return SE.getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
- }
};
class DarwinX86AsmBackend : public X86AsmBackend {
- MCMachOObjectFormat Format;
-
public:
DarwinX86AsmBackend(const Target &T)
- : X86AsmBackend(T) {
- HasScatteredSymbols = true;
- }
-
- virtual const MCObjectFormat &getObjectFormat() const {
- return Format;
- }
-
- bool isVirtualSection(const MCSection &Section) const {
- const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
- return (SMO.getType() == MCSectionMachO::S_ZEROFILL ||
- SMO.getType() == MCSectionMachO::S_GB_ZEROFILL ||
- SMO.getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL);
- }
+ : X86AsmBackend(T) { }
};
class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
@@ -388,14 +350,10 @@ public:
DarwinX86_32AsmBackend(const Target &T)
: DarwinX86AsmBackend(T) {}
- unsigned getPointerSize() const {
- return 4;
- }
-
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPUTypeI386,
- MachO::CPUSubType_I386_ALL,
- /*IsLittleEndian=*/true);
+ return createX86MachObjectWriter(OS, /*Is64Bit=*/false,
+ object::mach::CTM_i386,
+ object::mach::CSX86_ALL);
}
};
@@ -406,14 +364,10 @@ public:
HasReliableSymbolDifference = true;
}
- unsigned getPointerSize() const {
- return 8;
- }
-
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createMachObjectWriter(OS, /*Is64Bit=*/true, MachO::CPUTypeX86_64,
- MachO::CPUSubType_I386_ALL,
- /*IsLittleEndian=*/true);
+ return createX86MachObjectWriter(OS, /*Is64Bit=*/true,
+ object::mach::CTM_x86_64,
+ object::mach::CSX86_ALL);
}
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
@@ -460,7 +414,10 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
case Triple::MinGW32:
case Triple::Cygwin:
case Triple::Win32:
- return new WindowsX86AsmBackend(T, false);
+ if (Triple(TT).getEnvironment() == Triple::MachO)
+ return new DarwinX86_32AsmBackend(T);
+ else
+ return new WindowsX86AsmBackend(T, false);
default:
return new ELFX86_32AsmBackend(T, Triple(TT).getOS());
}
@@ -471,10 +428,13 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
switch (Triple(TT).getOS()) {
case Triple::Darwin:
return new DarwinX86_64AsmBackend(T);
- case Triple::MinGW64:
+ case Triple::MinGW32:
case Triple::Cygwin:
case Triple::Win32:
- return new WindowsX86AsmBackend(T, true);
+ if (Triple(TT).getEnvironment() == Triple::MachO)
+ return new DarwinX86_64AsmBackend(T);
+ else
+ return new WindowsX86AsmBackend(T, true);
default:
return new ELFX86_64AsmBackend(T, Triple(TT).getOS());
}
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 56863d9..5635175 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -61,7 +61,7 @@ def RetCC_X86_32_C : CallingConv<[
// weirdly; this is really the sse-regparm calling convention) in which
// case they use XMM0, otherwise it is the same as the common X86 calling
// conv.
- CCIfInReg<CCIfSubtarget<"hasSSE2()",
+ CCIfInReg<CCIfSubtarget<"hasXMMInt()",
CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
CCDelegateTo<RetCC_X86Common>
@@ -73,8 +73,8 @@ def RetCC_X86_32_Fast : CallingConv<[
// SSE2.
// This can happen when a float, 2 x float, or 3 x float vector is split by
// target lowering, and is returned in 1-3 sse regs.
- CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
- CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+ CCIfType<[f32], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+ CCIfType<[f64], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
// For integers, ECX can be used as an extra return register
CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>,
@@ -163,12 +163,12 @@ def CC_X86_64_C : CallingConv<[
// registers on Darwin.
CCIfType<[x86mmx],
CCIfSubtarget<"isTargetDarwin()",
- CCIfSubtarget<"hasSSE2()",
+ CCIfSubtarget<"hasXMMInt()",
CCPromoteToType<v2i64>>>>,
// The first 8 FP/Vector arguments are passed in XMM registers.
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfSubtarget<"hasSSE1()",
+ CCIfSubtarget<"hasXMM()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
// The first 8 256-bit vector arguments are passed in YMM registers.
@@ -215,6 +215,13 @@ def CC_X86_Win64_C : CallingConv<[
// The first 4 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
[XMM0, XMM1, XMM2, XMM3]>>,
+
+ // Do not pass the sret argument in RCX; the Win64 thiscall calling
+ // convention requires "this" to be passed in RCX.
+ CCIfCC<"CallingConv::X86_ThisCall",
+ CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[RDX , R8 , R9 ],
+ [XMM1, XMM2, XMM3]>>>>,
+
CCIfType<[i64], CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ],
[XMM0, XMM1, XMM2, XMM3]>>,
@@ -245,7 +252,7 @@ def CC_X86_64_GHC : CallingConv<[
// Pass in STG registers: F1, F2, F3, F4, D1, D2
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfSubtarget<"hasSSE1()",
+ CCIfSubtarget<"hasXMM()",
CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
]>;
@@ -263,7 +270,7 @@ def CC_X86_32_Common : CallingConv<[
// The first 3 float or double arguments, if marked 'inreg' and if the call
// is not a vararg call and if SSE2 is available, are passed in SSE registers.
CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64],
- CCIfSubtarget<"hasSSE2()",
+ CCIfSubtarget<"hasXMMInt()",
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
// The first 3 __m64 (except for v1i64) vector arguments are passed in mmx
@@ -362,7 +369,7 @@ def CC_X86_32_FastCC : CallingConv<[
// The first 3 float or double arguments, if the call is not a vararg
// call and if SSE2 is available, are passed in SSE registers.
CCIfNotVarArg<CCIfType<[f32,f64],
- CCIfSubtarget<"hasSSE2()",
+ CCIfSubtarget<"hasXMMInt()",
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
// Doubles get 8-byte slots that are 8-byte aligned.
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index 3b721b4..f1d7ede 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -14,6 +14,7 @@
#include "X86ELFWriterInfo.h"
#include "X86Relocations.h"
#include "llvm/Function.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -35,13 +36,13 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
if (is64Bit) {
switch(MachineRelTy) {
case X86::reloc_pcrel_word:
- return R_X86_64_PC32;
+ return ELF::R_X86_64_PC32;
case X86::reloc_absolute_word:
- return R_X86_64_32;
+ return ELF::R_X86_64_32;
case X86::reloc_absolute_word_sext:
- return R_X86_64_32S;
+ return ELF::R_X86_64_32S;
case X86::reloc_absolute_dword:
- return R_X86_64_64;
+ return ELF::R_X86_64_64;
case X86::reloc_picrel_word:
default:
llvm_unreachable("unknown x86_64 machine relocation type");
@@ -49,9 +50,9 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
} else {
switch(MachineRelTy) {
case X86::reloc_pcrel_word:
- return R_386_PC32;
+ return ELF::R_386_PC32;
case X86::reloc_absolute_word:
- return R_386_32;
+ return ELF::R_386_32;
case X86::reloc_absolute_word_sext:
case X86::reloc_absolute_dword:
case X86::reloc_picrel_word:
@@ -66,18 +67,18 @@ long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
long int Modifier) const {
if (is64Bit) {
switch(RelTy) {
- case R_X86_64_PC32: return Modifier - 4;
- case R_X86_64_32:
- case R_X86_64_32S:
- case R_X86_64_64:
+ case ELF::R_X86_64_PC32: return Modifier - 4;
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
+ case ELF::R_X86_64_64:
return Modifier;
default:
llvm_unreachable("unknown x86_64 relocation type");
}
} else {
switch(RelTy) {
- case R_386_PC32: return Modifier - 4;
- case R_386_32: return Modifier;
+ case ELF::R_386_PC32: return Modifier - 4;
+ case ELF::R_386_32: return Modifier;
default:
llvm_unreachable("unknown x86 relocation type");
}
@@ -88,19 +89,19 @@ long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
if (is64Bit) {
switch(RelTy) {
- case R_X86_64_PC32:
- case R_X86_64_32:
- case R_X86_64_32S:
+ case ELF::R_X86_64_PC32:
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
return 32;
- case R_X86_64_64:
+ case ELF::R_X86_64_64:
return 64;
default:
llvm_unreachable("unknown x86_64 relocation type");
}
} else {
switch(RelTy) {
- case R_386_PC32:
- case R_386_32:
+ case ELF::R_386_PC32:
+ case ELF::R_386_32:
return 32;
default:
llvm_unreachable("unknown x86 relocation type");
@@ -112,20 +113,20 @@ unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
if (is64Bit) {
switch(RelTy) {
- case R_X86_64_PC32:
+ case ELF::R_X86_64_PC32:
return true;
- case R_X86_64_32:
- case R_X86_64_32S:
- case R_X86_64_64:
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
+ case ELF::R_X86_64_64:
return false;
default:
llvm_unreachable("unknown x86_64 relocation type");
}
} else {
switch(RelTy) {
- case R_386_PC32:
+ case ELF::R_386_PC32:
return true;
- case R_386_32:
+ case ELF::R_386_32:
return false;
default:
llvm_unreachable("unknown x86 relocation type");
@@ -143,7 +144,7 @@ long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset,
unsigned RelOffset,
unsigned RelTy) const {
- if (RelTy == R_X86_64_PC32 || RelTy == R_386_PC32)
+ if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32)
return SymOffset - (RelOffset + 4);
else
assert("computeRelocation unknown for this relocation type");
diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h
index 7d37d95..a45b5bb 100644
--- a/lib/Target/X86/X86ELFWriterInfo.h
+++ b/lib/Target/X86/X86ELFWriterInfo.h
@@ -20,23 +20,6 @@ namespace llvm {
class X86ELFWriterInfo : public TargetELFWriterInfo {
- // ELF Relocation types for X86
- enum X86RelocationType {
- R_386_NONE = 0,
- R_386_32 = 1,
- R_386_PC32 = 2
- };
-
- // ELF Relocation types for X86_64
- enum X86_64RelocationType {
- R_X86_64_NONE = 0,
- R_X86_64_64 = 1,
- R_X86_64_PC32 = 2,
- R_X86_64_32 = 10,
- R_X86_64_32S = 11,
- R_X86_64_PC64 = 24
- };
-
public:
X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_);
virtual ~X86ELFWriterInfo();
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index b938539..6fa9284 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -36,7 +36,7 @@
using namespace llvm;
namespace {
-
+
class X86FastISel : public FastISel {
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -46,7 +46,7 @@ class X86FastISel : public FastISel {
///
unsigned StackPtr;
- /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
/// floating point ops.
/// When SSE is available, use it for f32 operations.
/// When SSE2 is available, use it for f64 operations.
@@ -69,12 +69,12 @@ public:
/// possible.
virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI);
-
+
#include "X86GenFastISel.inc"
private:
bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
-
+
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
bool X86FastEmitStore(EVT VT, const Value *Val,
@@ -84,12 +84,12 @@ private:
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
unsigned &ResultReg);
-
+
bool X86SelectAddress(const Value *V, X86AddressMode &AM);
bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
bool X86SelectLoad(const Instruction *I);
-
+
bool X86SelectStore(const Instruction *I);
bool X86SelectRet(const Instruction *I);
@@ -105,7 +105,7 @@ private:
bool X86SelectSelect(const Instruction *I);
bool X86SelectTrunc(const Instruction *I);
-
+
bool X86SelectFPExt(const Instruction *I);
bool X86SelectFPTrunc(const Instruction *I);
@@ -134,7 +134,7 @@ private:
bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
};
-
+
} // end anonymous namespace.
bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
@@ -250,7 +250,7 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
break;
}
-
+
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(Opc)), AM).addReg(Val);
return true;
@@ -261,7 +261,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Val))
Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
-
+
// If this is a store of a simple constant, fold the constant into the store.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
unsigned Opc = 0;
@@ -278,7 +278,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
Opc = X86::MOV64mi32;
break;
}
-
+
if (Opc) {
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(Opc)), AM)
@@ -287,11 +287,11 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
return true;
}
}
-
+
unsigned ValReg = getRegForValue(Val);
if (ValReg == 0)
- return false;
-
+ return false;
+
return X86FastEmitStore(VT, ValReg, AM);
}
@@ -303,7 +303,7 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
unsigned &ResultReg) {
unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
Src, /*TODO: Kill=*/false);
-
+
if (RR != 0) {
ResultReg = RR;
return true;
@@ -320,11 +320,11 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// Don't walk into other basic blocks; it's possible we haven't
// visited them yet, so the instructions may not yet be assigned
// virtual registers.
- if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
- return false;
-
- Opcode = I->getOpcode();
- U = I;
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
Opcode = C->getOpcode();
U = C;
@@ -438,7 +438,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
AM.Disp = (uint32_t)Disp;
if (X86SelectAddress(U->getOperand(0), AM))
return true;
-
+
// If we couldn't merge the sub value into this addr mode, revert back to
// our address and just match the value instead of completely failing.
AM = SavedAM;
@@ -467,7 +467,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// Okay, we've committed to selecting this global. Set up the basic address.
AM.GV = GV;
-
+
// Allow the subtarget to classify the global.
unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
@@ -476,7 +476,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// FIXME: How do we know Base.Reg is free??
AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
}
-
+
// Unless the ABI requires an extra load, return a direct reference to
// the global.
if (!isGlobalStubReference(GVFlags)) {
@@ -489,7 +489,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
AM.GVOpFlags = GVFlags;
return true;
}
-
+
// Ok, we need to do a load from a stub. If we've already loaded from this
// stub, reuse the loaded pointer, otherwise emit the load now.
DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
@@ -511,14 +511,14 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
if (TLI.getPointerTy() == MVT::i64) {
Opc = X86::MOV64rm;
RC = X86::GR64RegisterClass;
-
+
if (Subtarget->isPICStyleRIPRel())
StubAM.Base.Reg = X86::RIP;
} else {
Opc = X86::MOV32rm;
RC = X86::GR32RegisterClass;
}
-
+
LoadReg = createResultReg(RC);
MachineInstrBuilder LoadMI =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
@@ -530,7 +530,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// Prevent loading GV stub multiple times in same MBB.
LocalValueMap[V] = LoadReg;
}
-
+
// Now construct the final address. Note that the Disp, Scale,
// and Index values may already be set here.
AM.Base.Reg = LoadReg;
@@ -597,14 +597,18 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
(AM.Base.Reg != 0 || AM.IndexReg != 0))
return false;
- // Can't handle TLS or DLLImport.
+ // Can't handle DLLImport.
+ if (GV->hasDLLImportLinkage())
+ return false;
+
+ // Can't handle TLS.
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
- if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage())
+ if (GVar->isThreadLocal())
return false;
// Okay, we've committed to selecting this global. Set up the basic address.
AM.GV = GV;
-
+
// No ABI requires an extra load for anything other than DLLImport, which
// we rejected above. Return a direct reference to the global.
if (Subtarget->isPICStyleRIPRel()) {
@@ -617,7 +621,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
} else if (Subtarget->isPICStyleGOT()) {
AM.GVOpFlags = X86II::MO_GOTOFF;
}
-
+
return true;
}
@@ -702,7 +706,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
return false;
CCValAssign &VA = ValLocs[0];
-
+
// Don't bother handling odd stuff for now.
if (VA.getLocInfo() != CCValAssign::Full)
return false;
@@ -792,11 +796,11 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
EVT VT) {
unsigned Op0Reg = getRegForValue(Op0);
if (Op0Reg == 0) return false;
-
+
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Op1))
Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));
-
+
// We have two options: compare with register or immediate. If the RHS of
// the compare is an immediate that we can fold into this compare, use
// CMPri, otherwise use CMPrr.
@@ -808,16 +812,16 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
return true;
}
}
-
+
unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
if (CompareOpc == 0) return false;
-
+
unsigned Op1Reg = getRegForValue(Op1);
if (Op1Reg == 0) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc))
.addReg(Op0Reg)
.addReg(Op1Reg);
-
+
return true;
}
@@ -835,13 +839,13 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
case CmpInst::FCMP_OEQ: {
if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
return false;
-
+
unsigned EReg = createResultReg(&X86::GR8RegClass);
unsigned NPReg = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::SETNPr), NPReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
UpdateValueMap(I, ResultReg);
return true;
@@ -874,7 +878,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break;
case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
-
+
case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
@@ -896,7 +900,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
// Emit a compare of Op0/Op1.
if (!X86FastEmitCompare(Op0, Op1, VT))
return false;
-
+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg);
UpdateValueMap(I, ResultReg);
return true;
@@ -961,7 +965,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break;
case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
-
+
case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
@@ -975,7 +979,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
default:
return false;
}
-
+
const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
if (SwapArgs)
std::swap(Op0, Op1);
@@ -983,7 +987,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
// Emit a compare of the LHS and RHS, setting the flags.
if (!X86FastEmitCompare(Op0, Op1, VT))
return false;
-
+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc))
.addMBB(TrueMBB);
@@ -1036,8 +1040,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
}
const TargetInstrDesc &TID = MI.getDesc();
- if (TID.hasUnmodeledSideEffects() ||
- TID.hasImplicitDefOfPhysReg(X86::EFLAGS))
+ if (TID.hasImplicitDefOfPhysReg(X86::EFLAGS) ||
+ MI.hasUnmodeledSideEffects())
break;
}
@@ -1119,16 +1123,16 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
unsigned Op0Reg = getRegForValue(I->getOperand(0));
if (Op0Reg == 0) return false;
-
+
// Fold immediate in shl(x,3).
if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm),
ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
UpdateValueMap(I, ResultReg);
return true;
}
-
+
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
@@ -1152,10 +1156,10 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
MVT VT;
if (!isTypeLegal(I->getType(), VT))
return false;
-
+
// We only use cmov here, if we don't have a cmov instruction bail.
if (!Subtarget->hasCMov()) return false;
-
+
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
if (VT == MVT::i16) {
@@ -1168,7 +1172,7 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
Opc = X86::CMOVE64rr;
RC = &X86::GR64RegClass;
} else {
- return false;
+ return false;
}
unsigned Op0Reg = getRegForValue(I->getOperand(0));
@@ -1233,7 +1237,7 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
return false;
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
-
+
// This code only handles truncation to byte right now.
if (DstVT != MVT::i8 && DstVT != MVT::i1)
// All other cases should be handled by the tblgen generated code.
@@ -1304,21 +1308,21 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// Grab the frame index.
X86AddressMode AM;
if (!X86SelectAddress(Slot, AM)) return false;
-
+
if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
-
+
return true;
}
case Intrinsic::objectsize: {
ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
const Type *Ty = I.getCalledFunction()->getReturnType();
-
+
assert(CI && "Non-constant type in Intrinsic::objectsize?");
-
+
MVT VT;
if (!isTypeLegal(Ty, VT))
return false;
-
+
unsigned OpC = 0;
if (VT == MVT::i32)
OpC = X86::MOV32ri;
@@ -1326,7 +1330,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
OpC = X86::MOV64ri;
else
return false;
-
+
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg).
addImm(CI->isZero() ? -1ULL : 0);
@@ -1398,7 +1402,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
ResultReg = DestReg1+1;
else
ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));
-
+
unsigned Opc = X86::SETBr;
if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
Opc = X86::SETOr;
@@ -1516,10 +1520,10 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext());
-
+
// Allocate shadow area for Win64
- if (Subtarget->isTargetWin64()) {
- CCInfo.AllocateStack(32, 8);
+ if (Subtarget->isTargetWin64()) {
+ CCInfo.AllocateStack(32, 8);
}
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
@@ -1539,7 +1543,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
CCValAssign &VA = ArgLocs[i];
unsigned Arg = Args[VA.getValNo()];
EVT ArgVT = ArgVTs[VA.getValNo()];
-
+
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
@@ -1547,16 +1551,14 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
case CCValAssign::SExt: {
bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
- assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted;
- Emitted = true;
+ assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
ArgVT = VA.getLocVT();
break;
}
case CCValAssign::ZExt: {
bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
- assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted;
- Emitted = true;
+ assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
ArgVT = VA.getLocVT();
break;
}
@@ -1572,21 +1574,21 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (!Emitted)
Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
-
- assert(Emitted && "Failed to emit a aext!"); Emitted=Emitted;
+
+ assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
ArgVT = VA.getLocVT();
break;
}
case CCValAssign::BCvt: {
unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(),
- ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false);
+ ISD::BITCAST, Arg, /*TODO: Kill=*/false);
assert(BC != 0 && "Failed to emit a bitcast!");
Arg = BC;
ArgVT = VA.getLocVT();
break;
}
}
-
+
if (VA.isRegLoc()) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
VA.getLocReg()).addReg(Arg);
@@ -1597,7 +1599,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
AM.Base.Reg = StackPtr;
AM.Disp = LocMemOffset;
const Value *ArgVal = ArgVals[VA.getValNo()];
-
+
// If this is a really simple value, emit this with the Value* version of
// X86FastEmitStore. If it isn't simple, we don't want to do this, as it
// can cause us to reevaluate the argument.
@@ -1609,13 +1611,13 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
}
// ELF / PIC requires GOT in the EBX register before function calls via PLT
- // GOT pointer.
+ // GOT pointer.
if (Subtarget->isPICStyleGOT()) {
unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
X86::EBX).addReg(Base);
}
-
+
// Issue the call.
MachineInstrBuilder MIB;
if (CalleeOp) {
@@ -1629,7 +1631,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
CallOpc = X86::CALL32r;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
.addReg(CalleeOp);
-
+
} else {
// Direct call.
assert(GV && "Not a direct call");
@@ -1640,10 +1642,10 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
CallOpc = X86::CALL64pcrel32;
else
CallOpc = X86::CALLpcrel32;
-
+
// See if we need any target-specific flags on the GV operand.
unsigned char OpFlags = 0;
-
+
// On ELF targets, in both X86-64 and X86-32 mode, direct calls to
// external symbols must go through the PLT in PIC mode. If the symbol
// has hidden or protected visibility, or if it is static or local, then
@@ -1660,8 +1662,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// automatically synthesizes these stubs.
OpFlags = X86II::MO_DARWIN_STUB;
}
-
-
+
+
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
.addGlobalAddress(GV, 0, OpFlags);
}
@@ -1690,7 +1692,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
EVT CopyVT = RVLocs[0].getValVT();
TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
-
+
// If this is a call to a function that returns an fp value on the x87 fp
// stack, but where we prefer to use the value in xmm registers, copy it
// out as F80 and use a truncate to move it from fp stack reg to xmm reg.
@@ -1728,7 +1730,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (AndToI1) {
// Mask out all but lowest bit for some call which produces an i1.
unsigned AndResult = createResultReg(X86::GR8RegisterClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
ResultReg = AndResult;
}
@@ -1798,7 +1800,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
MVT VT;
if (!isTypeLegal(C->getType(), VT))
return false;
-
+
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
@@ -1843,7 +1845,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
// No f80 support yet.
return false;
}
-
+
// Materialize addresses with LEA instructions.
if (isa<GlobalValue>(C)) {
X86AddressMode AM;
@@ -1859,14 +1861,14 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
}
return 0;
}
-
+
// MachineConstantPool wants an explicit alignment.
unsigned Align = TD.getPrefTypeAlignment(C->getType());
if (Align == 0) {
// Alignment of vector types. FIXME!
Align = TD.getTypeAllocSize(C->getType());
}
-
+
// x86-32 PIC requires a PIC base register for constant pools.
unsigned PICBase = 0;
unsigned char OpFlag = 0;
@@ -1922,20 +1924,20 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
X86AddressMode AM;
if (!X86SelectAddress(LI->getOperand(0), AM))
return false;
-
+
X86InstrInfo &XII = (X86InstrInfo&)TII;
-
+
unsigned Size = TD.getTypeAllocSize(LI->getType());
unsigned Alignment = LI->getAlignment();
SmallVector<MachineOperand, 8> AddrOps;
AM.getFullAddress(AddrOps);
-
+
MachineInstr *Result =
XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
if (Result == 0) return false;
-
- MI->getParent()->insert(MI, Result);
+
+ FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
MI->eraseFromParent();
return true;
}
diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h
index 64ee3eb..17d242a 100644
--- a/lib/Target/X86/X86FixupKinds.h
+++ b/lib/Target/X86/X86FixupKinds.h
@@ -15,17 +15,17 @@
namespace llvm {
namespace X86 {
enum Fixups {
- reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch.
- reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1
- reloc_pcrel_2byte, // 16-bit pcrel, e.g. callw
- reloc_riprel_4byte, // 32-bit rip-relative
+ reloc_riprel_4byte = FirstTargetFixupKind, // 32-bit rip-relative
reloc_riprel_4byte_movq_load, // 32-bit rip-relative in movq
reloc_signed_4byte, // 32-bit signed. Unlike FK_Data_4
// this will be sign extended at
// runtime.
- reloc_global_offset_table // 32-bit, relative to the start
+ reloc_global_offset_table, // 32-bit, relative to the start
// of the instruction. Used only
// for _GLOBAL_OFFSET_TABLE_.
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
}
}
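Since the generic FK_PCRel_1/2/4 kinds now cover the old reloc_pcrel_* values, only four target-specific kinds remain. Expanding the marker arithmetic (illustrative):

  reloc_riprel_4byte           = FirstTargetFixupKind + 0
  reloc_riprel_4byte_movq_load = FirstTargetFixupKind + 1
  reloc_signed_4byte           = FirstTargetFixupKind + 2
  reloc_global_offset_table    = FirstTargetFixupKind + 3
  LastTargetFixupKind          = FirstTargetFixupKind + 4
  NumTargetFixupKinds          = 4

which matches the four-entry Infos[] table in X86AsmBackend::getFixupKindInfo.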
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 5da6d3a..3aaa693 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -32,6 +32,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -51,6 +52,7 @@ namespace {
struct FPS : public MachineFunctionPass {
static char ID;
FPS() : MachineFunctionPass(ID) {
+ initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
// This is really only to keep valgrind quiet.
// The logic in isLive() is too much for it.
memset(Stack, 0, sizeof(Stack));
@@ -59,6 +61,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<EdgeBundles>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
@@ -94,7 +97,7 @@ namespace {
// FixStack[i] == getStackEntry(i) for all i < FixCount.
unsigned char FixStack[8];
- LiveBundle(unsigned m = 0) : Mask(m), FixCount(0) {}
+ LiveBundle() : Mask(0), FixCount(0) {}
// Have the live registers been assigned a stack order yet?
bool isFixed() const { return !Mask || FixCount; }
@@ -104,10 +107,8 @@ namespace {
// with no live FP registers.
SmallVector<LiveBundle, 8> LiveBundles;
- // Map each MBB in the current function to an (ingoing, outgoing) index into
- // LiveBundles. Blocks with no FP registers live in or out map to (0, 0)
- // and are not actually stored in the map.
- DenseMap<MachineBasicBlock*, std::pair<unsigned, unsigned> > BlockBundle;
+ // The edge bundle analysis provides indices into the LiveBundles vector.
+ EdgeBundles *Bundles;
// Return a bitmask of FP registers in block's live-in list.
unsigned calcLiveInMask(MachineBasicBlock *MBB) {
@@ -284,6 +285,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
// Early exit.
if (!FPIsUsed) return false;
+ Bundles = &getAnalysis<EdgeBundles>();
TII = MF.getTarget().getInstrInfo();
// Prepare cross-MBB liveness.
@@ -308,7 +310,6 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
if (Processed.insert(BB))
Changed |= processBasicBlock(MF, *BB);
- BlockBundle.clear();
LiveBundles.clear();
return Changed;
@@ -321,90 +322,16 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
/// registers may be implicitly defined, or not used by all successors.
void FPS::bundleCFG(MachineFunction &MF) {
assert(LiveBundles.empty() && "Stale data in LiveBundles");
- assert(BlockBundle.empty() && "Stale data in BlockBundle");
- SmallPtrSet<MachineBasicBlock*, 8> PropDown, PropUp;
+ LiveBundles.resize(Bundles->getNumBundles());
- // LiveBundle[0] is the empty live-in set.
- LiveBundles.resize(1);
-
- // First gather the actual live-in masks for all MBBs.
+ // Gather the actual live-in masks for all MBBs.
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = I;
const unsigned Mask = calcLiveInMask(MBB);
if (!Mask)
continue;
- // Ingoing bundle index.
- unsigned &Idx = BlockBundle[MBB].first;
- // Already assigned an ingoing bundle?
- if (Idx)
- continue;
- // Allocate a new LiveBundle struct for this block's live-ins.
- const unsigned BundleIdx = Idx = LiveBundles.size();
- DEBUG(dbgs() << "Creating LB#" << BundleIdx << ": in:BB#"
- << MBB->getNumber());
- LiveBundles.push_back(Mask);
- LiveBundle &Bundle = LiveBundles.back();
-
- // Make sure all predecessors have the same live-out set.
- PropUp.insert(MBB);
-
- // Keep pushing liveness up and down the CFG until convergence.
- // Only critical edges cause iteration here, but when they do, multiple
- // blocks can be assigned to the same LiveBundle index.
- do {
- // Assign BundleIdx as liveout from predecessors in PropUp.
- for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropUp.begin(),
- E = PropUp.end(); I != E; ++I) {
- MachineBasicBlock *MBB = *I;
- for (MachineBasicBlock::const_pred_iterator LinkI = MBB->pred_begin(),
- LinkE = MBB->pred_end(); LinkI != LinkE; ++LinkI) {
- MachineBasicBlock *PredMBB = *LinkI;
- // PredMBB's liveout bundle should be set to LIIdx.
- unsigned &Idx = BlockBundle[PredMBB].second;
- if (Idx) {
- assert(Idx == BundleIdx && "Inconsistent CFG");
- continue;
- }
- Idx = BundleIdx;
- DEBUG(dbgs() << " out:BB#" << PredMBB->getNumber());
- // Propagate to siblings.
- if (PredMBB->succ_size() > 1)
- PropDown.insert(PredMBB);
- }
- }
- PropUp.clear();
-
- // Assign BundleIdx as livein to successors in PropDown.
- for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropDown.begin(),
- E = PropDown.end(); I != E; ++I) {
- MachineBasicBlock *MBB = *I;
- for (MachineBasicBlock::const_succ_iterator LinkI = MBB->succ_begin(),
- LinkE = MBB->succ_end(); LinkI != LinkE; ++LinkI) {
- MachineBasicBlock *SuccMBB = *LinkI;
- // LinkMBB's livein bundle should be set to BundleIdx.
- unsigned &Idx = BlockBundle[SuccMBB].first;
- if (Idx) {
- assert(Idx == BundleIdx && "Inconsistent CFG");
- continue;
- }
- Idx = BundleIdx;
- DEBUG(dbgs() << " in:BB#" << SuccMBB->getNumber());
- // Propagate to siblings.
- if (SuccMBB->pred_size() > 1)
- PropUp.insert(SuccMBB);
- // Also accumulate the bundle liveness mask from the liveins here.
- Bundle.Mask |= calcLiveInMask(SuccMBB);
- }
- }
- PropDown.clear();
- } while (!PropUp.empty());
- DEBUG({
- dbgs() << " live:";
- for (unsigned i = 0; i < 8; ++i)
- if (Bundle.Mask & (1<<i))
- dbgs() << " %FP" << i;
- dbgs() << '\n';
- });
+ // Update MBB ingoing bundle mask.
+ LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask;
}
}
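A toy standalone model (our data structures, not the EdgeBundles API) of why one mask |= per block is enough: a block's live-in side and its predecessor's live-out side map to the same bundle index, so they share one mask.

#include <cstdio>
#include <vector>

int main() {
  // Two blocks A -> B. In the real analysis,
  // getBundle(A, /*Out=*/true) == getBundle(B, /*Out=*/false);
  // here we simply hard-code the shared index.
  const unsigned InB = 1, OutA = 1;           // shared bundle index
  std::vector<unsigned> BundleMask(2, 0);
  BundleMask[InB] |= 0x3;                     // %FP0 and %FP1 live into B
  // A's live-out view picks up the same mask through the shared index:
  std::printf("A live-out mask: %#x\n", BundleMask[OutA]);  // 0x3
  return 0;
}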
@@ -492,13 +419,15 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
return Changed;
}
-/// setupBlockStack - Use the BlockBundle map to set up our model of the stack
+/// setupBlockStack - Use the live bundles to set up our model of the stack
/// to match predecessors' live out stack.
void FPS::setupBlockStack() {
DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
<< " derived from " << MBB->getName() << ".\n");
StackTop = 0;
- const LiveBundle &Bundle = LiveBundles[BlockBundle.lookup(MBB).first];
+ // Get the live-in bundle for MBB.
+ const LiveBundle &Bundle =
+ LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
if (!Bundle.Mask) {
DEBUG(dbgs() << "Block has no FP live-ins.\n");
@@ -535,7 +464,8 @@ void FPS::finishBlockStack() {
DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
<< " derived from " << MBB->getName() << ".\n");
- unsigned BundleIdx = BlockBundle.lookup(MBB).second;
+ // Get MBB's live-out bundle.
+ unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
LiveBundle &Bundle = LiveBundles[BundleIdx];
// We may need to kill and define some registers to match successors.
diff --git a/lib/Target/X86/X86FrameInfo.h b/lib/Target/X86/X86FrameInfo.h
deleted file mode 100644
index 7828e17..0000000
--- a/lib/Target/X86/X86FrameInfo.h
+++ /dev/null
@@ -1,46 +0,0 @@
-//===-- X86TargetFrameInfo.h - Define TargetFrameInfo for X86 ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86_FRAMEINFO_H
-#define X86_FRAMEINFO_H
-
-#include "X86Subtarget.h"
-#include "llvm/Target/TargetFrameInfo.h"
-
-namespace llvm {
- class MCSymbol;
-
-class X86FrameInfo : public TargetFrameInfo {
-protected:
- const X86Subtarget &STI;
-
-public:
- explicit X86FrameInfo(const X86Subtarget &sti)
- : TargetFrameInfo(StackGrowsDown,
- sti.getStackAlignment(),
- (sti.isTargetWin64() ? -40 : (sti.is64Bit() ? -8 : -4))),
- STI(sti) {
- }
-
- void emitCalleeSavedFrameMoves(MachineFunction &MF, MCSymbol *Label,
- unsigned FramePtr) const;
-
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/X86/X86FrameInfo.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 98c579b..3246c43 100644
--- a/lib/Target/X86/X86FrameInfo.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -1,4 +1,4 @@
-//=======- X86FrameInfo.cpp - X86 Frame Information ------------*- C++ -*-====//
+//===-- X86FrameLowering.cpp - X86 Frame Information -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,14 +7,15 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the X86 implementation of TargetFrameInfo class.
+// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//
-#include "X86FrameInfo.h"
+#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -24,12 +25,33 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallSet.h"
using namespace llvm;
// FIXME: completely move here.
extern cl::opt<bool> ForceStackAlign;
+bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineModuleInfo &MMI = MF.getMMI();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+
+ return (DisableFramePointerElim(MF) ||
+ RI->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken() ||
+ MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
+ MMI.callsUnwindInit());
+}
+
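// A hedged illustration, in C, of source that trips this test: the alloca
// sets MFI->hasVarSizedObjects(), so hasFP() returns true and the prologue
// keeps EBP/RBP as a dedicated frame pointer (use() is a hypothetical helper).
//
//   void f(unsigned n) {
//     char *buf = __builtin_alloca(n);  /* variable sized object */
//     use(buf);
//   }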
static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
if (is64Bit) {
if (isInt<8>(Imm))
@@ -54,12 +76,70 @@ static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
}
}
+/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
+/// when execution reaches the "return" instruction. We can then pop a stack
+/// object into this register without worrying about clobbering it.
+static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetRegisterInfo &TRI,
+ bool Is64Bit) {
+ const MachineFunction *MF = MBB.getParent();
+ const Function *F = MF->getFunction();
+ if (!F || MF->getMMI().callsEHReturn())
+ return 0;
+
+ static const unsigned CallerSavedRegs32Bit[] = {
+ X86::EAX, X86::EDX, X86::ECX
+ };
+
+ static const unsigned CallerSavedRegs64Bit[] = {
+ X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
+ X86::R8, X86::R9, X86::R10, X86::R11
+ };
+
+ unsigned Opc = MBBI->getOpcode();
+ switch (Opc) {
+ default: return 0;
+ case X86::RET:
+ case X86::RETI:
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ SmallSet<unsigned, 8> Uses;
+ for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MBBI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ for (const unsigned *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI)
+ Uses.insert(*AsI);
+ }
+
+ const unsigned *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
+ for (; *CS; ++CS)
+ if (!Uses.count(*CS))
+ return *CS;
+ }
+ }
+
+ return 0;
+}
+
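// Sketch of the intended use (see emitSPUpdate below): when the epilogue only
// needs to move the stack pointer by one slot, popping into a register this
// function proves dead is smaller than an add. Assuming a 32-bit RET whose
// implicit EAX use is filtered out via TRI.getOverlaps(), EDX is returned:
//
//   popl %edx        # instead of: addl $4, %esp
//   ret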
+
/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, int64_t NumBytes, bool Is64Bit,
- const TargetInstrInfo &TII) {
+ unsigned StackPtr, int64_t NumBytes,
+ bool Is64Bit, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI) {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
unsigned Opc = isSub ?
@@ -70,10 +150,26 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
while (Offset) {
uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+ if (ThisVal == (Is64Bit ? 8 : 4)) {
+ // Use push / pop instead.
+ unsigned Reg = isSub
+ ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
+ : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
+ if (Reg) {
+ Opc = isSub
+ ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
+ : (Is64Bit ? X86::POP64r : X86::POP32r);
+ BuildMI(MBB, MBBI, DL, TII.get(Opc))
+ .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
+ Offset -= ThisVal;
+ continue;
+ }
+ }
+
MachineInstr *MI =
BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(ThisVal);
+ .addReg(StackPtr)
+ .addImm(ThisVal);
MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
Offset -= ThisVal;
}
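// The push/pop substitution above trades a 3-7 byte add/sub for a 1-2 byte
// stack operation whenever the adjustment is exactly one slot. A hedged
// illustration of the intended x86-64 output:
//
//   subq $8, %rsp   ->   pushq %rax   # pushed value is undef; only RSP matters
//   addq $8, %rsp   ->   popq  %rcx   # pop into a dead caller-saved register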
@@ -180,12 +276,10 @@ static bool isEAXLiveIn(MachineFunction &MF) {
return false;
}
-void X86FrameInfo::emitCalleeSavedFrameMoves(MachineFunction &MF,
+void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
MCSymbol *Label,
unsigned FramePtr) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
MachineModuleInfo &MMI = MF.getMMI();
// Add callee saved registers to move list.
@@ -193,14 +287,11 @@ void X86FrameInfo::emitCalleeSavedFrameMoves(MachineFunction &MF,
if (CSI.empty()) return;
std::vector<MachineMove> &Moves = MMI.getFrameMoves();
- const TargetData *TD = MF.getTarget().getTargetData();
- bool HasFP = RegInfo->hasFP(MF);
+ const TargetData *TD = TM.getTargetData();
+ bool HasFP = hasFP(MF);
  // Calculate the number of bytes used to store the return address.
- int stackGrowth =
- (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
- TargetFrameInfo::StackGrowsUp ?
- TD->getPointerSize() : -TD->getPointerSize());
+ int stackGrowth = -TD->getPointerSize();
  // FIXME: This is a dirty hack. The code itself is a mess right now.
  // It should be rewritten from scratch and generalized sometime.
@@ -227,7 +318,7 @@ void X86FrameInfo::emitCalleeSavedFrameMoves(MachineFunction &MF,
// move" for this extra "PUSH", the linker will lose track of the fact that
// the frame pointer should have the value of the first "PUSH" when it's
// trying to unwind.
- //
+ //
// FIXME: This looks inelegant. It's possibly correct, but it's covering up
// another bug. I.e., one where we generate a prolog like this:
//
@@ -253,23 +344,20 @@ void X86FrameInfo::emitCalleeSavedFrameMoves(MachineFunction &MF,
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
/// generate the exception handling frames.
-void X86FrameInfo::emitPrologue(MachineFunction &MF) const {
+void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
- const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
- const X86InstrInfo &TII =
- *static_cast<const X86InstrInfo*>(MF.getTarget().getInstrInfo());
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ const X86InstrInfo &TII = *TM.getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
bool needsFrameMoves = MMI.hasDebugInfo() ||
!Fn->doesNotThrow() || UnwindTablesMandatory;
uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
- bool HasFP = RegInfo->hasFP(MF);
+ bool HasFP = hasFP(MF);
bool Is64Bit = STI.is64Bit();
bool IsWin64 = STI.isTargetWin64();
unsigned StackAlign = getStackAlignment();
@@ -309,11 +397,6 @@ void X86FrameInfo::emitPrologue(MachineFunction &MF) const {
if (HasFP) MinSize += SlotSize;
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
MFI->setStackSize(StackSize);
- } else if (IsWin64) {
- // We need to always allocate 32 bytes as register spill area.
- // FIXME: We might reuse these 32 bytes for leaf functions.
- StackSize += 32;
- MFI->setStackSize(StackSize);
}
// Insert stack pointer adjustment for later moving of return addr. Only
@@ -376,7 +459,6 @@ void X86FrameInfo::emitPrologue(MachineFunction &MF) const {
MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
} else {
- // FIXME: Verify & implement for FP
MachineLocation SPDst(StackPtr);
MachineLocation SPSrc(StackPtr, stackGrowth);
Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
@@ -470,12 +552,15 @@ void X86FrameInfo::emitPrologue(MachineFunction &MF) const {
// increments is necessary to ensure that the guard pages used by the OS
// virtual memory manager are allocated in correct sequence.
if (NumBytes >= 4096 &&
- (Subtarget->isTargetCygMing() || Subtarget->isTargetWin32())) {
+ (STI.isTargetCygMing() || STI.isTargetWin32()) &&
+ !STI.isTargetEnvMacho()) {
// Check whether EAX is livein for this function.
bool isEAXAlive = isEAXLiveIn(MF);
const char *StackProbeSymbol =
- Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
+ STI.isTargetWindows() ? "_chkstk" : "_alloca";
+ if (Is64Bit && STI.isTargetCygMing())
+ StackProbeSymbol = "__chkstk";
unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
if (!isEAXAlive) {
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
@@ -504,9 +589,11 @@ void X86FrameInfo::emitPrologue(MachineFunction &MF) const {
StackPtr, false, NumBytes - 4);
MBB.insert(MBBI, MI);
}
- } else if (NumBytes >= 4096 && Subtarget->isTargetWin64()) {
+ } else if (NumBytes >= 4096 &&
+ STI.isTargetWin64() &&
+ !STI.isTargetEnvMacho()) {
// Sanity check that EAX is not livein for this function. It should
- // should not be, so throw an assert.
+ // not be, so throw an assert.
assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!");
// Handle the 64-bit Windows ABI case where we need to call __chkstk.
@@ -516,9 +603,11 @@ void X86FrameInfo::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32))
.addExternalSymbol("__chkstk")
.addReg(StackPtr, RegState::Define | RegState::Implicit);
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ TII, *RegInfo);
} else if (NumBytes)
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ TII, *RegInfo);
if ((NumBytes || PushedRegs) && needsFrameMoves) {
// Mark end of stack pointer adjustment.
@@ -533,7 +622,6 @@ void X86FrameInfo::emitPrologue(MachineFunction &MF) const {
-StackSize + stackGrowth);
Moves.push_back(MachineMove(Label, SPDst, SPSrc));
} else {
- // FIXME: Verify & implement for FP
MachineLocation SPDst(StackPtr);
MachineLocation SPSrc(StackPtr, stackGrowth);
Moves.push_back(MachineMove(Label, SPDst, SPSrc));
@@ -546,15 +634,14 @@ void X86FrameInfo::emitPrologue(MachineFunction &MF) const {
}
}
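// Why the probe calls above exist: Windows commits stack guard pages one page
// at a time, so a single SUB of 4096 bytes or more could step over the guard
// page. A hedged sketch of the 32-bit sequence when EAX is not live in:
//
//   movl $NumBytes, %eax
//   calll _chkstk          # _alloca on MinGW; probes each page and moves ESP
//
// On Win64 (and 64-bit cygming, where the symbol is __chkstk) the callee only
// probes, which is why emitSPUpdate still subtracts NumBytes from RSP after
// the call.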
-void X86FrameInfo::emitEpilogue(MachineFunction &MF,
+void X86FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
- const X86InstrInfo &TII =
- *static_cast<const X86InstrInfo*>(MF.getTarget().getInstrInfo());
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI != MBB.end() && "Returning block has no instructions");
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc DL = MBBI->getDebugLoc();
bool Is64Bit = STI.is64Bit();
@@ -596,7 +683,7 @@ void X86FrameInfo::emitEpilogue(MachineFunction &MF,
MaxAlign = MaxAlign ? MaxAlign : 4;
}
- if (RegInfo->hasFP(MF)) {
+ if (hasFP(MF)) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
if (RegInfo->needsStackRealignment(MF))
@@ -617,7 +704,7 @@ void X86FrameInfo::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator PI = prior(MBBI);
unsigned Opc = PI->getOpcode();
- if (Opc != X86::POP32r && Opc != X86::POP64r &&
+ if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
!PI->getDesc().isTerminator())
break;
@@ -638,7 +725,7 @@ void X86FrameInfo::emitEpilogue(MachineFunction &MF,
// We cannot use LEA here, because stack pointer was realigned. We need to
// deallocate local frame back.
if (CSSize) {
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
MBBI = prior(LastCSPop);
}
@@ -659,12 +746,12 @@ void X86FrameInfo::emitEpilogue(MachineFunction &MF,
}
} else if (NumBytes) {
// Adjust stack pointer back: ESP += numbytes.
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
}
// We're returning from function via eh_return.
if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
- MBBI = prior(MBB.end());
+ MBBI = MBB.getLastNonDebugInstr();
MachineOperand &DestAddr = MBBI->getOperand(0);
assert(DestAddr.isReg() && "Offset should be in register!");
BuildMI(MBB, MBBI, DL,
@@ -676,7 +763,7 @@ void X86FrameInfo::emitEpilogue(MachineFunction &MF,
RetOpcode == X86::TCRETURNmi64) {
bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
// Tail call return: adjust the stack pointer and jump to callee.
- MBBI = prior(MBB.end());
+ MBBI = MBB.getLastNonDebugInstr();
MachineOperand &JumpTarget = MBBI->getOperand(0);
MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
assert(StackAdjust.isImm() && "Expecting immediate value.");
@@ -694,15 +781,22 @@ void X86FrameInfo::emitEpilogue(MachineFunction &MF,
if (Offset) {
    // Check for possible merge with preceding ADD instruction.
Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII);
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo);
}
// Jump to label or value in register.
if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
- BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
- ? X86::TAILJMPd : X86::TAILJMPd64)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
+ ? X86::TAILJMPd : X86::TAILJMPd64));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
} else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
@@ -727,10 +821,196 @@ void X86FrameInfo::emitEpilogue(MachineFunction &MF,
(X86FI->getTCReturnAddrDelta() < 0)) {
// Add the return addr area delta back since we are not tail calling.
int delta = -1*X86FI->getTCReturnAddrDelta();
- MBBI = prior(MBB.end());
+ MBBI = MBB.getLastNonDebugInstr();
    // Check for possible merge with preceding ADD instruction.
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo);
+ }
+}
+
+void
+X86FrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
+ // Calculate the number of bytes used to store the return address.
+ int stackGrowth = (STI.is64Bit() ? -8 : -4);
+ const X86RegisterInfo *RI = TM.getRegisterInfo();
+
+ // Initial state of the frame pointer is esp+stackGrowth.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(RI->getStackRegister(), stackGrowth);
+ Moves.push_back(MachineMove(0, Dst, Src));
+
+ // Add return address to move list
+ MachineLocation CSDst(RI->getStackRegister(), stackGrowth);
+ MachineLocation CSSrc(RI->getRARegister());
+ Moves.push_back(MachineMove(0, CSDst, CSSrc));
+}
+
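// The two moves above encode the usual x86 entry state: the CALL that reached
// us pushed the return address, so the canonical frame address is one slot
// above the stack pointer and the return address lives in that slot. In DWARF
// terms this corresponds roughly to (64-bit, where stackGrowth == -8):
//
//   .cfi_def_cfa %rsp, 8      # CFA = RSP + 8 on function entry
//   .cfi_offset %rip, -8      # return address stored at CFA - 8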
+int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
+ const X86RegisterInfo *RI =
+ static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
+ uint64_t StackSize = MFI->getStackSize();
+
+ if (RI->needsStackRealignment(MF)) {
+ if (FI < 0) {
+ // Skip the saved EBP.
+ Offset += RI->getSlotSize();
+ } else {
+ unsigned Align = MFI->getObjectAlignment(FI);
+ assert((-(Offset + StackSize)) % Align == 0);
+ Align = 0; // Align was only needed for the assert above.
+ return Offset + StackSize;
+ }
+ // FIXME: Support tail calls
+ } else {
+ if (!hasFP(MF))
+ return Offset + StackSize;
+
+ // Skip the saved EBP.
+ Offset += RI->getSlotSize();
+
+ // Skip the RETADDR move area
+ const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0)
+ Offset -= TailCallReturnAddrDelta;
+ }
+
+ return Offset;
+}
+
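// A worked example of the frame-pointer path above, assuming 32-bit code,
// hasFP(MF), SlotSize == 4, no tail-call RETADDR area, and a local whose
// MachineFrameInfo offset is -12 (getOffsetOfLocalArea() is -4 here):
//
//   Offset = -12 - (-4);   // strip the local area bias  -> -8
//   Offset += 4;           // skip the saved EBP slot    -> -4
//
// i.e. the object is addressed as -4(%ebp).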
+bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = MBB.findDebugLoc(MI);
+
+ MachineFunction &MF = *MBB.getParent();
+
+ unsigned SlotSize = STI.is64Bit() ? 8 : 4;
+ unsigned FPReg = TRI->getFrameRegister(MF);
+ unsigned CalleeFrameSize = 0;
+
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+
+ // Push GPRs. It increases frame size.
+ unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!X86::GR64RegClass.contains(Reg) &&
+ !X86::GR32RegClass.contains(Reg))
+ continue;
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ if (Reg == FPReg)
+ // X86RegisterInfo::emitPrologue will handle spilling of frame register.
+ continue;
+ CalleeFrameSize += SlotSize;
+ BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill);
+ }
+
+ X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
+
+ // Spill the XMM regs. X86 has no push/pop instructions for XMM registers,
+ // so they are stored to stack slots instead.
+ // Note that only the Win64 ABI might spill XMMs.
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (X86::GR64RegClass.contains(Reg) ||
+ X86::GR32RegClass.contains(Reg))
+ continue;
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
+ RC, TRI);
+ }
+
+ return true;
+}
+
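// The GPR loop above walks CSI back to front, so the pushes come out in
// reverse CSI order; restoreCalleeSavedRegisters below pops front to back,
// giving the usual LIFO pairing. A hedged sketch for CSI = [ESI, EDI, EBX]
// on x86-32:
//
//   pushl %ebx            ...            popl %esi
//   pushl %edi                           popl %edi
//   pushl %esi                           popl %ebx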
+bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = MBB.findDebugLoc(MI);
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // Reload XMMs from stack frame.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (X86::GR64RegClass.contains(Reg) ||
+ X86::GR32RegClass.contains(Reg))
+ continue;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+ RC, TRI);
+ }
+
+ // POP GPRs.
+ unsigned FPReg = TRI->getFrameRegister(MF);
+ unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (!X86::GR64RegClass.contains(Reg) &&
+ !X86::GR32RegClass.contains(Reg))
+ continue;
+ if (Reg == FPReg)
+ // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
+ continue;
+ BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
+ }
+ return true;
+}
+
+void
+X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ unsigned SlotSize = RegInfo->getSlotSize();
+
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+
+ if (TailCallReturnAddrDelta < 0) {
+ // create RETURNADDR area
+ // arg
+ // arg
+ // RETADDR
+ // { ...
+ // RETADDR area
+ // ...
+ // }
+ // [EBP]
+ MFI->CreateFixedObject(-TailCallReturnAddrDelta,
+ (-1U*SlotSize)+TailCallReturnAddrDelta, true);
+ }
+
+ if (hasFP(MF)) {
+ assert((TailCallReturnAddrDelta <= 0) &&
+ "The Delta should always be zero or negative");
+ const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
+
+ // Create a frame entry for the EBP register that must be saved.
+ int FrameIdx = MFI->CreateFixedObject(SlotSize,
+ -(int)SlotSize +
+ TFI.getOffsetOfLocalArea() +
+ TailCallReturnAddrDelta,
+ true);
+ assert(FrameIdx == MFI->getObjectIndexBegin() &&
+ "Slot for EBP register must be last in order to be found!");
+ FrameIdx = 0;
}
}
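// A worked example of the fixed-object arithmetic above, assuming 32-bit code
// with SlotSize == 4, TailCallReturnAddrDelta == 0 and
// TFI.getOffsetOfLocalArea() == -4:
//
//   offset = -(int)SlotSize + (-4) + 0 = -8
//
// i.e. the slot sits directly below the return address, which is where
// "pushl %ebp" will actually store the old frame pointer; the assert then
// checks it is the lowest-indexed fixed object so later code can find it.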
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
new file mode 100644
index 0000000..d71108c
--- /dev/null
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -0,0 +1,65 @@
+//===-- X86FrameLowering.h - Define frame lowering for X86 -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements X86-specific bits of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_FRAMELOWERING_H
+#define X86_FRAMELOWERING_H
+
+#include "X86Subtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MCSymbol;
+ class X86TargetMachine;
+
+class X86FrameLowering : public TargetFrameLowering {
+ const X86TargetMachine &TM;
+ const X86Subtarget &STI;
+public:
+ explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti)
+ : TargetFrameLowering(StackGrowsDown,
+ sti.getStackAlignment(),
+ (sti.is64Bit() ? -8 : -4)),
+ TM(tm), STI(sti) {
+ }
+
+ void emitCalleeSavedFrameMoves(MachineFunction &MF, MCSymbol *Label,
+ unsigned FramePtr) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 0226278..9b0ec6e 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -506,7 +506,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
N->getOperand(0),
MemTmp, MachinePointerInfo(), MemVT,
false, false, 0);
- SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, dl, Store, MemTmp,
+ SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
MachinePointerInfo(),
MemVT, false, false, 0);
@@ -530,9 +530,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
MachineFrameInfo *MFI) {
const TargetInstrInfo *TII = TM.getInstrInfo();
- if (Subtarget->isTargetCygMing())
+ if (Subtarget->isTargetCygMing()) {
+ unsigned CallOp =
+ Subtarget->is64Bit() ? X86::WINCALL64pcrel32 : X86::CALLpcrel32;
BuildMI(BB, DebugLoc(),
- TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
+ TII->get(CallOp)).addExternalSymbol("__main");
+ }
}
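// On CygMing targets the C runtime's __main (which runs static constructors)
// must be called at the top of main(); the change above merely selects the
// Win64 call opcode when targeting 64-bit. A hedged sketch of the entry:
//
//   main:
//     ...prologue...
//     callq __main     # WINCALL64pcrel32 on x86-64, CALLpcrel32 on x86-32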
void X86DAGToDAGISel::EmitFunctionEntryCode() {
@@ -682,25 +685,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
return false;
}
-/// isLogicallyAddWithConstant - Return true if this node is semantically an
-/// add of a value with a constantint.
-static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) {
- // Check for (add x, Cst)
- if (V->getOpcode() == ISD::ADD)
- return isa<ConstantSDNode>(V->getOperand(1));
-
- // Check for (or x, Cst), where Cst & x == 0.
- if (V->getOpcode() != ISD::OR ||
- !isa<ConstantSDNode>(V->getOperand(1)))
- return false;
-
- // Handle "X | C" as "X + C" iff X is known to have C bits clear.
- ConstantSDNode *CN = cast<ConstantSDNode>(V->getOperand(1));
-
- // Check to see if the LHS & C is zero.
- return CurDAG->MaskedValueIsZero(V->getOperand(0), CN->getAPIntValue());
-}
-
bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth) {
bool is64Bit = Subtarget->is64Bit();
@@ -786,7 +770,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// Okay, we know that we have a scale by now. However, if the scaled
// value is an add of something and a constant, we can fold the
// constant into the disp field here.
- if (isLogicallyAddWithConstant(ShVal, CurDAG)) {
+ if (CurDAG->isBaseWithConstantOffset(ShVal)) {
AM.IndexReg = ShVal.getNode()->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
@@ -930,24 +914,18 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// Add an artificial use to this node so that we can keep track of
// it if it gets CSE'd with a different node.
HandleSDNode Handle(N);
- SDValue LHS = Handle.getValue().getNode()->getOperand(0);
- SDValue RHS = Handle.getValue().getNode()->getOperand(1);
X86ISelAddressMode Backup = AM;
- if (!MatchAddressRecursively(LHS, AM, Depth+1) &&
- !MatchAddressRecursively(RHS, AM, Depth+1))
+ if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
+ !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
return false;
AM = Backup;
- LHS = Handle.getValue().getNode()->getOperand(0);
- RHS = Handle.getValue().getNode()->getOperand(1);
-
+
// Try again after commuting the operands.
- if (!MatchAddressRecursively(RHS, AM, Depth+1) &&
- !MatchAddressRecursively(LHS, AM, Depth+1))
+ if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
+ !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
return false;
AM = Backup;
- LHS = Handle.getValue().getNode()->getOperand(0);
- RHS = Handle.getValue().getNode()->getOperand(1);
// If we couldn't fold both operands into the address at the same time,
// see if we can just put each operand into a register and fold at least
@@ -955,17 +933,19 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
if (AM.BaseType == X86ISelAddressMode::RegBase &&
!AM.Base_Reg.getNode() &&
!AM.IndexReg.getNode()) {
- AM.Base_Reg = LHS;
- AM.IndexReg = RHS;
+ N = Handle.getValue();
+ AM.Base_Reg = N.getOperand(0);
+ AM.IndexReg = N.getOperand(1);
AM.Scale = 1;
return false;
}
+ N = Handle.getValue();
break;
}
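// The HandleSDNode dance above matters because either recursive match may CSE
// or replace N; Handle.getValue() tracks any replacement, so operands must be
// re-fetched through it instead of cached, as the deleted LHS/RHS were:
//
//   HandleSDNode Handle(N);
//   // ... calls that may RAUW N ...
//   N = Handle.getValue();   // safe: follows the replacement node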
case ISD::OR:
// Handle "X | C" as "X + C" iff X is known to have C bits clear.
- if (isLogicallyAddWithConstant(N, CurDAG)) {
+ if (CurDAG->isBaseWithConstantOffset(N)) {
X86ISelAddressMode Backup = AM;
ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
uint64_t Offset = CN->getSExtValue();
@@ -1600,7 +1580,32 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return RetVal;
break;
}
-
+ case X86ISD::UMUL: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ unsigned LoReg;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break;
+ case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
+ case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
+ case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
+ }
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ N0, SDValue()).getValue(1);
+
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
+ SDValue Ops[] = {N1, InFlag};
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
+
+ ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+ ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
+ ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
+ return NULL;
+ }
+
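// The X86ISD::UMUL selection above pins the LHS in the accumulator because
// one-operand MUL reads and writes it implicitly; the machine node carries
// three results mirroring the instruction's implicit defs. A hedged sketch
// of the 32-bit case:
//
//   movl %edi, %eax    # CopyToReg of N0 into EAX
//   mull %esi          # EDX:EAX = EAX * N1; CF/OF signal overflow
//
// results: 0 = low half (EAX), 1 = high half (EDX), 2 = EFLAGS for UMULO.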
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI: {
SDValue N0 = Node->getOperand(0);
@@ -1650,14 +1655,15 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
SDNode *CNode =
- CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+ CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
array_lengthof(Ops));
InFlag = SDValue(CNode, 1);
+
// Update the chain.
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
} else {
- InFlag =
- SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag);
+ InFlag = SDValue(CNode, 0);
}
// Prevent use of AH in a REX instruction by referencing AX instead.
@@ -1696,7 +1702,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
-
+
return NULL;
}
@@ -1781,7 +1787,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if (isSigned && !signBitIsZero) {
// Sign extend the low part into the high part.
InFlag =
- SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
+ SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
} else {
// Zero out the high part, effectively zero extending the input.
SDValue ClrNode =
@@ -1795,14 +1801,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
SDNode *CNode =
- CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+ CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
array_lengthof(Ops));
InFlag = SDValue(CNode, 1);
// Update the chain.
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
} else {
InFlag =
- SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+ SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
}
// Prevent use of AH in a REX instruction by referencing AX instead.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6233af5..c21aa43 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16,9 +16,9 @@
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
-#include "X86ShuffleDecode.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
+#include "Utils/X86ShuffleDecode.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -28,6 +28,7 @@
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -44,7 +45,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/VectorExtras.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
@@ -55,43 +55,171 @@ using namespace dwarf;
STATISTIC(NumTailCalls, "Number of tail calls");
-static cl::opt<bool>
-DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX"));
-
// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2);
-static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
+static SDValue Insert128BitVector(SDValue Result,
+ SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl);
+
+static SDValue Extract128BitVector(SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl);
+
+static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG);
+
+
+/// Generate a DAG to grab 128 bits from a vector > 128 bits. This
+/// sets things up to match to an AVX VEXTRACTF128 instruction or a
+/// simple subregister reference. Idx is an index in the 128 bits we
+/// want. It need not be aligned to a 128-bit boundary. That makes
+/// lowering EXTRACT_VECTOR_ELT operations easier.
+static SDValue Extract128BitVector(SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl) {
+ EVT VT = Vec.getValueType();
+ assert(VT.getSizeInBits() == 256 && "Unexpected vector size!");
+
+ EVT ElVT = VT.getVectorElementType();
+
+ int Factor = VT.getSizeInBits() / 128;
+
+ EVT ResultVT = EVT::getVectorVT(*DAG.getContext(),
+ ElVT,
+ VT.getVectorNumElements() / Factor);
+
+ // Extract from UNDEF is UNDEF.
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::UNDEF, dl, ResultVT);
+
+ if (isa<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ // Extract the relevant 128 bits. Generate an EXTRACT_SUBVECTOR
+ // we can match to VEXTRACTF128.
+ unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ // This is the index of the first element of the 128-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+
+ SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
+ VecIdx);
+
+ return Result;
+ }
+
+ return SDValue();
+}
+
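// A worked example of the index normalization above: extracting element 5 of
// a 256-bit v8f32. ElemsPerChunk = 128/32 = 4 and NormalizedIdxVal =
// ((5 * 32) / 128) * 4 = 4, so the node becomes an EXTRACT_SUBVECTOR at
// element 4, i.e. the upper 128-bit half, which matches "vextractf128 $1".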
+/// Generate a DAG to put 128 bits into a vector > 128 bits. This
+/// sets things up to match to an AVX VINSERTF128 instruction or a
+/// simple superregister reference. Idx is an index in the 128 bits
+/// we want. It need not be aligned to a 128-bit boundary. That makes
+/// lowering INSERT_VECTOR_ELT operations easier.
+static SDValue Insert128BitVector(SDValue Result,
+ SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl) {
+ if (isa<ConstantSDNode>(Idx)) {
+ EVT VT = Vec.getValueType();
+ assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
+
+ EVT ElVT = VT.getVectorElementType();
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ EVT ResultVT = Result.getValueType();
- bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+ // Insert the relevant 128 bits.
+ unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
- if (TM.getSubtarget<X86Subtarget>().isTargetDarwin()) {
- if (is64Bit) return new X8664_MachoTargetObjectFile();
+ // This is the index of the first element of the 128-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+
+ Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
+ VecIdx);
+ return Result;
+ }
+
+ return SDValue();
+}
+
+/// Given two vectors, concatenate them.
+static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG) {
+ DebugLoc dl = Lower.getDebugLoc();
+
+ assert(Lower.getValueType() == Upper.getValueType() && "Mismatched vectors!");
+
+ EVT VT = EVT::getVectorVT(*DAG.getContext(),
+ Lower.getValueType().getVectorElementType(),
+ Lower.getValueType().getVectorNumElements() * 2);
+
+ // TODO: Generalize to arbitrary vector length (this assumes 256-bit vectors).
+ assert(VT.getSizeInBits() == 256 && "Unsupported vector concat!");
+
+ // Insert the upper subvector.
+ SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Upper,
+ DAG.getConstant(
+ // This is half the length of the result
+ // vector. Start inserting the upper 128
+ // bits here.
+ Lower.getValueType().getVectorNumElements(),
+ MVT::i32),
+ DAG, dl);
+
+ // Insert the lower subvector.
+ Vec = Insert128BitVector(Vec, Lower, DAG.getConstant(0, MVT::i32), DAG, dl);
+ return Vec;
+}
+
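// Usage sketch: given two 128-bit halves, the helper inserts Upper at element
// index VectorNumElements (the midpoint of the result) and Lower at index 0:
//
//   SDValue Lo = ...;                         // v4f32
//   SDValue Hi = ...;                         // v4f32
//   SDValue V8 = ConcatVectors(Lo, Hi, DAG);  // v8f32: 0-3 = Lo, 4-7 = Hi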
+static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ bool is64Bit = Subtarget->is64Bit();
+
+ if (Subtarget->isTargetEnvMacho()) {
+ if (is64Bit)
+ return new X8664_MachoTargetObjectFile();
return new TargetLoweringObjectFileMachO();
- } else if (TM.getSubtarget<X86Subtarget>().isTargetELF() ){
- if (is64Bit) return new X8664_ELFTargetObjectFile(TM);
+ }
+
+ if (Subtarget->isTargetELF()) {
+ if (is64Bit)
+ return new X8664_ELFTargetObjectFile(TM);
return new X8632_ELFTargetObjectFile(TM);
- } else if (TM.getSubtarget<X86Subtarget>().isTargetCOFF()) {
- return new TargetLoweringObjectFileCOFF();
}
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
+ return new TargetLoweringObjectFileCOFF();
llvm_unreachable("unknown subtarget type");
}
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
- X86ScalarSSEf64 = Subtarget->hasSSE2();
- X86ScalarSSEf32 = Subtarget->hasSSE1();
+ X86ScalarSSEf64 = Subtarget->hasXMMInt();
+ X86ScalarSSEf32 = Subtarget->hasXMM();
X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
RegInfo = TM.getRegisterInfo();
TD = getTargetData();
// Set up the TargetLowering object.
+ static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
// X86 is weird, it always uses i8 for shift amounts and setcc results.
- setShiftAmountType(MVT::i8);
setBooleanContents(ZeroOrOneBooleanContent);
setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(X86StackPtr);
@@ -226,12 +354,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!X86ScalarSSEf64) {
- setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
- setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
+ setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
+ setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
if (Subtarget->is64Bit()) {
- setOperationAction(ISD::BIT_CONVERT , MVT::f64 , Expand);
+ setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
// Without SSE, i64->f64 goes through memory.
- setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand);
+ setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
}
}
@@ -245,30 +373,21 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// (low) operations are left as Legal, as there are single-result
// instructions for this in x86. Using the two-result multiply instructions
// when both high and low results are needed must be arranged by dagcombine.
- setOperationAction(ISD::MULHS , MVT::i8 , Expand);
- setOperationAction(ISD::MULHU , MVT::i8 , Expand);
- setOperationAction(ISD::SDIV , MVT::i8 , Expand);
- setOperationAction(ISD::UDIV , MVT::i8 , Expand);
- setOperationAction(ISD::SREM , MVT::i8 , Expand);
- setOperationAction(ISD::UREM , MVT::i8 , Expand);
- setOperationAction(ISD::MULHS , MVT::i16 , Expand);
- setOperationAction(ISD::MULHU , MVT::i16 , Expand);
- setOperationAction(ISD::SDIV , MVT::i16 , Expand);
- setOperationAction(ISD::UDIV , MVT::i16 , Expand);
- setOperationAction(ISD::SREM , MVT::i16 , Expand);
- setOperationAction(ISD::UREM , MVT::i16 , Expand);
- setOperationAction(ISD::MULHS , MVT::i32 , Expand);
- setOperationAction(ISD::MULHU , MVT::i32 , Expand);
- setOperationAction(ISD::SDIV , MVT::i32 , Expand);
- setOperationAction(ISD::UDIV , MVT::i32 , Expand);
- setOperationAction(ISD::SREM , MVT::i32 , Expand);
- setOperationAction(ISD::UREM , MVT::i32 , Expand);
- setOperationAction(ISD::MULHS , MVT::i64 , Expand);
- setOperationAction(ISD::MULHU , MVT::i64 , Expand);
- setOperationAction(ISD::SDIV , MVT::i64 , Expand);
- setOperationAction(ISD::UDIV , MVT::i64 , Expand);
- setOperationAction(ISD::SREM , MVT::i64 , Expand);
- setOperationAction(ISD::UREM , MVT::i64 , Expand);
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+
+ // Add/Sub overflow ops with MVT::Glue are lowered to EFLAGS dependences.
+ setOperationAction(ISD::ADDC, VT, Custom);
+ setOperationAction(ISD::ADDE, VT, Custom);
+ setOperationAction(ISD::SUBC, VT, Custom);
+ setOperationAction(ISD::SUBE, VT, Custom);
+ }
setOperationAction(ISD::BR_JT , MVT::Other, Expand);
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
@@ -285,21 +404,27 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
- setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
}
+ if (Subtarget->hasPOPCNT()) {
+ setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
+ } else {
+ setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ }
+
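// With POPCNT available only i8 needs help: Promote performs the count in a
// wider type where a real popcnt instruction exists (zero-extension adds no
// set bits, so the result is unchanged). A hedged sketch:
//
//   movzbl %dil, %eax
//   popcntl %eax, %eax   # ctpop of the original i8 value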
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
@@ -307,7 +432,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SELECT , MVT::i1 , Promote);
// X86 wants to expand cmov itself.
setOperationAction(ISD::SELECT , MVT::i8 , Custom);
- setOperationAction(ISD::SELECT , MVT::i16 , Custom);
+ setOperationAction(ISD::SELECT , MVT::i16 , Custom);
setOperationAction(ISD::SELECT , MVT::i32 , Custom);
setOperationAction(ISD::SELECT , MVT::f32 , Custom);
setOperationAction(ISD::SELECT , MVT::f64 , Custom);
@@ -350,7 +475,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
}
- if (Subtarget->hasSSE1())
+ if (Subtarget->hasXMM())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
// We may not have a libcall for MEMBARRIER so we should lower this.
@@ -364,15 +489,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setShouldFoldAtomicFences(true);
// Expand certain atomics
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Custom);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
-
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+ }
if (!Subtarget->is64Bit()) {
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
@@ -521,13 +642,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
{
- bool ignored;
- APFloat TmpFlt(+0.0);
- TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
- &ignored);
+ APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
addLegalFPImmediate(TmpFlt); // FLD0
TmpFlt.changeSign();
addLegalFPImmediate(TmpFlt); // FLD0/FCHS
+
+ bool ignored;
APFloat TmpFlt2(+1.0);
TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
&ignored);
@@ -573,8 +693,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand);
- setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::INSERT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
@@ -622,7 +743,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
- if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) {
+ if (!UseSoftFloat && Subtarget->hasMMX()) {
addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass);
// No operations on x86mmx supported, everything uses intrinsics.
}
@@ -654,12 +775,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
- setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Expand);
- setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Expand);
- setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Expand);
- setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v8i8, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v4i16, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
- if (!UseSoftFloat && Subtarget->hasSSE1()) {
+ if (!UseSoftFloat && Subtarget->hasXMM()) {
addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
@@ -676,7 +797,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSETCC, MVT::v4f32, Custom);
}
- if (!UseSoftFloat && Subtarget->hasSSE2()) {
+ if (!UseSoftFloat && Subtarget->hasXMMInt()) {
addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
// FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
@@ -821,9 +942,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
}
- if (Subtarget->hasSSE42()) {
+ if (Subtarget->hasSSE42())
setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
- }
if (!UseSoftFloat && Subtarget->hasAVX()) {
addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
@@ -836,27 +956,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::LOAD, MVT::v8i32, Legal);
setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
+
setOperationAction(ISD::FADD, MVT::v8f32, Legal);
setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom);
- //setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Custom);
- //setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom);
- //setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
- //setOperationAction(ISD::VSETCC, MVT::v8f32, Custom);
-
- // Operations to consider commented out -v16i16 v32i8
- //setOperationAction(ISD::ADD, MVT::v16i16, Legal);
- setOperationAction(ISD::ADD, MVT::v8i32, Custom);
- setOperationAction(ISD::ADD, MVT::v4i64, Custom);
- //setOperationAction(ISD::SUB, MVT::v32i8, Legal);
- //setOperationAction(ISD::SUB, MVT::v16i16, Legal);
- setOperationAction(ISD::SUB, MVT::v8i32, Custom);
- setOperationAction(ISD::SUB, MVT::v4i64, Custom);
- //setOperationAction(ISD::MUL, MVT::v16i16, Legal);
+
setOperationAction(ISD::FADD, MVT::v4f64, Legal);
setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
@@ -864,85 +971,66 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
- setOperationAction(ISD::VSETCC, MVT::v4f64, Custom);
- // setOperationAction(ISD::VSETCC, MVT::v32i8, Custom);
- // setOperationAction(ISD::VSETCC, MVT::v16i16, Custom);
- setOperationAction(ISD::VSETCC, MVT::v8i32, Custom);
-
- // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i8, Custom);
- // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i16, Custom);
- // setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i16, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i32, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f32, Custom);
-
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i64, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f64, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i64, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f64, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Custom);
-
-#if 0
- // Not sure we want to do this since there are no 256-bit integer
- // operations in AVX
-
- // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
- // This includes 256-bit vectors
- for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; ++i) {
- EVT VT = (MVT::SimpleValueType)i;
-
- // Do not attempt to custom lower non-power-of-2 vectors
- if (!isPowerOf2_32(VT.getVectorNumElements()))
+ // Custom lower build_vector, vector_shuffle, scalar_to_vector,
+ // insert_vector_elt, extract_subvector, and extract_vector_elt for
+ // 256-bit types.
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+ // Do not attempt to custom lower non-256-bit vectors
+ if (!isPowerOf2_32(MVT(VT).getVectorNumElements())
+ || (MVT(VT).getSizeInBits() < 256))
continue;
-
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- }
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ }
+ // Custom-lower insert_subvector and extract_subvector based on
+ // the result type.
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+ // Do not attempt to custom lower non-256-bit vectors
+ if (!isPowerOf2_32(MVT(VT).getVectorNumElements()))
+ continue;
- if (Subtarget->is64Bit()) {
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i64, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Custom);
+ if (MVT(VT).getSizeInBits() == 128)
+   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ else if (MVT(VT).getSizeInBits() == 256)
+   setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
}
-#endif
-
-#if 0
- // Not sure we want to do this since there are no 256-bit integer
- // operations in AVX
- // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
- // Including 256-bit vectors
- for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; i++) {
- EVT VT = (MVT::SimpleValueType)i;
+ // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
+ // Don't promote loads because we need them for VPERM vector index versions.
- if (!VT.is256BitVector()) {
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ VT++) {
+ if (!isPowerOf2_32(MVT((MVT::SimpleValueType)VT).getVectorNumElements())
+ || (MVT((MVT::SimpleValueType)VT).getSizeInBits() < 256))
continue;
- }
- setOperationAction(ISD::AND, VT, Promote);
- AddPromotedToType (ISD::AND, VT, MVT::v4i64);
- setOperationAction(ISD::OR, VT, Promote);
- AddPromotedToType (ISD::OR, VT, MVT::v4i64);
- setOperationAction(ISD::XOR, VT, Promote);
- AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
- setOperationAction(ISD::LOAD, VT, Promote);
- AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
- setOperationAction(ISD::SELECT, VT, Promote);
- AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
+ setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v4i64);
+ setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v4i64);
+ setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v4i64);
+ //setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote);
+ //AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v4i64);
+ setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v4i64);
}
-
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-#endif
}
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- // Add/Sub/Mul with overflow operations are custom lowered.
- setOperationAction(ISD::SADDO, MVT::i32, Custom);
- setOperationAction(ISD::UADDO, MVT::i32, Custom);
- setOperationAction(ISD::SSUBO, MVT::i32, Custom);
- setOperationAction(ISD::USUBO, MVT::i32, Custom);
- setOperationAction(ISD::SMULO, MVT::i32, Custom);
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
@@ -950,14 +1038,21 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: We really should do custom legalization for addition and
// subtraction on x86-32 once PR3203 is fixed. We really can't do much better
// than generic legalization for 64-bit multiplication-with-overflow, though.
- if (Subtarget->is64Bit()) {
- setOperationAction(ISD::SADDO, MVT::i64, Custom);
- setOperationAction(ISD::UADDO, MVT::i64, Custom);
- setOperationAction(ISD::SSUBO, MVT::i64, Custom);
- setOperationAction(ISD::USUBO, MVT::i64, Custom);
- setOperationAction(ISD::SMULO, MVT::i64, Custom);
+ for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
+ // Add/Sub/Mul with overflow operations are custom lowered.
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::SADDO, VT, Custom);
+ setOperationAction(ISD::UADDO, VT, Custom);
+ setOperationAction(ISD::SSUBO, VT, Custom);
+ setOperationAction(ISD::USUBO, VT, Custom);
+ setOperationAction(ISD::SMULO, VT, Custom);
+ setOperationAction(ISD::UMULO, VT, Custom);
}
+ // There are no 8-bit 3-address imul/mul instructions.
+ setOperationAction(ISD::SMULO, MVT::i8, Expand);
+ setOperationAction(ISD::UMULO, MVT::i8, Expand);
+
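The widened loop replaces the old 64-bit-only special case: every integer VT in IntVTs (presumably i8/i16/i32, plus i64 on 64-bit targets) now gets custom overflow lowering, and i8 multiply-with-overflow is then re-marked Expand since, per the comment above, there is no 8-bit two/three-operand multiply. As a rough illustration of the contract an expanded i8 UMULO must satisfy (a standalone sketch, not backend code):

#include <cstdint>
#include <utility>

// Widening to 16 bits makes the overflow bit explicit: return the low byte
// of the product and whether the full product fit in 8 bits.
static std::pair<uint8_t, bool> umulo8(uint8_t a, uint8_t b) {
  uint16_t wide = uint16_t(a) * uint16_t(b);  // exact 16-bit product
  return { uint8_t(wide), wide > 0xFF };      // value + overflow flag
}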
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, 0);
@@ -974,6 +1069,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
if (Subtarget->is64Bit())
@@ -981,11 +1079,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
computeRegisterProperties();
- // FIXME: These should be based on subtarget info. Plus, the values should
- // be smaller when we are in optimizing for size mode.
+ // On Darwin, -Os means optimize for size without hurting performance, so
+ // do not reduce the limit.
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
+ maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
- maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
+ maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
+ maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
setPrefLoopAlignment(16);
benefitFromCodePlacementOpt = true;
}
@@ -1036,7 +1137,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
}
unsigned Align = 4;
- if (Subtarget->hasSSE1())
+ if (Subtarget->hasXMM())
getMaxByValAlign(Ty, Align);
return Align;
}
@@ -1077,7 +1178,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
} else if (!MemcpyStrSrc && Size >= 8 &&
!Subtarget->is64Bit() &&
Subtarget->getStackAlignment() >= 8 &&
- Subtarget->hasSSE2()) {
+ Subtarget->hasXMMInt()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
return MVT::f64;
@@ -1144,6 +1245,7 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
}
+// FIXME: Why is this routine here? Move to RegInfo!
std::pair<const TargetRegisterClass*, uint8_t>
X86TargetLowering::findRepresentativeClass(EVT VT) const{
const TargetRegisterClass *RRC = 0;
@@ -1169,17 +1271,20 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{
return std::make_pair(RRC, Cost);
}
+// FIXME: Why is this routine here? Move to RegInfo!
unsigned
X86TargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0;
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
switch (RC->getID()) {
default:
return 0;
case X86::GR32RegClassID:
return 4 - FPDiff;
case X86::GR64RegClassID:
- return 8 - FPDiff;
+ return 12 - FPDiff;
case X86::VR128RegClassID:
return Subtarget->is64Bit() ? 10 : 4;
case X86::VR64RegClassID:
@@ -1263,14 +1368,14 @@ X86TargetLowering::LowerReturn(SDValue Chain,
// or SSE or MMX vectors.
if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
- (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
+ (Subtarget->is64Bit() && !Subtarget->hasXMM())) {
report_fatal_error("SSE register return with SSE disabled");
}
// Likewise we can't return F64 values with SSE1 only. gcc does so, but
// llvm-gcc has never done it right and no one has noticed, so this
// should be OK for now.
if (ValVT == MVT::f64 &&
- (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
+ (Subtarget->is64Bit() && !Subtarget->hasXMMInt()))
report_fatal_error("SSE2 register return with SSE2 disabled");
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
@@ -1291,13 +1396,13 @@ X86TargetLowering::LowerReturn(SDValue Chain,
if (Subtarget->is64Bit()) {
if (ValVT == MVT::x86mmx) {
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
- ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
+ ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
ValToCopy);
// If we don't have SSE2 available, convert to v4f32 so the generated
// register is legal.
if (!Subtarget->hasSSE2())
- ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,ValToCopy);
+ ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
}
}
}
@@ -1336,6 +1441,28 @@ X86TargetLowering::LowerReturn(SDValue Chain,
MVT::Other, &RetOps[0], RetOps.size());
}
+bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() != ISD::CopyToReg &&
+ Copy->getOpcode() != ISD::FP_EXTEND)
+ return false;
+
+ bool HasRet = false;
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != X86ISD::RET_FLAG)
+ return false;
+ HasRet = true;
+ }
+
+ return HasRet;
+}
+
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
///
@@ -1360,7 +1487,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
- ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
+ ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasXMM())) {
report_fatal_error("SSE register return with SSE disabled");
}
@@ -1381,7 +1508,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64;
if (CopyVT == MVT::f80) Opc = isST0 ? X86::FpGET_ST0_80:X86::FpGET_ST1_80;
SDValue Ops[] = { Chain, InFlag };
- Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Flag,
+ Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Glue,
Ops, 2), 1);
Val = Chain.getValue(0);
@@ -1404,7 +1531,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
MVT::i64, InFlag).getValue(1);
Val = Chain.getValue(0);
}
- Val = DAG.getNode(ISD::BIT_CONVERT, dl, CopyVT, Val);
+ Val = DAG.getNode(ISD::BITCAST, dl, CopyVT, Val);
} else {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
CopyVT, InFlag).getValue(1);
@@ -1542,6 +1669,12 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
+
+ // Allocate shadow area for Win64
+ if (IsWin64) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
unsigned LastVal = ~0U;
@@ -1587,7 +1720,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::BCvt)
- ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
if (VA.isExtInLoc()) {
// Handle MMX values passed in XMM regs.
@@ -1669,17 +1802,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
TotalNumIntRegs);
bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
- assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
+ assert(!(NumXMMRegs && !Subtarget->hasXMM()) &&
"SSE register cannot be used when SSE is disabled!");
assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!");
- if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
+ if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM())
// Kernel mode asks for SSE to be disabled, so don't push them
// on the stack.
TotalNumXMMRegs = 0;
if (IsWin64) {
- const TargetFrameInfo &TFI = *getTargetMachine().getFrameInfo();
+ const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
// Get to the caller-allocated home save location. Add 8 to account
// for the return address.
int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
@@ -1776,8 +1909,7 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const {
- const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0);
- unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
+ unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
if (Flags.isByVal())
@@ -1836,6 +1968,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget->is64Bit();
+ bool IsWin64 = Subtarget->isTargetWin64();
bool IsStructRet = CallIsStructReturn(Outs);
bool IsSibcall = false;
@@ -1861,6 +1994,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
+
+ // Allocate shadow area for Win64
+ if (IsWin64) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
@@ -1920,14 +2059,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
case CCValAssign::AExt:
if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
// Special case: passing MMX values in XMM registers.
- Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
} else
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::BCvt:
- Arg = DAG.getNode(ISD::BIT_CONVERT, dl, RegVT, Arg);
+ Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
break;
case CCValAssign::Indirect: {
// Store the argument.
@@ -1943,7 +2082,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- if (isVarArg && Subtarget->isTargetWin64()) {
+ if (isVarArg && IsWin64) {
// Win64 ABI requires argument XMM reg to be copied to the corresponding
// shadow reg if callee is a varargs function.
unsigned ShadowReg = 0;
@@ -2009,7 +2148,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
}
}
- if (Is64Bit && isVarArg && !Subtarget->isTargetWin64()) {
+ if (Is64Bit && isVarArg && !IsWin64) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
@@ -2024,7 +2163,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
- assert((Subtarget->hasSSE1() || !NumXMMRegs)
+ assert((Subtarget->hasXMM() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled");
Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
@@ -2140,8 +2279,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
- // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
- // symbols should go through the PLT.
+ // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
+ // external symbols should go through the PLT.
if (Subtarget->isTargetELF() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_) {
OpFlags = X86II::MO_PLT;
@@ -2158,7 +2297,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
}
// Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
if (!IsSibcall && isTailCall) {
@@ -2184,7 +2323,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
// Add an implicit use of AL for non-Windows x86 64-bit vararg functions.
- if (Is64Bit && isVarArg && !Subtarget->isTargetWin64())
+ if (Is64Bit && isVarArg && !IsWin64)
Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
if (InFlag.getNode())
@@ -2271,7 +2410,7 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG& DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const TargetMachine &TM = MF.getTarget();
- const TargetFrameInfo &TFI = *TM.getFrameInfo();
+ const TargetFrameLowering &TFI = *TM.getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
uint64_t AlignMask = StackAlignment - 1;
int64_t Offset = StackSize;
@@ -2298,7 +2437,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
- if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
+ if (!TargetRegisterInfo::isVirtualRegister(VR))
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
if (!Def)
@@ -2444,14 +2583,17 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
+
+ // Allocate shadow area for Win64
+ if (Subtarget->isTargetWin64()) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
if (CCInfo.getNextStackOffset()) {
MachineFunction &MF = DAG.getMachineFunction();
if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
return false;
- if (Subtarget->isTargetWin64())
- // Win64 ABI has additional complications.
- return false;
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
@@ -2545,6 +2687,10 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVSD:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
+ case X86ISD::VUNPCKLPS:
+ case X86ISD::VUNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLBW:
case X86ISD::PUNPCKLDQ:
@@ -2612,6 +2758,10 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::MOVSD:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
+ case X86ISD::VUNPCKLPS:
+ case X86ISD::VUNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLBW:
case X86ISD::PUNPCKLDQ:
@@ -2713,8 +2863,8 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
// First determine if it is required or is profitable to flip the operands.
// If LHS is a foldable load, but RHS is not, flip the condition.
- if ((ISD::isNON_EXTLoad(LHS.getNode()) && LHS.hasOneUse()) &&
- !(ISD::isNON_EXTLoad(RHS.getNode()) && RHS.hasOneUse())) {
+ if (ISD::isNON_EXTLoad(LHS.getNode()) &&
+ !ISD::isNON_EXTLoad(RHS.getNode())) {
SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
std::swap(LHS, RHS);
}
@@ -3023,7 +3173,8 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
unsigned NumElems = N->getValueType(0).getVectorNumElements();
- if (NumElems != 2 && NumElems != 4)
+ if ((NumElems != 2 && NumElems != 4) ||
+ N->getValueType(0).getSizeInBits() > 128)
return false;
for (unsigned i = 0; i < NumElems/2; ++i)
@@ -3045,19 +3196,36 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
return false;
- for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
- int BitI = Mask[i];
- int BitI1 = Mask[i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (V2IsSplat) {
- if (!isUndefOrEqual(BitI1, NumElts))
- return false;
- } else {
- if (!isUndefOrEqual(BitI1, j + NumElts))
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections.
+ unsigned NumSections = VT.getSizeInBits() / 128;
+ if (NumSections == 0) NumSections = 1; // Handle MMX
+ unsigned NumSectionElts = NumElts / NumSections;
+
+ unsigned Start = 0;
+ unsigned End = NumSectionElts;
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = Start, j = s * NumSectionElts;
+ i != End;
+ i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
return false;
+ if (V2IsSplat) {
+ if (!isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j + NumElts))
+ return false;
+ }
}
+ // Process the next 128 bits.
+ Start += NumSectionElts;
+ End += NumSectionElts;
}
+
return true;
}
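As a worked example of the per-section rule above: for v8f32 (two 128-bit sections of four elements each) the accepted unpcklps-style mask is <0,8,1,9, 4,12,5,13>, i.e. each section independently interleaves the low elements of the matching halves of V1 and V2. A simplified standalone restatement of the check, ignoring the undef and V2IsSplat cases the real code handles:

#include <vector>

// True if Mask is a section-wise UNPCKL pattern: within every 128-bit
// section, elements alternate V1[j], V2[j] starting at the section base.
static bool isSectionedUnpcklMask(const std::vector<int> &Mask,
                                  unsigned NumElts, unsigned SectionElts) {
  for (unsigned Base = 0; Base < NumElts; Base += SectionElts)
    for (unsigned i = Base, j = Base; i != Base + SectionElts; i += 2, ++j)
      if (Mask[i] != (int)j || Mask[i + 1] != (int)(j + NumElts))
        return false;
  return true;
}
// isSectionedUnpcklMask({0,8,1,9,4,12,5,13}, 8, 4) == true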
@@ -3105,14 +3273,27 @@ static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
return false;
- for (int i = 0, j = 0; i != NumElems; i += 2, ++j) {
- int BitI = Mask[i];
- int BitI1 = Mask[i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (!isUndefOrEqual(BitI1, j))
- return false;
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections.
+ unsigned NumSections = VT.getSizeInBits() / 128;
+ if (NumSections == 0) NumSections = 1; // Handle MMX
+ unsigned NumSectionElts = NumElems / NumSections;
+
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = s * NumSectionElts, j = s * NumSectionElts;
+ i != NumSectionElts * (s + 1);
+ i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
}
+
return true;
}
@@ -3263,6 +3444,44 @@ bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
return true;
}
+/// isVEXTRACTF128Index - Return true if the specified
+/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
+/// suitable for input to VEXTRACTF128.
+bool X86::isVEXTRACTF128Index(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
+ return false;
+
+ // The index should be aligned on a 128-bit boundary.
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+
+ unsigned VL = N->getValueType(0).getVectorNumElements();
+ unsigned VBits = N->getValueType(0).getSizeInBits();
+ unsigned ElSize = VBits / VL;
+ bool Result = (Index * ElSize) % 128 == 0;
+
+ return Result;
+}
+
+/// isVINSERTF128Index - Return true if the specified INSERT_SUBVECTOR
+/// operand specifies a subvector insert that is suitable for input to
+/// VINSERTF128.
+bool X86::isVINSERTF128Index(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
+ return false;
+
+ // The index should be aligned on a 128-bit boundary.
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+
+ unsigned VL = N->getValueType(0).getVectorNumElements();
+ unsigned VBits = N->getValueType(0).getSizeInBits();
+ unsigned ElSize = VBits / VL;
+ bool Result = (Index * ElSize) % 128 == 0;
+
+ return Result;
+}
+
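Both predicates reduce to the same arithmetic: the element index times the element width must land on a 128-bit lane boundary, so for a v8f32 operand only indices 0 and 4 qualify. A one-line restatement (hypothetical helper name, for illustration only):

// True iff Index starts a 128-bit lane, e.g. (4 * 32) % 128 == 0.
static bool is128BitLaneStart(unsigned Index, unsigned EltBits) {
  return (Index * EltBits) % 128 == 0;
}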
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
@@ -3331,6 +3550,42 @@ unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
return (Val - i) * EltSize;
}
+/// getExtractVEXTRACTF128Immediate - Return the appropriate immediate
+/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
+/// instructions.
+unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
+ llvm_unreachable("Illegal extract subvector for VEXTRACTF128");
+
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+
+ EVT VecVT = N->getOperand(0).getValueType();
+ EVT ElVT = VecVT.getVectorElementType();
+
+ unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ return Index / NumElemsPerChunk;
+}
+
+/// getInsertVINSERTF128Immediate - Return the appropriate immediate
+/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
+/// instructions.
+unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
+ llvm_unreachable("Illegal insert subvector for VINSERTF128");
+
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+
+ EVT VecVT = N->getValueType(0);
+ EVT ElVT = VecVT.getVectorElementType();
+
+ unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ return Index / NumElemsPerChunk;
+}
+
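The immediate in both cases is simply the 128-bit chunk number: the subvector index divided by the number of elements per chunk. For v8f32, NumElemsPerChunk is 128/32 = 4, so index 4 encodes as immediate 1 (the upper lane). Sketch:

// Chunk number for a 128-bit-aligned subvector index.
static unsigned laneImmediate(unsigned Index, unsigned EltBits) {
  return Index / (128 / EltBits);  // laneImmediate(4, 32) == 1
}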
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
@@ -3499,7 +3754,7 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
/// getOnesVector - Returns a vector of specified type with all bits set.
@@ -3512,7 +3767,7 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
SDValue Vec;
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
@@ -3597,9 +3852,9 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
// Perform the splat.
int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1);
+ V1 = DAG.getNode(ISD::BITCAST, dl, PVT, V1);
V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1);
+ return DAG.getNode(ISD::BITCAST, dl, VT, V1);
}
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
@@ -3671,11 +3926,15 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
- DecodePUNPCKLMask(NumElems, ShuffleMask);
+ DecodePUNPCKLMask(VT, ShuffleMask);
break;
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
- DecodeUNPCKLPMask(NumElems, ShuffleMask);
+ case X86ISD::VUNPCKLPS:
+ case X86ISD::VUNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
+ DecodeUNPCKLPMask(VT, ShuffleMask);
break;
case X86ISD::MOVHLPS:
DecodeMOVHLPSMask(NumElems, ShuffleMask);
@@ -3723,7 +3982,7 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
}
// Actual nodes that may contain scalar elements
- if (Opcode == ISD::BIT_CONVERT) {
+ if (Opcode == ISD::BITCAST) {
V = V.getOperand(0);
EVT SrcVT = V.getValueType();
unsigned NumElems = VT.getVectorNumElements();
@@ -3912,7 +4171,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
}
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V);
}
/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
@@ -3953,10 +4212,11 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
const TargetLowering &TLI, DebugLoc dl) {
EVT ShVT = MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
- SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(Opc, dl, ShVT, SrcOp,
- DAG.getConstant(NumBits, TLI.getShiftAmountTy())));
+ DAG.getConstant(NumBits,
+ TLI.getShiftAmountTy(SrcOp.getValueType()))));
}
SDValue
@@ -3979,8 +4239,7 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
FI = FINode->getIndex();
Offset = 0;
- } else if (Ptr.getOpcode() == ISD::ADD &&
- isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+ } else if (DAG.isBaseWithConstantOffset(Ptr) &&
isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
Offset = Ptr.getConstantOperandVal(1);
@@ -4021,8 +4280,8 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
LD->getPointerInfo().getWithOffset(StartOffset),
false, false, 0);
// Canonicalize it to a v4i32 shuffle.
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getVectorShuffle(MVT::v4i32, dl, V1,
DAG.getUNDEF(MVT::v4i32),&Mask[0]));
}
@@ -4090,7 +4349,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys,
Ops, 2, MVT::i32,
LDBase->getMemOperand());
- return DAG.getNode(ISD::BIT_CONVERT, DL, VT, ResNode);
+ return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
}
return SDValue();
}
@@ -4098,6 +4357,34 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
+
+ EVT VT = Op.getValueType();
+ EVT ExtVT = VT.getVectorElementType();
+
+ unsigned NumElems = Op.getNumOperands();
+
+ // For AVX-length vectors, build the individual 128-bit pieces and
+ // use shuffles to put them in place.
+ if (VT.getSizeInBits() > 256 &&
+ Subtarget->hasAVX() &&
+ !ISD::isBuildVectorAllZeros(Op.getNode())) {
+ SmallVector<SDValue, 8> V;
+ V.resize(NumElems);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ V[i] = Op.getOperand(i);
+ }
+
+ EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
+
+ // Build the lower subvector.
+ SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2);
+ // Build the upper subvector.
+ SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2],
+ NumElems/2);
+
+ return ConcatVectors(Lower, Upper, DAG);
+ }
+
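At the instruction level the split amounts to building each 128-bit half separately and combining them with vinsertf128. A hand-written intrinsics analog for v8f32 (illustrative only; the DAG code above is the authoritative path):

#include <immintrin.h>  // requires compiling with AVX enabled

static __m256 build_v8f32(float e0, float e1, float e2, float e3,
                          float e4, float e5, float e6, float e7) {
  __m128 lo = _mm_set_ps(e3, e2, e1, e0);  // lower subvector, e0 in lane 0
  __m128 hi = _mm_set_ps(e7, e6, e5, e4);  // upper subvector
  // Widen lo (upper bits undefined), then insert hi as the high 128 bits.
  return _mm256_insertf128_ps(_mm256_castps128_ps256(lo), hi, 1);
}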
  // All zeros are handled with pxor in SSE2 and above, xorps in SSE1.
  // All ones are handled with pcmpeqd. In AVX, zeros are handled with
  // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256-bit version of pcmpeqd
@@ -4116,11 +4403,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl);
}
- EVT VT = Op.getValueType();
- EVT ExtVT = VT.getVectorElementType();
unsigned EVTBits = ExtVT.getSizeInBits();
- unsigned NumElems = Op.getNumOperands();
unsigned NumZero = 0;
unsigned NumNonZero = 0;
unsigned NonZeros = 0;
@@ -4182,7 +4466,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DAG.getUNDEF(Item.getValueType()),
&Mask[0]);
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Item);
}
}
@@ -4206,7 +4490,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true,
Subtarget->hasSSE2(), DAG);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Item);
}
}
@@ -4399,21 +4683,21 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 ||
ResVT == MVT::v8i16 || ResVT == MVT::v16i8);
int Mask[2];
- SDValue InVec = DAG.getNode(ISD::BIT_CONVERT,dl, MVT::v1i64, Op.getOperand(0));
+ SDValue InVec = DAG.getNode(ISD::BITCAST,dl, MVT::v1i64, Op.getOperand(0));
SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
InVec = Op.getOperand(1);
if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
unsigned NumElts = ResVT.getVectorNumElements();
- VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp);
+ VecOp = DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp,
InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1));
} else {
- InVec = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v1i64, InVec);
+ InVec = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, InVec);
SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
Mask[0] = 0; Mask[1] = 2;
VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask);
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp);
+ return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
}
// v8i16 shuffles - Prefer shuffles in the following order:
@@ -4495,9 +4779,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad);
MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad);
NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1),
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]);
- NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV);
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2), &MaskV[0]);
+ NewV = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, NewV);
// Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the
// source words for the shuffle, to aid later transformations.
@@ -4566,12 +4850,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8));
}
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V1);
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1);
V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v16i8, &pshufbMask[0], 16));
if (!TwoInputs)
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
// Calculate the shuffle mask for the second input, shuffle it, and
// OR it with the first shuffled input.
@@ -4586,12 +4870,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8));
}
- V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V2);
+ V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V2);
V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v16i8, &pshufbMask[0], 16));
V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
}
// If BestLoQuad >= 0, generate a pshuflw to put the low elements in order,
@@ -4758,8 +5042,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
// No SSSE3 - Calculate in place words and then fix all out of place words
// With 0-16 extracts & inserts. Worst case is 16 bytes out of order from
// the 16 different words that comprise the two doublequadword input vectors.
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
- V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V2);
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
SDValue NewV = V2Only ? V2 : V1;
for (int i = 0; i != 8; ++i) {
int Elt0 = MaskVals[i*2];
@@ -4797,7 +5081,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
DAG.getIntPtrConstant(Elt1 / 2));
if ((Elt1 & 1) == 0)
InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt,
- DAG.getConstant(8, TLI.getShiftAmountTy()));
+ DAG.getConstant(8,
+ TLI.getShiftAmountTy(InsElt.getValueType())));
else if (Elt0 >= 0)
InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt,
DAG.getConstant(0xFF00, MVT::i16));
@@ -4811,7 +5096,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
Elt0Src, DAG.getIntPtrConstant(Elt0 / 2));
if ((Elt0 & 1) != 0)
InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0,
- DAG.getConstant(8, TLI.getShiftAmountTy()));
+ DAG.getConstant(8,
+ TLI.getShiftAmountTy(InsElt0.getValueType())));
else if (Elt1 >= 0)
InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0,
DAG.getConstant(0x00FF, MVT::i16));
@@ -4821,7 +5107,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
DAG.getIntPtrConstant(i));
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, NewV);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
}
/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
@@ -4865,8 +5151,8 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
MaskVec.push_back(StartIdx / Scale);
}
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V1);
- V2 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V2);
+ V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]);
}
@@ -4885,11 +5171,11 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
if ((ExtVT != MVT::i64 || Subtarget->is64Bit()) &&
SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
- SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
+ SrcOp.getOperand(0).getOpcode() == ISD::BITCAST &&
SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) {
// PR2108
OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
OpVT,
@@ -4899,9 +5185,9 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
}
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
- DAG.getNode(ISD::BIT_CONVERT, dl,
+ DAG.getNode(ISD::BITCAST, dl,
OpVT, SrcOp)));
}
@@ -5012,6 +5298,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
// Break it into (shuffle shuffle_hi, shuffle_lo).
Locs.clear();
+ Locs.resize(4);
SmallVector<int,8> LoMask(4U, -1);
SmallVector<int,8> HiMask(4U, -1);
@@ -5055,7 +5342,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
}
static bool MayFoldVectorLoad(SDValue V) {
- if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT)
+ if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
V = V.getOperand(0);
if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
V = V.getOperand(0);
@@ -5072,7 +5359,7 @@ static bool MayFoldVectorLoad(SDValue V) {
  // one use. Remove this version after this bug gets fixed.
// rdar://8434668, PR8156
static bool RelaxedMayFoldVectorLoad(SDValue V) {
- if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT)
+ if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
V = V.getOperand(0);
if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
V = V.getOperand(0);
@@ -5110,7 +5397,7 @@ bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
// If the bit convert changed the number of elements, it is unsafe
// to examine the mask.
bool HasShuffleIntoBitcast = false;
- if (V.getOpcode() == ISD::BIT_CONVERT) {
+ if (V.getOpcode() == ISD::BITCAST) {
EVT SrcVT = V.getOperand(0).getValueType();
if (SrcVT.getVectorNumElements() != VT.getVectorNumElements())
return false;
@@ -5125,7 +5412,7 @@ bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1);
// Skip one more bit_convert if necessary
- if (V.getOpcode() == ISD::BIT_CONVERT)
+ if (V.getOpcode() == ISD::BITCAST)
V = V.getOperand(0);
if (ISD::isNormalLoad(V.getNode())) {
@@ -5162,8 +5449,8 @@ SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
  // Canonicalize to v2f64.
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, V1);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64,
V1, DAG));
}
@@ -5225,6 +5512,10 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
CanFoldLoad = true;
+ // Both of them can't be memory operations though.
+ if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2))
+ CanFoldLoad = false;
+
if (CanFoldLoad) {
if (HasSSE2 && NumElems == 2)
return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
@@ -5253,16 +5544,20 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
X86::getShuffleSHUFImmediate(SVOp), DAG);
}
-static inline unsigned getUNPCKLOpcode(EVT VT) {
+static inline unsigned getUNPCKLOpcode(EVT VT, const X86Subtarget *Subtarget) {
switch(VT.getSimpleVT().SimpleTy) {
case MVT::v4i32: return X86ISD::PUNPCKLDQ;
case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
- case MVT::v4f32: return X86ISD::UNPCKLPS;
- case MVT::v2f64: return X86ISD::UNPCKLPD;
+ case MVT::v4f32:
+ return Subtarget->hasAVX() ? X86ISD::VUNPCKLPS : X86ISD::UNPCKLPS;
+ case MVT::v2f64:
+ return Subtarget->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
+ case MVT::v8f32: return X86ISD::VUNPCKLPSY;
+ case MVT::v4f64: return X86ISD::VUNPCKLPDY;
case MVT::v16i8: return X86ISD::PUNPCKLBW;
case MVT::v8i16: return X86ISD::PUNPCKLWD;
default:
- llvm_unreachable("Unknow type for unpckl");
+ llvm_unreachable("Unknown type for unpckl");
}
return 0;
}
@@ -5276,7 +5571,7 @@ static inline unsigned getUNPCKHOpcode(EVT VT) {
case MVT::v16i8: return X86ISD::PUNPCKHBW;
case MVT::v8i16: return X86ISD::PUNPCKHWD;
default:
- llvm_unreachable("Unknow type for unpckh");
+ llvm_unreachable("Unknown type for unpckh");
}
return 0;
}
@@ -5317,7 +5612,7 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
if (VT == MVT::v8i16 || VT == MVT::v16i8) {
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
if (NewOp.getNode())
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, NewOp);
+ return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
} else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
@@ -5386,7 +5681,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// unpckh_undef). Only use pshufd if speed is more important than size.
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
if (VT != MVT::v2i64 && VT != MVT::v2f64)
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+ dl, VT, V1, V1, DAG);
if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
if (VT != MVT::v2i64 && VT != MVT::v2f64)
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
@@ -5507,7 +5802,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
if (X86::isUNPCKLMask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+ dl, VT, V1, V2, DAG);
if (X86::isUNPCKHMask(SVOp))
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
@@ -5534,7 +5830,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
if (X86::isUNPCKLMask(NewSVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+ dl, VT, V2, V1, DAG);
if (X86::isUNPCKHMask(NewSVOp))
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
@@ -5557,8 +5854,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
SVOp->getSplatIndex() == 0 && V2IsUndef) {
- if (VT == MVT::v2f64)
- return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
+ if (VT == MVT::v2f64) {
+ X86ISD::NodeType Opcode =
+ getSubtarget()->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
+ return getTargetShuffleNode(Opcode, dl, VT, V1, V1, DAG);
+ }
if (VT == MVT::v2i64)
return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
}
@@ -5585,7 +5885,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (X86::isUNPCKL_v_undef_Mask(SVOp))
if (VT != MVT::v2i64 && VT != MVT::v2f64)
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+ dl, VT, V1, V1, DAG);
if (X86::isUNPCKH_v_undef_Mask(SVOp))
if (VT != MVT::v2i64 && VT != MVT::v2f64)
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
@@ -5627,7 +5928,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
if (Idx == 0)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
- DAG.getNode(ISD::BIT_CONVERT, dl,
+ DAG.getNode(ISD::BITCAST, dl,
MVT::v4i32,
Op.getOperand(0)),
Op.getOperand(1)));
@@ -5648,14 +5949,14 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
if ((User->getOpcode() != ISD::STORE ||
(isa<ConstantSDNode>(Op.getOperand(1)) &&
cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
- (User->getOpcode() != ISD::BIT_CONVERT ||
+ (User->getOpcode() != ISD::BITCAST ||
User->getValueType(0) != MVT::i32))
return SDValue();
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32,
Op.getOperand(0)),
Op.getOperand(1));
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Extract);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract);
} else if (VT == MVT::i32) {
// ExtractPS works with constant index.
if (isa<ConstantSDNode>(Op.getOperand(1)))
@@ -5671,6 +5972,38 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return SDValue();
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+
+ // If this is a 256-bit vector result, first extract the 128-bit
+ // vector and then extract from the 128-bit vector.
+ if (VecVT.getSizeInBits() > 128) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ unsigned NumElems = VecVT.getVectorNumElements();
+ SDValue Idx = Op.getOperand(1);
+
+ if (!isa<ConstantSDNode>(Idx))
+ return SDValue();
+
+ unsigned ExtractNumElems = NumElems / (VecVT.getSizeInBits() / 128);
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ // Get the 128-bit vector.
+ bool Upper = IdxVal >= ExtractNumElems;
+ Vec = Extract128BitVector(Vec, Idx, DAG, dl);
+
+ // Extract from it.
+ SDValue ScaledIdx = Idx;
+ if (Upper)
+ ScaledIdx = DAG.getNode(ISD::SUB, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(ExtractNumElems,
+ Idx.getValueType()));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
+ ScaledIdx);
+ }
+
+ assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length");
+
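Concretely, extracting element 6 of a v8f32 becomes: take the upper 128-bit lane, then extract within it at the rebased index 6 - 4 = 2. The same sequence written with intrinsics (a sketch of the shape, not the code the backend emits):

#include <immintrin.h>

static float extract_elt6(__m256 v) {
  __m128 hi = _mm256_extractf128_ps(v, 1);          // upper 128-bit lane
  return _mm_cvtss_f32(_mm_shuffle_ps(hi, hi, 2));  // move element 2 to slot 0
}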
if (Subtarget->hasSSE41()) {
SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
if (Res.getNode())
@@ -5686,7 +6019,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
if (Idx == 0)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
- DAG.getNode(ISD::BIT_CONVERT, dl,
+ DAG.getNode(ISD::BITCAST, dl,
MVT::v4i32, Vec),
Op.getOperand(1)));
  // Transform it so it matches pextrw which produces a 32-bit result.
@@ -5783,17 +6116,45 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+
+ // If this is a 256-bit vector result, first insert into a 128-bit
+ // vector and then insert into the 256-bit vector.
+ if (VT.getSizeInBits() > 128) {
+ if (!isa<ConstantSDNode>(N2))
+ return SDValue();
+
+ // Get the 128-bit vector.
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue();
+ bool Upper = IdxVal >= NumElems / 2;
+
+ SDValue SubN0 = Extract128BitVector(N0, N2, DAG, dl);
+
+ // Insert into it.
+ SDValue ScaledN2 = N2;
+ if (Upper)
+ ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2,
+ DAG.getConstant(NumElems /
+ (VT.getSizeInBits() / 128),
+ N2.getValueType()));
+ Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0,
+ N1, ScaledN2);
+
+ // Insert the 128-bit vector
+ // FIXME: Why UNDEF?
+ return Insert128BitVector(N0, Op, N2, DAG, dl);
+ }
+
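The insert path mirrors the extract path: pull out the affected 128-bit lane, insert at the rebased index, then write the lane back. An intrinsics sketch for writing element 5 of a v8f32 (5 rebases to 5 - 4 = 1 in the upper lane; assumes SSE4.1 insertps is available):

#include <immintrin.h>

static __m256 insert_elt5(__m256 v, float s) {
  __m128 hi = _mm256_extractf128_ps(v, 1);        // grab the upper lane
  hi = _mm_insert_ps(hi, _mm_set_ss(s), 1 << 4);  // write lane element 1
  return _mm256_insertf128_ps(v, hi, 1);          // put the lane back
}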
if (Subtarget->hasSSE41())
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
if (EltVT == MVT::i8)
return SDValue();
- DebugLoc dl = Op.getDebugLoc();
- SDValue N0 = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
- SDValue N2 = Op.getOperand(2);
-
if (EltVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
    // Transform it so it matches pinsrw which expects a 16-bit value in a GR32
// as its second argument.
@@ -5808,7 +6169,25 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+ LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
+ EVT OpVT = Op.getValueType();
+
+ // If this is a 256-bit vector result, first insert into a 128-bit
+ // vector and then insert into the 256-bit vector.
+ if (OpVT.getSizeInBits() > 128) {
+ // Insert into a 128-bit vector.
+ EVT VT128 = EVT::getVectorVT(*Context,
+ OpVT.getVectorElementType(),
+ OpVT.getVectorNumElements() / 2);
+
+ Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
+
+ // Insert the 128-bit vector.
+ return Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, OpVT), Op,
+ DAG.getConstant(0, MVT::i32),
+ DAG, dl);
+ }
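For the 256-bit case the scalar lands in the low 128-bit half and the upper half is left undefined, which is what a 128-to-256 cast expresses at the intrinsics level (a sketch; SCALAR_TO_VECTOR leaves the remaining elements undefined, whereas _mm_set_ss zeroes the rest of the low lane):

#include <immintrin.h>

static __m256 scalar_to_v8f32(float s) {
  return _mm256_castps128_ps256(_mm_set_ss(s));  // upper 128 bits undefined
}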
if (Op.getValueType() == MVT::v1i64 &&
Op.getOperand(0).getValueType() == MVT::i64)
@@ -5817,10 +6196,47 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
assert(Op.getValueType().getSimpleVT().getSizeInBits() == 128 &&
"Expected an SSE type!");
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(),
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(),
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
}
+// Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in
+// a simple subregister reference or explicit instructions to grab
+// upper bits of a vector.
+SDValue
+X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->hasAVX()) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ SDValue Vec = Op.getNode()->getOperand(0);
+ SDValue Idx = Op.getNode()->getOperand(1);
+
+ if (Op.getNode()->getValueType(0).getSizeInBits() == 128 &&
+ Vec.getNode()->getValueType(0).getSizeInBits() == 256) {
+ return Extract128BitVector(Vec, Idx, DAG, dl);
+ }
+ }
+ return SDValue();
+}
+
+// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a
+// simple superregister reference or explicit instructions to insert
+// the upper bits of a vector.
+SDValue
+X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->hasAVX()) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ SDValue Vec = Op.getNode()->getOperand(0);
+ SDValue SubVec = Op.getNode()->getOperand(1);
+ SDValue Idx = Op.getNode()->getOperand(2);
+
+ if (Op.getNode()->getValueType(0).getSizeInBits() == 256 &&
+ SubVec.getNode()->getValueType(0).getSizeInBits() == 128) {
+ return Insert128BitVector(Vec, SubVec, Idx, DAG, dl);
+ }
+ }
+ return SDValue();
+}
+
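These two lowerings map directly onto the AVX lane instructions; for the upper-lane case (element index 4 of a v8f32) they are just vextractf128 and vinsertf128:

#include <immintrin.h>

static __m128 get_upper(__m256 v) {
  return _mm256_extractf128_ps(v, 1);     // EXTRACT_SUBVECTOR at index 4
}
static __m256 set_upper(__m256 v, __m128 hi) {
  return _mm256_insertf128_ps(v, hi, 1);  // INSERT_SUBVECTOR at index 4
}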
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above-mentioned nodes. It has to be wrapped because otherwise
@@ -6015,7 +6431,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags) {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
DebugLoc dl = GA->getDebugLoc();
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
@@ -6150,7 +6566,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
OpFlag = X86II::MO_TLVP;
DebugLoc DL = Op.getDebugLoc();
SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
- getPointerTy(),
+ GA->getValueType(0),
GA->getOffset(), OpFlag);
SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
@@ -6163,8 +6579,10 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
// Lowering the machine isd will make sure everything is in the right
// location.
- SDValue Args[] = { Offset };
- SDValue Chain = DAG.getNode(X86ISD::TLSCALL, DL, MVT::Other, Args, 1);
+ SDValue Chain = DAG.getEntryNode();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Args[] = { Chain, Offset };
+ Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args, 2);
// TLSCALL will be codegen'ed as call. Inform MFI that function has calls.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
@@ -6269,7 +6687,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
SDVTList Tys;
bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
if (useSSE)
- Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
+ Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue);
else
Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
@@ -6388,7 +6806,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
MachinePointerInfo::getConstantPool(),
false, false, 16);
SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0);
- SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2);
+ SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck2);
SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
MachinePointerInfo::getConstantPool(),
false, false, 16);
@@ -6418,19 +6836,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
DAG.getIntPtrConstant(0)));
Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Load),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
DAG.getIntPtrConstant(0));
// Or the load with the bias.
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
MVT::v2f64, Load)),
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
MVT::v2f64, Bias)));
Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Or),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or),
DAG.getIntPtrConstant(0));
// Subtract the bias.
@@ -6525,7 +6943,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Load the value out, extending it from f32 to f80.
// FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, MVT::f80, dl, DAG.getEntryNode(),
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
FudgePtr, MachinePointerInfo::getConstantPool(),
MVT::f32, false, false, 4);
// Extend everything to 80 bits to force it to be done on x87.
@@ -6688,11 +7106,11 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo::getConstantPool(),
false, false, 16);
if (VT.isVector()) {
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::XOR, dl, MVT::v2i64,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
Op.getOperand(0)),
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, Mask)));
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Mask)));
} else {
return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
}
@@ -6744,7 +7162,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, SignBit);
SignBit = DAG.getNode(X86ISD::FSRL, dl, MVT::v2f64, SignBit,
DAG.getConstant(32, MVT::i32));
- SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, SignBit);
+ SignBit = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, SignBit);
SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, SignBit,
DAG.getIntPtrConstant(0));
}
@@ -7003,8 +7421,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// Lower (X & (1 << N)) == 0 to BT(X, N).
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
- if (Op0.getOpcode() == ISD::AND &&
- Op0.hasOneUse() &&
+ if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
Op1.getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(Op1)->isNullValue() &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
@@ -7013,19 +7430,25 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return NewSetCC;
}
- // Look for "(setcc) == / != 1" to avoid unncessary setcc.
- if (Op0.getOpcode() == X86ISD::SETCC &&
- Op1.getOpcode() == ISD::Constant &&
+ // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
+ // these.
+ if (Op1.getOpcode() == ISD::Constant &&
(cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
cast<ConstantSDNode>(Op1)->isNullValue()) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
- X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
- bool Invert = (CC == ISD::SETNE) ^
- cast<ConstantSDNode>(Op1)->isNullValue();
- if (Invert)
+
+ // If the input is a setcc, then reuse the input setcc or use a new one with
+ // the inverted condition.
+ if (Op0.getOpcode() == X86ISD::SETCC) {
+ X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
+ bool Invert = (CC == ISD::SETNE) ^
+ cast<ConstantSDNode>(Op1)->isNullValue();
+ if (!Invert) return Op0;
+
CCode = X86::GetOppositeBranchCondition(CCode);
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+ }
}
bool isFP = Op1.getValueType().isFloatingPoint();
@@ -7033,17 +7456,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (X86CC == X86::COND_INVALID)
return SDValue();
- SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG);
-
- // Use sbb x, x to materialize carry bit into a GPR.
- if (X86CC == X86::COND_B)
- return DAG.getNode(ISD::AND, dl, MVT::i8,
- DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), Cond),
- DAG.getConstant(1, MVT::i8));
-
+ SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), Cond);
+ DAG.getConstant(X86CC, MVT::i8), EFLAGS);
}
SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
@@ -7167,6 +7582,8 @@ static bool isX86LogicalCmp(SDValue Op) {
if (Op.getResNo() == 1 &&
(Opc == X86ISD::ADD ||
Opc == X86ISD::SUB ||
+ Opc == X86ISD::ADC ||
+ Opc == X86ISD::SBB ||
Opc == X86ISD::SMUL ||
Opc == X86ISD::UMUL ||
Opc == X86ISD::INC ||
@@ -7176,13 +7593,28 @@ static bool isX86LogicalCmp(SDValue Op) {
Opc == X86ISD::AND))
return true;
+ if (Op.getResNo() == 2 && Opc == X86ISD::UMUL)
+ return true;
+
return false;
}
+static bool isZero(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ return C && C->isNullValue();
+}
+
+static bool isAllOnes(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ return C && C->isAllOnesValue();
+}
+
SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
bool addTest = true;
SDValue Cond = Op.getOperand(0);
- DebugLoc dl = Op.getDebugLoc();
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
SDValue CC;
if (Cond.getOpcode() == ISD::SETCC) {
@@ -7191,30 +7623,40 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Cond = NewCond;
}
- // (select (x == 0), -1, 0) -> (sign_bit (x - 1))
- SDValue Op1 = Op.getOperand(1);
- SDValue Op2 = Op.getOperand(2);
+ // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
+ // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
+ // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
+ // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
if (Cond.getOpcode() == X86ISD::SETCC &&
- cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue() == X86::COND_E) {
+ Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
+ isZero(Cond.getOperand(1).getOperand(1))) {
SDValue Cmp = Cond.getOperand(1);
- if (Cmp.getOpcode() == X86ISD::CMP) {
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op1);
+
+      unsigned CondCode = cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
+
+ if ((isAllOnes(Op1) || isAllOnes(Op2)) &&
+ (CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
+ SDValue Y = isAllOnes(Op2) ? Op1 : Op2;
+
+ SDValue CmpOp0 = Cmp.getOperand(0);
+ Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
+ CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
+
+ SDValue Res = // Res = 0 or -1.
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getConstant(X86::COND_B, MVT::i8), Cmp);
+
+ if (isAllOnes(Op1) != (CondCode == X86::COND_E))
+ Res = DAG.getNOT(DL, Res, Res.getValueType());
+
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2);
- ConstantSDNode *RHSC =
- dyn_cast<ConstantSDNode>(Cmp.getOperand(1).getNode());
- if (N1C && N1C->isAllOnesValue() &&
- N2C && N2C->isNullValue() &&
- RHSC && RHSC->isNullValue()) {
- SDValue CmpOp0 = Cmp.getOperand(0);
- Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
- CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
- return DAG.getNode(X86ISD::SETCC_CARRY, dl, Op.getValueType(),
- DAG.getConstant(X86::COND_B, MVT::i8), Cmp);
- }
+ if (N2C == 0 || !N2C->isNullValue())
+ Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
+ return Res;
}
}
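The arithmetic behind this transform, as a scalar sketch (illustrative names): subtracting 1 from x borrows exactly when x == 0, and SETCC_CARRY (an sbb of a register with itself) splats that borrow into 0 or -1, so the select needs neither a branch nor a cmov.

    #include <cstdint>

    // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
    uint32_t selectEqZeroAllOnesOrY(uint32_t x, uint32_t y) {
      uint32_t mask = (x == 0) ? 0xFFFFFFFFu : 0u;  // cmp $1, x ; sbb r, r
      return mask | y;  // -1 | y is -1; 0 | y is y
    }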
- // Look pass (and (setcc_carry (cmp ...)), 1).
+ // Look past (and (setcc_carry (cmp ...)), 1).
if (Cond.getOpcode() == ISD::AND &&
Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
@@ -7252,7 +7694,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
- SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG);
if (NewSetCC.getNode()) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
@@ -7266,11 +7708,28 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Cond = EmitTest(Cond, X86::COND_NE, DAG);
}
+ // a < b ? -1 : 0 -> RES = ~setcc_carry
+ // a < b ? 0 : -1 -> RES = setcc_carry
+ // a >= b ? -1 : 0 -> RES = setcc_carry
+ // a >= b ? 0 : -1 -> RES = ~setcc_carry
+ if (Cond.getOpcode() == X86ISD::CMP) {
+ unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
+
+ if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
+ (isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) {
+ SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getConstant(X86::COND_B, MVT::i8), Cond);
+ if (isAllOnes(Op1) != (CondCode == X86::COND_B))
+ return DAG.getNOT(DL, Res, Res.getValueType());
+ return Res;
+ }
+ }
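In scalar terms (a sketch): after comparing a with b, the carry flag is exactly the unsigned a < b, so one sbb materializes the whole mask; the getNOT covers the two mirrored forms in the comment above.

    #include <cstdint>

    // a < b ? -1 : 0 with no branch: cmp ; sbb r, r
    uint32_t belowMask(uint32_t a, uint32_t b) {
      return (a < b) ? 0xFFFFFFFFu : 0u;
    }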
+
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
SDValue Ops[] = { Op2, Op1, CC, Cond };
- return DAG.getNode(X86ISD::CMOV, dl, VTs, Ops, array_lengthof(Ops));
+ return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
}
// isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or
@@ -7469,7 +7928,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag);
Flag = Chain.getValue(1);
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
Flag = Chain.getValue(1);
@@ -7577,7 +8036,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
assert(!UseSoftFloat &&
!(DAG.getMachineFunction()
.getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
- Subtarget->hasSSE1());
+ Subtarget->hasXMM());
}
// Insert VAARG_64 node into the DAG
@@ -7893,7 +8352,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
}
EVT VT = Op.getValueType();
- ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT, ShAmt);
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(NewIntNo, MVT::i32),
Op.getOperand(1), ShAmt);
@@ -8148,7 +8607,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
const TargetMachine &TM = MF.getTarget();
- const TargetFrameInfo &TFI = *TM.getFrameInfo();
+ const TargetFrameLowering &TFI = *TM.getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
EVT VT = Op.getValueType();
DebugLoc DL = Op.getDebugLoc();
@@ -8327,7 +8786,7 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
false, false, 16);
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
- Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, Op);
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
}
@@ -8353,9 +8812,7 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(4, MVT::i32));
- R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
- R, M, Op);
+ R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
@@ -8370,15 +8827,12 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(2, MVT::i32));
- R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
- R, M, Op);
+ R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
// return pblendv(r, r+r, a);
- R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
+ R = DAG.getNode(X86ISD::PBLENDVB, dl, VT,
R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op);
return R;
}
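A per-lane model of this v16i8 variable shift (a sketch; it assumes the shift amount was pre-scaled by a "<< 5" earlier in the function, outside this hunk, so that bit 2 of the amount sits in each byte's sign bit, which is what PBLENDVB selects on):

    #include <cstdint>

    uint8_t shlVarByte(uint8_t r, uint8_t amt) {
      uint8_t a = amt << 5;                  // bit 2 of amt -> sign bit
      if ((int8_t)a < 0) r <<= 4;  a <<= 1;  // pblendvb(r, r << 4, a)
      if ((int8_t)a < 0) r <<= 2;  a <<= 1;  // pblendvb(r, r << 2, a)
      if ((int8_t)a < 0) r += r;             // pblendvb(r, r + r, a)
      return r;
    }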
@@ -8395,8 +8849,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
SDValue RHS = N->getOperand(1);
unsigned BaseOp = 0;
unsigned Cond = 0;
- DebugLoc dl = Op.getDebugLoc();
-
+ DebugLoc DL = Op.getDebugLoc();
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown ovf instruction!");
case ISD::SADDO:
@@ -8435,19 +8888,29 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
BaseOp = X86ISD::SMUL;
Cond = X86::COND_O;
break;
- case ISD::UMULO:
- BaseOp = X86ISD::UMUL;
- Cond = X86::COND_B;
- break;
+ case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
+ MVT::i32);
+ SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
+
+ SDValue SetCC =
+ DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(X86::COND_O, MVT::i32),
+ SDValue(Sum.getNode(), 2));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
+ return Sum;
+ }
}
// Also sets EFLAGS.
SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
- SDValue Sum = DAG.getNode(BaseOp, dl, VTs, LHS, RHS);
+ SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
SDValue SetCC =
- DAG.getNode(X86ISD::SETCC, dl, N->getValueType(1),
- DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1));
+ DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
+ DAG.getConstant(Cond, MVT::i32),
+ SDValue(Sum.getNode(), 1));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
return Sum;
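What the new UMULO expansion computes, in scalar form (a sketch; the GCC/Clang __int128 extension here only stands in for the widening multiply):

    #include <cstdint>

    // One MUL yields both halves (RDX:RAX). Overflow is exactly "high
    // half non-zero", which the hardware reports in OF/CF; the SETCC
    // built above reads it back as a seto.
    bool umulOverflow64(uint64_t a, uint64_t b, uint64_t &lo) {
      unsigned __int128 p = (unsigned __int128)a * b;
      lo = (uint64_t)p;
      return (uint64_t)(p >> 64) != 0;
    }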
@@ -8520,7 +8983,7 @@ SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
Op.getOperand(3),
DAG.getTargetConstant(size, MVT::i8),
cpIn.getValue(1) };
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
Ops, 5, T, MMO);
@@ -8532,7 +8995,7 @@ SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->is64Bit() && "Result not type legalized?");
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue TheChain = Op.getOperand(0);
DebugLoc dl = Op.getDebugLoc();
SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
@@ -8548,16 +9011,15 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
return DAG.getMergeValues(Ops, 2, dl);
}
-SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op,
+SDValue X86TargetLowering::LowerBITCAST(SDValue Op,
SelectionDAG &DAG) const {
EVT SrcVT = Op.getOperand(0).getValueType();
EVT DstVT = Op.getValueType();
- assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
- Subtarget->hasMMX() && !DisableMMX) &&
- "Unexpected custom BIT_CONVERT");
+ assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
+ Subtarget->hasMMX() && "Unexpected custom BITCAST");
assert((DstVT == MVT::i64 ||
(DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
- "Unexpected custom BIT_CONVERT");
+ "Unexpected custom BITCAST");
// i64 <=> MMX conversions are Legal.
if (SrcVT==MVT::i64 && DstVT.isVector())
return Op;
@@ -8569,6 +9031,7 @@ SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op,
// All other conversions need to be expanded.
return SDValue();
}
+
SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
@@ -8583,6 +9046,32 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
cast<AtomicSDNode>(Node)->getAlignment());
}
+static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getNode()->getValueType(0);
+
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+ unsigned Opc;
+ bool ExtraOp = false;
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Invalid code");
+ case ISD::ADDC: Opc = X86ISD::ADD; break;
+ case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break;
+ case ISD::SUBC: Opc = X86ISD::SUB; break;
+ case ISD::SUBE: Opc = X86ISD::SBB; ExtraOp = true; break;
+ }
+
+ if (!ExtraOp)
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1), Op.getOperand(2));
+}
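What ADDC/ADDE model once they become these X86 nodes, as a scalar sketch: a multi-word add whose carry travels through EFLAGS.

    #include <cstdint>

    void add128(uint64_t aLo, uint64_t aHi, uint64_t bLo, uint64_t bHi,
                uint64_t &rLo, uint64_t &rHi) {
      rLo = aLo + bLo;                        // ISD::ADDC -> add (sets CF)
      rHi = aHi + bHi + (rLo < aLo ? 1 : 0);  // ISD::ADDE -> adc (uses CF)
    }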
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -8596,6 +9085,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
+ case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
@@ -8640,7 +9131,11 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SMULO:
case ISD::UMULO: return LowerXALUO(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
- case ISD::BIT_CONVERT: return LowerBIT_CONVERT(Op, DAG);
+ case ISD::BITCAST: return LowerBITCAST(Op, DAG);
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
}
}
@@ -8677,6 +9172,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
default:
assert(false && "Do not know how to custom type legalize this operation!");
return;
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE:
+ // We don't want to expand or promote these.
+ return;
case ISD::FP_TO_SINT: {
std::pair<SDValue,SDValue> Vals =
FP_TO_INTHelper(SDValue(N, 0), DAG, true);
@@ -8690,7 +9191,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::READCYCLECOUNTER: {
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue TheChain = N->getOperand(0);
SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32,
@@ -8726,7 +9227,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Ops[] = { swapInH.getValue(0),
N->getOperand(1),
swapInH.getValue(1) };
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys,
Ops, 3, T, MMO);
@@ -8803,6 +9304,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PINSRB: return "X86ISD::PINSRB";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
+ case X86ISD::PANDN: return "X86ISD::PANDN";
+ case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
+ case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
+ case X86ISD::PSIGND: return "X86ISD::PSIGND";
+ case X86ISD::PBLENDVB: return "X86ISD::PBLENDVB";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
@@ -8836,6 +9342,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ";
case X86ISD::ADD: return "X86ISD::ADD";
case X86ISD::SUB: return "X86ISD::SUB";
+ case X86ISD::ADC: return "X86ISD::ADC";
+ case X86ISD::SBB: return "X86ISD::SBB";
case X86ISD::SMUL: return "X86ISD::SMUL";
case X86ISD::UMUL: return "X86ISD::UMUL";
case X86ISD::INC: return "X86ISD::INC";
@@ -8869,6 +9377,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::MOVSS: return "X86ISD::MOVSS";
case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
+ case X86ISD::VUNPCKLPS: return "X86ISD::VUNPCKLPS";
+ case X86ISD::VUNPCKLPD: return "X86ISD::VUNPCKLPD";
+ case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY";
+ case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY";
case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW";
@@ -9403,15 +9915,12 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
MachineBasicBlock *
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
unsigned numArgs, bool memArg) const {
-
assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) &&
"Target must have SSE4.2 or AVX features enabled");
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
unsigned Opc;
-
if (!Subtarget->hasAVX()) {
if (memArg)
Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
@@ -9424,20 +9933,59 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
}
- MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc));
-
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
for (unsigned i = 0; i < numArgs; ++i) {
MachineOperand &Op = MI->getOperand(i+1);
-
if (!(Op.isReg() && Op.isImplicit()))
MIB.addOperand(Op);
}
-
- BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
+ BuildMI(*BB, MI, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
.addReg(X86::XMM0);
MI->eraseFromParent();
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const {
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ // Address into RAX/EAX, other two args into ECX, EDX.
+ unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
+ unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
+ for (int i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(i));
+
+ unsigned ValOps = X86::AddrNumOperands;
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
+ .addReg(MI->getOperand(ValOps).getReg());
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX)
+ .addReg(MI->getOperand(ValOps+1).getReg());
+
+  // MONITOR itself takes no explicit operands; it reads the registers above.
+ BuildMI(*BB, MI, dl, TII->get(X86::MONITORrrr));
+
+ MI->eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
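From user code this path is normally reached through the SSE3 intrinsic; a sketch (the register placement is exactly what EmitMonitor arranges):

    #include <pmmintrin.h>

    void monitorLine(const void *p) {
      // address -> RAX/EAX via lea, extension word -> ECX, hint -> EDX,
      // then the operand-less MONITOR instruction executes.
      _mm_monitor(p, 0, 0);
    }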
+
+MachineBasicBlock *
+X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const {
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ // First arg in ECX, the second in EAX.
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
+ .addReg(MI->getOperand(0).getReg());
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
+ .addReg(MI->getOperand(1).getReg());
+
+  // MWAIT itself takes no explicit operands; it reads the registers above.
+ BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr));
+
+ MI->eraseFromParent(); // The pseudo is gone now.
return BB;
}
@@ -9925,6 +10473,30 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
+ case X86::TAILJMPd64:
+ case X86::TAILJMPr64:
+ case X86::TAILJMPm64:
+    assert(!"TAILJMP64 should not be touched here.");
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+    // The defs of TCRETURNxx64 include Win64's callee-saved registers as a
+    // subset. On AMD64 (non-Win64), the additional defs below must be added
+    // before register allocation.
+ if (!Subtarget->isTargetWin64()) {
+ MI->addRegisterDefined(X86::RSI);
+ MI->addRegisterDefined(X86::RDI);
+ MI->addRegisterDefined(X86::XMM6);
+ MI->addRegisterDefined(X86::XMM7);
+ MI->addRegisterDefined(X86::XMM8);
+ MI->addRegisterDefined(X86::XMM9);
+ MI->addRegisterDefined(X86::XMM10);
+ MI->addRegisterDefined(X86::XMM11);
+ MI->addRegisterDefined(X86::XMM12);
+ MI->addRegisterDefined(X86::XMM13);
+ MI->addRegisterDefined(X86::XMM14);
+ MI->addRegisterDefined(X86::XMM15);
+ }
+ return BB;
case X86::WIN_ALLOCA:
return EmitLoweredWinAlloca(MI, BB);
case X86::TLSCall_32:
@@ -10040,6 +10612,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VPCMPESTRM128MEM:
return EmitPCMP(MI, BB, 5, true /* in mem */);
+ // Thread synchronization.
+ case X86::MONITOR:
+ return EmitMonitor(MI, BB);
+ case X86::MWAIT:
+ return EmitMwait(MI, BB);
+
// Atomic Lowering.
case X86::ATOMAND32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
@@ -10233,6 +10811,8 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
default: break;
case X86ISD::ADD:
case X86ISD::SUB:
+ case X86ISD::ADC:
+ case X86ISD::SBB:
case X86ISD::SMUL:
case X86ISD::UMUL:
case X86ISD::INC:
@@ -10281,13 +10861,18 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+ TargetLowering::DAGCombinerInfo &DCI) {
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
if (VT.getSizeInBits() != 128)
return SDValue();
+ // Don't create instructions with illegal types after legalize types has run.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
+ return SDValue();
+
SmallVector<SDValue, 16> Elts;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
@@ -10944,6 +11529,36 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
return SDValue();
}
+
+static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ // Want to form PANDN nodes, in the hopes of then easily combining them with
+ // OR and AND nodes to form PBLEND/PSIGN.
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v2i64)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Check LHS for vnot
+ if (N0.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::PANDN, DL, VT, N0.getOperand(0), N1);
+
+ // Check RHS for vnot
+ if (N1.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::PANDN, DL, VT, N1.getOperand(0), N0);
+
+ return SDValue();
+}
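In scalar terms (a sketch): the combine folds a vector "xor with all-ones" feeding an AND into one PANDN, computing ~x & y in a single instruction.

    #include <cstdint>

    uint64_t andNot(uint64_t x, uint64_t y) {
      return ~x & y;  // (and (xor x, -1), y) -> pandn x, y
    }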
+
static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
@@ -10951,12 +11566,99 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT VT = N->getValueType(0);
- if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
return SDValue();
- // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+
+ // look for psign/blend
+ if (Subtarget->hasSSSE3()) {
+ if (VT == MVT::v2i64) {
+ // Canonicalize pandn to RHS
+ if (N0.getOpcode() == X86ISD::PANDN)
+ std::swap(N0, N1);
+ // or (and (m, x), (pandn m, y))
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::PANDN) {
+ SDValue Mask = N1.getOperand(0);
+ SDValue X = N1.getOperand(1);
+ SDValue Y;
+ if (N0.getOperand(0) == Mask)
+ Y = N0.getOperand(1);
+ if (N0.getOperand(1) == Mask)
+ Y = N0.getOperand(0);
+
+      // Check to see if the mask appeared in both the AND and the PANDN.
+ if (!Y.getNode())
+ return SDValue();
+
+      // Validate that X, Y, and Mask are bitcasts, and see through them.
+ if (Mask.getOpcode() != ISD::BITCAST ||
+ X.getOpcode() != ISD::BITCAST ||
+ Y.getOpcode() != ISD::BITCAST)
+ return SDValue();
+
+ // Look through mask bitcast.
+ Mask = Mask.getOperand(0);
+ EVT MaskVT = Mask.getValueType();
+
+ // Validate that the Mask operand is a vector sra node. The sra node
+ // will be an intrinsic.
+ if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
+ return SDValue();
+
+ // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
+ // there is no psrai.b
+ switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ break;
+ default: return SDValue();
+ }
+
+ // Check that the SRA is all signbits.
+ SDValue SraC = Mask.getOperand(2);
+ unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
+ unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+ if ((SraAmt + 1) != EltBits)
+ return SDValue();
+
+ DebugLoc DL = N->getDebugLoc();
+
+        // Now we know we at least have a pblendvb with the mask val. See if
+ // we can form a psignb/w/d.
+ // psign = x.type == y.type == mask.type && y = sub(0, x);
+ X = X.getOperand(0);
+ Y = Y.getOperand(0);
+ if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
+ ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
+ X.getValueType() == MaskVT && X.getValueType() == Y.getValueType()){
+ unsigned Opc = 0;
+ switch (EltBits) {
+ case 8: Opc = X86ISD::PSIGNB; break;
+ case 16: Opc = X86ISD::PSIGNW; break;
+ case 32: Opc = X86ISD::PSIGND; break;
+ default: break;
+ }
+ if (Opc) {
+ SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1));
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign);
+ }
+ }
+ // PBLENDVB only available on SSE 4.1
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
+ Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
+ Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
+ Mask = DAG.getNode(X86ISD::PBLENDVB, DL, MVT::v16i8, X, Y, Mask);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
+ }
+ }
+ }
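A per-lane model of the PSIGN shape matched above (a sketch): the mask is an arithmetic shift that splats each element's sign bit, and the or/and/pandn triple selects between x and 0 - x on that mask. (PSIGN itself also zeroes lanes whose control element is zero, which this scalar model does not capture.)

    #include <cstdint>

    int32_t condNegate(int32_t x, int32_t ctl) {
      int32_t m = ctl >> 31;       // psrad $31: splat the sign bit
      return (x & ~m) | (-x & m);  // m ? -x : x  -> psignd x, ctl
    }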
+
+ // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
@@ -11175,13 +11877,13 @@ static SDValue PerformBTCombine(SDNode *N,
static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
SDValue Op = N->getOperand(0);
- if (Op.getOpcode() == ISD::BIT_CONVERT)
+ if (Op.getOpcode() == ISD::BITCAST)
Op = Op.getOperand(0);
EVT VT = N->getValueType(0), OpVT = Op.getValueType();
if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
VT.getVectorElementType().getSizeInBits() ==
OpVT.getVectorElementType().getSizeInBits()) {
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
}
return SDValue();
}
@@ -11212,19 +11914,106 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+ unsigned X86CC = N->getConstantOperandVal(0);
+ SDValue EFLAG = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Materialize "setb reg" as "sbb reg,reg", since it can be extended without
+  // a zext and produces an all-ones value, which is more useful than 0/1 in
+  // some cases.
+ if (X86CC == X86::COND_B)
+ return DAG.getNode(ISD::AND, DL, MVT::i8,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), EFLAG),
+ DAG.getConstant(1, MVT::i8));
+
+ return SDValue();
+}
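Why COND_B gets this special form (a sketch): sbb of a register with itself yields 0 or -1 from CF alone, so an unsigned below-compare can skip setb plus a zero extension; the AND with 1 recovers the 0/1 value and folds away whenever only the mask is wanted.

    #include <cstdint>

    uint32_t isBelow(uint32_t a, uint32_t b) {
      return a < b ? 1u : 0u;  // cmp ; sbb r, r ; and $1, r
    }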
+
+// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
+static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
+ X86TargetLowering::DAGCombinerInfo &DCI) {
+ // If the LHS and RHS of the ADC node are zero, then it can't overflow and
+ // the result is either zero or one (depending on the input carry bit).
+ // Strength reduce this down to a "set on carry" aka SETCC_CARRY&1.
+ if (X86::isZeroNode(N->getOperand(0)) &&
+ X86::isZeroNode(N->getOperand(1)) &&
+      // We don't have a good way to replace an EFLAGS use, so for now only
+      // do this when the flag result is dead.
+ SDValue(N, 1).use_empty()) {
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SDValue CarryOut = DAG.getConstant(0, N->getValueType(1));
+ SDValue Res1 = DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getConstant(X86::COND_B,MVT::i8),
+ N->getOperand(2)),
+ DAG.getConstant(1, VT));
+ return DCI.CombineTo(N, Res1, CarryOut);
+ }
+
+ return SDValue();
+}
+
+// fold (add Y, (sete X, 0)) -> adc 0, Y
+// (add Y, (setne X, 0)) -> sbb -1, Y
+// (sub (sete X, 0), Y) -> sbb 0, Y
+// (sub (setne X, 0), Y) -> adc -1, Y
+static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // Look through ZExts.
+ SDValue Ext = N->getOperand(N->getOpcode() == ISD::SUB ? 1 : 0);
+ if (Ext.getOpcode() != ISD::ZERO_EXTEND || !Ext.hasOneUse())
+ return SDValue();
+
+ SDValue SetCC = Ext.getOperand(0);
+ if (SetCC.getOpcode() != X86ISD::SETCC || !SetCC.hasOneUse())
+ return SDValue();
+
+ X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0);
+ if (CC != X86::COND_E && CC != X86::COND_NE)
+ return SDValue();
+
+ SDValue Cmp = SetCC.getOperand(1);
+ if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
+ !X86::isZeroNode(Cmp.getOperand(1)) ||
+ !Cmp.getOperand(0).getValueType().isInteger())
+ return SDValue();
+
+ SDValue CmpOp0 = Cmp.getOperand(0);
+ SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0,
+ DAG.getConstant(1, CmpOp0.getValueType()));
+
+ SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1);
+ if (CC == X86::COND_NE)
+ return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB,
+ DL, OtherVal.getValueType(), OtherVal,
+ DAG.getConstant(-1ULL, OtherVal.getValueType()), NewCmp);
+ return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::SBB : X86ISD::ADC,
+ DL, OtherVal.getValueType(), OtherVal,
+ DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
+}
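The first fold above, as a scalar sketch: comparing x against 1 borrows exactly when x == 0, so adding the comparison result to y collapses into one adc of zero.

    #include <cstdint>

    uint32_t addSetE(uint32_t x, uint32_t y) {
      return y + (x == 0 ? 1u : 0u);  // cmp $1, x ; adc $0, y
    }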
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
case ISD::EXTRACT_VECTOR_ELT:
- return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
+ return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
+ case ISD::ADD:
+  case ISD::SUB:            return OptimizeConditionalInDecrement(N, DAG);
+ case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
+ case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case X86ISD::FXOR:
@@ -11233,6 +12022,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
+ case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
case X86ISD::SHUFPS: // Handle all target specific shuffles
case X86ISD::SHUFPD:
case X86ISD::PALIGN:
@@ -11248,6 +12038,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PUNPCKLQDQ:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
+ case X86ISD::VUNPCKLPS:
+ case X86ISD::VUNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
case X86ISD::PSHUFD:
@@ -11255,7 +12049,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PSHUFLW:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
+ case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI);
}
return SDValue();
@@ -11362,38 +12156,8 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
-static bool LowerToBSwap(CallInst *CI) {
- // FIXME: this should verify that we are targetting a 486 or better. If not,
- // we will turn this bswap into something that will be lowered to logical ops
- // instead of emitting the bswap asm. For now, we don't support 486 or lower
- // so don't worry about this.
-
- // Verify this is a simple bswap.
- if (CI->getNumArgOperands() != 1 ||
- CI->getType() != CI->getArgOperand(0)->getType() ||
- !CI->getType()->isIntegerTy())
- return false;
-
- const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
- if (!Ty || Ty->getBitWidth() % 16 != 0)
- return false;
-
- // Okay, we can do this xform, do so now.
- const Type *Tys[] = { Ty };
- Module *M = CI->getParent()->getParent()->getParent();
- Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
-
- Value *Op = CI->getArgOperand(0);
- Op = CallInst::Create(Int, Op, CI->getName(), CI);
-
- CI->replaceAllUsesWith(Op);
- CI->eraseFromParent();
- return true;
-}
-
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
- InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
std::string AsmStr = IA->getAsmString();
@@ -11408,6 +12172,10 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces.clear();
SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace.
+  // FIXME: this should verify that we are targeting a 486 or better. If not,
+ // we will turn this bswap into something that will be lowered to logical ops
+ // instead of emitting the bswap asm. For now, we don't support 486 or lower
+ // so don't worry about this.
// bswap $0
if (AsmPieces.size() == 2 &&
(AsmPieces[0] == "bswap" ||
@@ -11417,7 +12185,10 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces[1] == "${0:q}")) {
// No need to check constraints, nothing other than the equivalent of
// "=r,0" would be valid here.
- return LowerToBSwap(CI);
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
}
// rorw $$8, ${0:w} --> llvm.bswap.i16
if (CI->getType()->isIntegerTy(16) &&
@@ -11427,15 +12198,18 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces[2] == "${0:w}" &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
AsmPieces.clear();
- const std::string &Constraints = IA->getConstraintString();
- SplitString(StringRef(Constraints).substr(5), AsmPieces, ",");
+ const std::string &ConstraintsStr = IA->getConstraintString();
+ SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
std::sort(AsmPieces.begin(), AsmPieces.end());
if (AsmPieces.size() == 4 &&
AsmPieces[0] == "~{cc}" &&
AsmPieces[1] == "~{dirflag}" &&
AsmPieces[2] == "~{flags}" &&
AsmPieces[3] == "~{fpsr}") {
- return LowerToBSwap(CI);
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
}
}
break;
@@ -11455,36 +12229,45 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" &&
Words[2] == "${0:w}") {
AsmPieces.clear();
- const std::string &Constraints = IA->getConstraintString();
- SplitString(StringRef(Constraints).substr(5), AsmPieces, ",");
+ const std::string &ConstraintsStr = IA->getConstraintString();
+ SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
std::sort(AsmPieces.begin(), AsmPieces.end());
if (AsmPieces.size() == 4 &&
AsmPieces[0] == "~{cc}" &&
AsmPieces[1] == "~{dirflag}" &&
AsmPieces[2] == "~{flags}" &&
AsmPieces[3] == "~{fpsr}") {
- return LowerToBSwap(CI);
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
}
}
}
}
}
- if (CI->getType()->isIntegerTy(64) &&
- Constraints.size() >= 2 &&
- Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
- Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
- // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
- SmallVector<StringRef, 4> Words;
- SplitString(AsmPieces[0], Words, " \t");
- if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
- Words.clear();
- SplitString(AsmPieces[1], Words, " \t");
- if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+
+ if (CI->getType()->isIntegerTy(64)) {
+ InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
+ if (Constraints.size() >= 2 &&
+ Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
+ Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
+ // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
+ SmallVector<StringRef, 4> Words;
+ SplitString(AsmPieces[0], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
Words.clear();
- SplitString(AsmPieces[2], Words, " \t,");
- if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
- Words[2] == "%edx") {
- return LowerToBSwap(CI);
+ SplitString(AsmPieces[1], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+ Words.clear();
+ SplitString(AsmPieces[2], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
+ Words[2] == "%edx") {
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
}
}
}
@@ -11575,12 +12358,12 @@ TargetLowering::ConstraintWeight
weight = CW_SpecificReg;
break;
case 'y':
- if (type->isX86_MMXTy() && !DisableMMX && Subtarget->hasMMX())
+ if (type->isX86_MMXTy() && Subtarget->hasMMX())
weight = CW_SpecificReg;
break;
case 'x':
case 'Y':
- if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1())
+ if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasXMM())
weight = CW_Register;
break;
case 'I':
@@ -11650,9 +12433,9 @@ LowerXConstraint(EVT ConstraintVT) const {
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
// 'f' like normal targets.
if (ConstraintVT.isFloatingPoint()) {
- if (Subtarget->hasSSE2())
+ if (Subtarget->hasXMMInt())
return "Y";
- if (Subtarget->hasSSE1())
+ if (Subtarget->hasXMM())
return "x";
}
@@ -11882,10 +12665,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
if (!Subtarget->hasMMX()) break;
return std::make_pair(0U, X86::VR64RegisterClass);
case 'Y': // SSE_REGS if SSE2 allowed
- if (!Subtarget->hasSSE2()) break;
+ if (!Subtarget->hasXMMInt()) break;
// FALL THROUGH.
case 'x': // SSE_REGS if SSE1 allowed
- if (!Subtarget->hasSSE1()) break;
+ if (!Subtarget->hasXMM()) break;
switch (VT.getSimpleVT().SimpleTy) {
default: break;
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 776299e..6ec4a7d 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -86,13 +86,13 @@ namespace llvm {
/// X86 bit-test instructions.
BT,
- /// X86 SetCC. Operand 0 is condition code, and operand 1 is the flag
- /// operand produced by a CMP instruction.
+ /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
+ /// operand, usually produced by a CMP instruction.
SETCC,
// Same as SETCC except it's materialized with a sbb and the value is all
// one's or all zero's.
- SETCC_CARRY,
+ SETCC_CARRY, // R = carry_bit ? ~0 : 0
/// X86 conditional moves. Operand 0 and operand 1 are the two values
/// to select from. Operand 2 is the condition code, and operand 3 is the
@@ -160,6 +160,15 @@ namespace llvm {
/// PSHUFB - Shuffle 16 8-bit values within a vector.
PSHUFB,
+ /// PANDN - and with not'd value.
+ PANDN,
+
+ /// PSIGNB/W/D - Copy integer sign.
+ PSIGNB, PSIGNW, PSIGND,
+
+ /// PBLENDVB - Variable blend
+ PBLENDVB,
+
/// FMAX, FMIN - Floating point max and min.
///
FMAX, FMIN,
@@ -200,10 +209,12 @@ namespace llvm {
PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
- // ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results.
- ADD, SUB, SMUL, UMUL,
+ // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
+ ADD, SUB, ADC, SBB, SMUL,
INC, DEC, OR, XOR, AND,
+ UMUL, // LOW, HI, FLAGS = umul LHS, RHS
+
// MUL_IMM - X86 specific multiply by immediate.
MUL_IMM,
@@ -237,6 +248,10 @@ namespace llvm {
MOVSS,
UNPCKLPS,
UNPCKLPD,
+ VUNPCKLPS,
+ VUNPCKLPD,
+ VUNPCKLPSY,
+ VUNPCKLPDY,
UNPCKHPS,
UNPCKHPD,
PUNPCKLBW,
@@ -256,6 +271,12 @@ namespace llvm {
// WIN_ALLOCA - Windows's _chkstk call to do stack probing.
WIN_ALLOCA,
+ // Memory barrier
+ MEMBARRIER,
+ MFENCE,
+ SFENCE,
+ LFENCE,
+
// ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
// ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
// Atomic 64-bit binary operations.
@@ -267,12 +288,6 @@ namespace llvm {
ATOMNAND64_DAG,
ATOMSWAP64_DAG,
- // Memory barrier
- MEMBARRIER,
- MFENCE,
- SFENCE,
- LFENCE,
-
// LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
LCMPXCHG_DAG,
LCMPXCHG8_DAG,
@@ -397,6 +412,16 @@ namespace llvm {
/// specifies a shuffle of elements that is suitable for input to PALIGNR.
bool isPALIGNRMask(ShuffleVectorSDNode *N);
+ /// isVEXTRACTF128Index - Return true if the specified
+ /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
+ /// suitable for input to VEXTRACTF128.
+ bool isVEXTRACTF128Index(SDNode *N);
+
+ /// isVINSERTF128Index - Return true if the specified
+ /// INSERT_SUBVECTOR operand specifies a subvector insert that is
+ /// suitable for input to VINSERTF128.
+ bool isVINSERTF128Index(SDNode *N);
+
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
@@ -414,6 +439,16 @@ namespace llvm {
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
unsigned getShufflePALIGNRImmediate(SDNode *N);
+ /// getExtractVEXTRACTF128Immediate - Return the appropriate
+ /// immediate to extract the specified EXTRACT_SUBVECTOR index
+ /// with VEXTRACTF128 instructions.
+ unsigned getExtractVEXTRACTF128Immediate(SDNode *N);
+
+ /// getInsertVINSERTF128Immediate - Return the appropriate
+ /// immediate to insert at the specified INSERT_SUBVECTOR index
+ /// with VINSERTF128 instructions.
+ unsigned getInsertVINSERTF128Immediate(SDNode *N);
+
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool isZeroNode(SDValue Elt);
@@ -432,6 +467,8 @@ namespace llvm {
virtual unsigned getJumpTableEncoding() const;
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+
virtual const MCExpr *
LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB, unsigned uid,
@@ -730,6 +767,8 @@ namespace llvm {
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
@@ -740,7 +779,7 @@ namespace llvm {
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
SelectionDAG &DAG) const;
- SDValue LowerBIT_CONVERT(SDValue op, SelectionDAG &DAG) const;
+ SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const;
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
@@ -805,6 +844,8 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
+ virtual bool isUsedByReturnOnly(SDNode *N) const;
+
virtual bool
CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -821,6 +862,13 @@ namespace llvm {
MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
unsigned argNum, bool inMem) const;
+ /// Utility functions to emit monitor and mwait instructions. These
+ /// need to make sure that the arguments to the intrinsic are in the
+ /// correct registers.
+ MachineBasicBlock *EmitMonitor(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const;
+
/// Utility function to emit atomic bitwise operations (and, or, xor).
/// It takes the bitwise instruction to expand, the associated machine basic
/// block, and the associated X86 opcodes for reg/reg and reg/imm.
@@ -871,6 +919,9 @@ namespace llvm {
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent, for use with the given x86 condition code.
SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index f82e1c6..f0ea068 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -60,11 +60,12 @@ def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
- "mul{l}\t$src",
- []>; // EAX,EDX = EAX*GR32
+ "mul{l}\t$src", // EAX,EDX = EAX*GR32
+ [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
- "mul{q}\t$src", []>; // RAX,RDX = RAX*GR64
+ "mul{q}\t$src", // RAX,RDX = RAX*GR64
+ [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>;
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
@@ -630,6 +631,15 @@ class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+// BinOpRR_RFF - Instructions like "adc reg, reg, reg", where the pattern has
+// both a regclass and EFLAGS as a result, and has EFLAGS as input.
+class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2,
+ EFLAGS))]>;
+
// BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding).
class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo,
@@ -668,6 +678,14 @@ class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+// BinOpRM_RFF - Instructions like "adc reg, reg, [mem]".
+class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2),
+ EFLAGS))]>;
+
// BinOpRI - Instructions like "add reg, reg, imm".
class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
@@ -698,6 +716,14 @@ class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+// BinOpRI_RFF - Instructions like "adc reg, reg, imm".
+class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2,
+ EFLAGS))]>;
+
// BinOpRI8 - Instructions like "add reg, reg, imm8".
class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
@@ -728,6 +754,14 @@ class BinOpRI8_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+// BinOpRI8_RFF - Instructions like "adc reg, reg, imm8".
+class BinOpRI8_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2,
+ EFLAGS))]>;
+
// BinOpMR - Instructions like "add [mem], reg".
class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
list<dag> pattern>
@@ -742,6 +776,14 @@ class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
[(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst),
(implicit EFLAGS)]>;
+// BinOpMR_RMW_FF - Instructions like "adc [mem], reg".
+class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
+ addr:$dst),
+ (implicit EFLAGS)]>;
+
// BinOpMR_F - Instructions like "cmp [mem], reg".
class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
@@ -765,6 +807,14 @@ class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo,
typeinfo.ImmOperator:$src), addr:$dst),
(implicit EFLAGS)]>;
+// BinOpMI_RMW_FF - Instructions like "adc [mem], imm".
+class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(store (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src, EFLAGS), addr:$dst),
+ (implicit EFLAGS)]>;
+
// BinOpMI_F - Instructions like "cmp [mem], imm".
class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo,
SDPatternOperator opnode, Format f, bits<8> opcode = 0x80>
@@ -790,6 +840,14 @@ class BinOpMI8_RMW<string mnemonic, X86TypeInfo typeinfo,
typeinfo.Imm8Operator:$src), addr:$dst),
(implicit EFLAGS)]>;
+// BinOpMI8_RMW_FF - Instructions like "adc [mem], imm8".
+class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(store (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src, EFLAGS), addr:$dst),
+ (implicit EFLAGS)]>;
+
// BinOpMI8_F - Instructions like "cmp [mem], imm8".
class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
@@ -875,22 +933,24 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
}
}
-/// ArithBinOp_R - This is an arithmetic binary operator where the pattern is
-/// defined with "(set GPR:$dst, (...". It would be really nice to find a way
-/// to factor this with the other ArithBinOp_*.
+/// ArithBinOp_RFF - This is an arithmetic binary operator where the pattern is
+/// defined with "(set GPR:$dst, EFLAGS, (node LHS, RHS, EFLAGS))" like ADC and
+/// SBB.
///
-multiclass ArithBinOp_R<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
- string mnemonic, Format RegMRM, Format MemMRM,
- SDNode opnode,
- bit CommutableRR, bit ConvertibleToThreeAddress> {
+/// It would be nice to get rid of the second and third argument here, but
+/// tblgen can't handle dependent type references aggressively enough: PR8330
+multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnode, bit CommutableRR,
+ bit ConvertibleToThreeAddress> {
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst" in {
let isCommutable = CommutableRR,
isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- def #NAME#8rr : BinOpRR_R<BaseOpc, mnemonic, Xi8 , opnode>;
- def #NAME#16rr : BinOpRR_R<BaseOpc, mnemonic, Xi16, opnode>;
- def #NAME#32rr : BinOpRR_R<BaseOpc, mnemonic, Xi32, opnode>;
- def #NAME#64rr : BinOpRR_R<BaseOpc, mnemonic, Xi64, opnode>;
+ def #NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
} // isCommutable
def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
@@ -898,40 +958,40 @@ multiclass ArithBinOp_R<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
- def #NAME#8rm : BinOpRM_R<BaseOpc2, mnemonic, Xi8 , opnode>;
- def #NAME#16rm : BinOpRM_R<BaseOpc2, mnemonic, Xi16, opnode>;
- def #NAME#32rm : BinOpRM_R<BaseOpc2, mnemonic, Xi32, opnode>;
- def #NAME#64rm : BinOpRM_R<BaseOpc2, mnemonic, Xi64, opnode>;
+ def #NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def #NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
+ def #NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>;
+ def #NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
- def #NAME#16ri8 : BinOpRI8_R<0x82, mnemonic, Xi16, opnode, RegMRM>;
- def #NAME#32ri8 : BinOpRI8_R<0x82, mnemonic, Xi32, opnode, RegMRM>;
- def #NAME#64ri8 : BinOpRI8_R<0x82, mnemonic, Xi64, opnode, RegMRM>;
-
- def #NAME#8ri : BinOpRI_R<0x80, mnemonic, Xi8 , opnode, RegMRM>;
- def #NAME#16ri : BinOpRI_R<0x80, mnemonic, Xi16, opnode, RegMRM>;
- def #NAME#32ri : BinOpRI_R<0x80, mnemonic, Xi32, opnode, RegMRM>;
- def #NAME#64ri32: BinOpRI_R<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ def #NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def #NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def #NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>;
}
} // Constraints = "$src1 = $dst"
- def #NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
- def #NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
- def #NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
- def #NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
+ def #NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>;
// NOTE: These are order specific, we want the mi8 forms to be listed
// first so that they are slightly preferred to the mi forms.
- def #NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>;
+ def #NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
- def #NAME#8mi : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>;
- def #NAME#16mi : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>;
+ def #NAME#8mi : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>;
+ def #NAME#16mi : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
@@ -1017,13 +1077,12 @@ defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
// Arithmetic.
let Uses = [EFLAGS] in {
- // FIXME: Delete ArithBinOp_R if these switch off adde/sube.
- defm ADC : ArithBinOp_R<0x10, 0x12, 0x14, "adc", MRM2r, MRM2m, adde, 1, 0>;
- defm SBB : ArithBinOp_R<0x18, 0x1A, 0x1C, "sbb", MRM3r, MRM3m, sube, 0, 0>;
+ defm ADC : ArithBinOp_RFF<0x10, 0x12, 0x14, "adc", MRM2r, MRM2m, X86adc_flag,
+ 1, 0>;
+ defm SBB : ArithBinOp_RFF<0x18, 0x1A, 0x1C, "sbb", MRM3r, MRM3m, X86sbb_flag,
+ 0, 0>;
}
-
-
defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index b299b90..4c915d9 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -193,9 +193,26 @@ def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
} // isCodeGenOnly
+def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C64r)>;
+def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C64r)>;
+
+// We canonicalize 'setb' to "(and (sbb reg,reg), 1)" in the hope that the and
+// will be eliminated and that the sbb can be extended up to a wider type. When
+// that happens, it is a win. However, if we are left with an 8-bit sbb and an
+// and, we might as well just match it as a setb.
+def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
+ (SETBr)>;
//===----------------------------------------------------------------------===//
// String Pseudo Instructions
@@ -244,8 +261,7 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [ESP] in
def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
- "leal\t$sym, %eax; "
- "call\t___tls_get_addr@PLT",
+ "# TLS_addr32",
[(X86tlsaddr tls32addr:$sym)]>,
Requires<[In32BitMode]>;
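The pseudo is presumably expanded during MCInst lowering instead (an assumption; the asm text is moving, not disappearing), producing the same sequence deleted above:

// TLS_addr32 still lowers to:
//   leal  $sym, %eax
//   call  ___tls_get_addr@PLT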
@@ -259,11 +275,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [RSP] in
def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
- ".byte\t0x66; "
- "leaq\t$sym(%rip), %rdi; "
- ".word\t0x6666; "
- "rex64; "
- "call\t__tls_get_addr@PLT",
+ "# TLS_addr64",
[(X86tlsaddr tls64addr:$sym)]>,
Requires<[In64BitMode]>;
@@ -271,7 +283,7 @@ def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
// For i386, the address of the thunk is passed on the stack; on return the
// address of the variable is in %eax. %ecx is trashed during the function
// call. All other registers are preserved.
-let Defs = [EAX, ECX],
+let Defs = [EAX, ECX, EFLAGS],
Uses = [ESP],
usesCustomInserter = 1 in
def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
@@ -281,8 +293,8 @@ def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
// For x86_64, the address of the thunk is passed in %rdi; on return
// the address of the variable is in %rax. All other registers are preserved.
-let Defs = [RAX],
- Uses = [RDI],
+let Defs = [RAX, EFLAGS],
+ Uses = [RSP, RDI],
usesCustomInserter = 1 in
def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
"# TLSCall_64",
@@ -837,38 +849,38 @@ def : Pat<(X86call (i64 texternalsym:$dst)),
// tailcall stuff
def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
(TCRETURNri GR32_TC:$dst, imm:$off)>,
- Requires<[In32BitMode]>;
+ Requires<[In32BitMode]>;
// FIXME: This is disabled for 32-bit PIC mode because the global base
// register which is part of the address mode may be assigned a
// callee-saved register.
def : Pat<(X86tcret (load addr:$dst), imm:$off),
(TCRETURNmi addr:$dst, imm:$off)>,
- Requires<[In32BitMode, IsNotPIC]>;
+ Requires<[In32BitMode, IsNotPIC]>;
def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
(TCRETURNdi texternalsym:$dst, imm:$off)>,
- Requires<[In32BitMode]>;
+ Requires<[In32BitMode]>;
def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
(TCRETURNdi texternalsym:$dst, imm:$off)>,
- Requires<[In32BitMode]>;
+ Requires<[In32BitMode]>;
-def : Pat<(X86tcret GR64_TC:$dst, imm:$off),
- (TCRETURNri64 GR64_TC:$dst, imm:$off)>,
- Requires<[In64BitMode]>;
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+ (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
def : Pat<(X86tcret (load addr:$dst), imm:$off),
(TCRETURNmi64 addr:$dst, imm:$off)>,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode]>;
def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode]>;
def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
(TCRETURNdi64 texternalsym:$dst, imm:$off)>,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode]>;
// Normal calls, with various flavors of addresses.
def : Pat<(X86call (i32 tglobaladdr:$dst)),
@@ -878,43 +890,6 @@ def : Pat<(X86call (i32 texternalsym:$dst)),
def : Pat<(X86call (i32 imm:$dst)),
(CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
-// X86 specific add which produces a flag.
-def : Pat<(addc GR32:$src1, GR32:$src2),
- (ADD32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(addc GR32:$src1, (load addr:$src2)),
- (ADD32rm GR32:$src1, addr:$src2)>;
-def : Pat<(addc GR32:$src1, imm:$src2),
- (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(addc GR32:$src1, i32immSExt8:$src2),
- (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-def : Pat<(addc GR64:$src1, GR64:$src2),
- (ADD64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(addc GR64:$src1, (load addr:$src2)),
- (ADD64rm GR64:$src1, addr:$src2)>;
-def : Pat<(addc GR64:$src1, i64immSExt8:$src2),
- (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(addc GR64:$src1, i64immSExt32:$src2),
- (ADD64ri32 GR64:$src1, imm:$src2)>;
-
-def : Pat<(subc GR32:$src1, GR32:$src2),
- (SUB32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(subc GR32:$src1, (load addr:$src2)),
- (SUB32rm GR32:$src1, addr:$src2)>;
-def : Pat<(subc GR32:$src1, imm:$src2),
- (SUB32ri GR32:$src1, imm:$src2)>;
-def : Pat<(subc GR32:$src1, i32immSExt8:$src2),
- (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-def : Pat<(subc GR64:$src1, GR64:$src2),
- (SUB64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(subc GR64:$src1, (load addr:$src2)),
- (SUB64rm GR64:$src1, addr:$src2)>;
-def : Pat<(subc GR64:$src1, i64immSExt8:$src2),
- (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(subc GR64:$src1, imm:$src2),
- (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
// Comparisons.
// TEST R,R is smaller than CMP R,0
@@ -1043,8 +1018,9 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
let AddedComplexity = 5 in { // Try this before the selecting to OR
-let isCommutable = 1, isConvertibleToThreeAddress = 1,
+let isConvertibleToThreeAddress = 1,
Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
+let isCommutable = 1 in {
def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"", // orw/addw REG, REG
[(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
@@ -1054,6 +1030,7 @@ def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
"", // orq/addq REG, REG
[(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
+} // isCommutable
// NOTE: These are order-specific; we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
@@ -1647,4 +1624,3 @@ def : Pat<(and GR64:$src1, i64immSExt8:$src2),
(AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(and GR64:$src1, i64immSExt32:$src2),
(AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index ee20ebc..77f4725 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -1,10 +1,10 @@
//===- X86InstrControl.td - Control Flow Instructions ------*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 jump, return, call, and related instructions.
@@ -26,7 +26,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
[(X86retflag timm:$amt)]>;
def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"retw\t$amt",
- [(X86retflag timm:$amt)]>, OpSize;
+ []>, OpSize;
def LRETL : I <0xCB, RawFrm, (outs), (ins),
"lretl", []>;
def LRETQ : RI <0xCB, RawFrm, (outs), (ins),
@@ -43,7 +43,7 @@ let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
"jmp\t$dst", [(br bb:$dst)]>;
def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
"jmp\t$dst", []>;
- def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
+ def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
"jmp{q}\t$dst", []>;
}
@@ -108,16 +108,16 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
[(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>;
- def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
+ def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
"ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
- "ljmp{l}\t{$seg, $off|$off, $seg}", []>;
+ "ljmp{l}\t{$seg, $off|$off, $seg}", []>;
def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
"ljmp{q}\t{*}$dst", []>;
- def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
+ def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
"ljmp{w}\t{*}$dst", []>, OpSize;
def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
"ljmp{l}\t{*}$dst", []>;
@@ -152,14 +152,14 @@ let isCall = 1 in
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
"call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
Requires<[In32BitMode]>;
-
- def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
+
+ def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
"lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
"lcall{l}\t{$seg, $off|$off, $seg}", []>;
-
+
def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
"lcall{w}\t{*}$dst", []>, OpSize;
def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
@@ -182,16 +182,13 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [ESP] in {
- def TCRETURNdi : I<0, Pseudo, (outs),
- (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
- "#TC_RETURN $dst $offset", []>;
- def TCRETURNri : I<0, Pseudo, (outs),
- (ins GR32_TC:$dst, i32imm:$offset, variable_ops),
- "#TC_RETURN $dst $offset", []>;
+ def TCRETURNdi : PseudoI<(outs),
+ (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops), []>;
+ def TCRETURNri : PseudoI<(outs),
+ (ins GR32_TC:$dst, i32imm:$offset, variable_ops), []>;
let mayLoad = 1 in
- def TCRETURNmi : I<0, Pseudo, (outs),
- (ins i32mem_TC:$dst, i32imm:$offset, variable_ops),
- "#TC_RETURN $dst $offset", []>;
+ def TCRETURNmi : PseudoI<(outs),
+ (ins i32mem_TC:$dst, i32imm:$offset, variable_ops), []>;
// FIXME: These should be pseudo instructions that are lowered when going to
// MCInst.
@@ -199,7 +196,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
(ins i32imm_pcrel:$dst, variable_ops),
"jmp\t$dst # TAILCALL",
[]>;
- def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
+ def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
"", []>; // FIXME: Remove encoding when JIT is dead.
let mayLoad = 1 in
def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
@@ -221,7 +218,7 @@ let isCall = 1 in
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [RSP] in {
-
+
// NOTE: this pattern doesn't match "X86call imm", because we do not know
// that the offset between an arbitrary immediate and the call will fit in
// the 32-bit pcrel field that we have.
@@ -235,12 +232,12 @@ let isCall = 1 in
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
Requires<[In64BitMode, NotWin64]>;
-
+
def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
"lcall{q}\t{*}$dst", []>;
}
- // FIXME: We need to teach codegen about single list of call-clobbered
+ // FIXME: We need to teach codegen about a single list of call-clobbered
// registers.
let isCall = 1, isCodeGenOnly = 1 in
// All calls clobber the non-callee saved registers. RSP is marked as
@@ -259,40 +256,39 @@ let isCall = 1, isCodeGenOnly = 1 in
def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
"call{q}\t{*}$dst",
[(X86call GR64:$dst)]>, Requires<[IsWin64]>;
- def WINCALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst,variable_ops),
+ def WINCALL64m : I<0xFF, MRM2m, (outs),
+ (ins i64mem:$dst,variable_ops),
"call{q}\t{*}$dst",
- [(X86call (loadi64 addr:$dst))]>,
+ [(X86call (loadi64 addr:$dst))]>,
Requires<[IsWin64]>;
}
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1 in
- let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ // AMD64 cc clobbers RSI, RDI, XMM6-XMM15.
+ let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [RSP] in {
- def TCRETURNdi64 : I<0, Pseudo, (outs),
- (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
- "#TC_RETURN $dst $offset", []>;
- def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset,
- variable_ops),
- "#TC_RETURN $dst $offset", []>;
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+ Uses = [RSP],
+ usesCustomInserter = 1 in {
+ def TCRETURNdi64 : PseudoI<(outs),
+ (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
+ []>;
+ def TCRETURNri64 : PseudoI<(outs),
+ (ins ptr_rc_tailcall:$dst, i32imm:$offset, variable_ops), []>;
let mayLoad = 1 in
- def TCRETURNmi64 : I<0, Pseudo, (outs),
- (ins i64mem_TC:$dst, i32imm:$offset, variable_ops),
- "#TC_RETURN $dst $offset", []>;
+ def TCRETURNmi64 : PseudoI<(outs),
+ (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), []>;
def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
(ins i64i32imm_pcrel:$dst, variable_ops),
"jmp\t$dst # TAILCALL", []>;
- def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops),
+ def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst, variable_ops),
"jmp{q}\t{*}$dst # TAILCALL", []>;
let mayLoad = 1 in
def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
"jmp{q}\t{*}$dst # TAILCALL", []>;
}
-
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 9124f90..b506f5e 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -34,12 +34,12 @@ def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
- [SDNPHasChain, SDNPInFlag, SDNPMayStore,
+ [SDNPHasChain, SDNPInGlue, SDNPMayStore,
SDNPMemOperand]>;
def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
- [SDNPHasChain, SDNPOutFlag, SDNPMayLoad,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
SDNPMemOperand]>;
def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -73,41 +73,23 @@ def fpimmneg1 : PatLeaf<(fpimm), [{
// Some 'special' instructions
let usesCustomInserter = 1 in { // Expanded after instruction selection.
- def FP32_TO_INT16_IN_MEM : I<0, Pseudo,
- (outs), (ins i16mem:$dst, RFP32:$src),
- "##FP32_TO_INT16_IN_MEM PSEUDO!",
+ def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src),
[(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
- def FP32_TO_INT32_IN_MEM : I<0, Pseudo,
- (outs), (ins i32mem:$dst, RFP32:$src),
- "##FP32_TO_INT32_IN_MEM PSEUDO!",
+ def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src),
[(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
- def FP32_TO_INT64_IN_MEM : I<0, Pseudo,
- (outs), (ins i64mem:$dst, RFP32:$src),
- "##FP32_TO_INT64_IN_MEM PSEUDO!",
+ def FP32_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP32:$src),
[(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
- def FP64_TO_INT16_IN_MEM : I<0, Pseudo,
- (outs), (ins i16mem:$dst, RFP64:$src),
- "##FP64_TO_INT16_IN_MEM PSEUDO!",
+ def FP64_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP64:$src),
[(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
- def FP64_TO_INT32_IN_MEM : I<0, Pseudo,
- (outs), (ins i32mem:$dst, RFP64:$src),
- "##FP64_TO_INT32_IN_MEM PSEUDO!",
+ def FP64_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP64:$src),
[(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
- def FP64_TO_INT64_IN_MEM : I<0, Pseudo,
- (outs), (ins i64mem:$dst, RFP64:$src),
- "##FP64_TO_INT64_IN_MEM PSEUDO!",
+ def FP64_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP64:$src),
[(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
- def FP80_TO_INT16_IN_MEM : I<0, Pseudo,
- (outs), (ins i16mem:$dst, RFP80:$src),
- "##FP80_TO_INT16_IN_MEM PSEUDO!",
+ def FP80_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP80:$src),
[(X86fp_to_i16mem RFP80:$src, addr:$dst)]>;
- def FP80_TO_INT32_IN_MEM : I<0, Pseudo,
- (outs), (ins i32mem:$dst, RFP80:$src),
- "##FP80_TO_INT32_IN_MEM PSEUDO!",
+ def FP80_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP80:$src),
[(X86fp_to_i32mem RFP80:$src, addr:$dst)]>;
- def FP80_TO_INT64_IN_MEM : I<0, Pseudo,
- (outs), (ins i64mem:$dst, RFP80:$src),
- "##FP80_TO_INT64_IN_MEM PSEUDO!",
+ def FP80_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP80:$src),
[(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
}
@@ -643,8 +625,12 @@ def FCOMPP : I<0xD9, RawFrm, (outs), (ins), "fcompp", []>, DE;
def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
"fxsave\t$dst", []>, TB;
+def FXSAVE64 : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
+ "fxsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
"fxrstor\t$src", []>, TB;
+def FXRSTOR64 : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
+ "fxrstorq\t$src", []>, TB, REX_W, Requires<[In64BitMode]>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index a440359..0660072 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -41,6 +41,8 @@ def MRM_F8 : Format<41>;
def MRM_F9 : Format<42>;
def RawFrmImm8 : Format<43>;
def RawFrmImm16 : Format<44>;
+def MRM_D0 : Format<45>;
+def MRM_D1 : Format<46>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -135,17 +137,17 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix?
bits<4> Prefix = 0; // Which prefix byte does this inst have?
- bit hasREX_WPrefix = 0; // Does this inst requires the REX.W prefix?
+ bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
bits<2> SegOvrBits = 0; // Segment override prefix.
Domain ExeDomain = d;
- bit hasVEXPrefix = 0; // Does this inst requires a VEX prefix?
+ bit hasVEXPrefix = 0; // Does this inst require a VEX prefix?
bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field?
- bit hasVEX_4VPrefix = 0; // Does this inst requires the VEX.VVVV field?
- bit hasVEX_i8ImmReg = 0; // Does this inst requires the last source register
+ bit hasVEX_4VPrefix = 0; // Does this inst require the VEX.VVVV field?
+ bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register
// to be encoded in an immediate field?
- bit hasVEX_L = 0; // Does this inst uses large (256-bit) registers?
+ bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
// TSFlags layout should be kept in sync with X86InstrInfo.h.
@@ -168,6 +170,11 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{37} = has3DNow0F0FOpcode;
}
+class PseudoI<dag oops, dag iops, list<dag> pattern>
+ : X86Inst<0, Pseudo, NoImm, oops, iops, ""> {
+ let Pattern = pattern;
+}
+
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: X86Inst<o, f, NoImm, outs, ins, asm, d> {
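A minimal usage sketch of the new class (hypothetical def name; the shape mirrors the pseudo conversions elsewhere in this commit). Pseudos built from PseudoI carry no opcode or asm string, only a selection pattern:

def EXAMPLE_PSEUDO : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src),
                             [(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;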
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 70c3d07..3cbfac1 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -43,6 +43,21 @@ def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
+def X86pandn : SDNode<"X86ISD::PANDN",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignb : SDNode<"X86ISD::PSIGNB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignw : SDNode<"X86ISD::PSIGNW",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignd : SDNode<"X86ISD::PSIGND",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86pblendv : SDNode<"X86ISD::PBLENDVB",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
def X86pextrb : SDNode<"X86ISD::PEXTRB",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
def X86pextrw : SDNode<"X86ISD::PEXTRW",
@@ -117,6 +132,8 @@ def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>;
+def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>;
def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
@@ -327,6 +344,18 @@ def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShufflePALIGNRImmediate(N));
}]>;
+// EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index
+// to VEXTRACTF128 imm.
+def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{
+ return getI8Imm(X86::getExtractVEXTRACTF128Immediate(N));
+}]>;
+
+// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to
+// VINSERTF128 imm.
+def INSERT_get_vinsertf128_imm : SDNodeXForm<insert_subvector, [{
+ return getI8Imm(X86::getInsertVINSERTF128Immediate(N));
+}]>;
+
def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
@@ -417,3 +446,16 @@ def palign : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N));
}], SHUFFLE_get_palign_imm>;
+
+def vextractf128_extract : PatFrag<(ops node:$bigvec, node:$index),
+ (extract_subvector node:$bigvec,
+ node:$index), [{
+ return X86::isVEXTRACTF128Index(N);
+}], EXTRACT_get_vextractf128_imm>;
+
+def vinsertf128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
+ node:$index),
+ (insert_subvector node:$bigvec, node:$smallvec,
+ node:$index), [{
+ return X86::isVINSERTF128Index(N);
+}], INSERT_get_vinsertf128_imm>;
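A sketch of how an isel pattern would consume these fragments (the VEXTRACTF128rr instruction and the exact pattern shape are assumptions, not part of this hunk): the PatFrag validates the subvector index, and the attached xform rewrites it into the instruction's immediate:

def : Pat<(vextractf128_extract:$ext (v8f32 VR256:$src), (i32 imm)),
          (v4f32 (VEXTRACTF128rr VR256:$src,
                                 (EXTRACT_get_vextractf128_imm VR256:$ext)))>;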
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 79d9872..21df57c 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -58,7 +58,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
TB_NOT_REVERSABLE = 1U << 31,
TB_FLAGS = TB_NOT_REVERSABLE
};
-
+
static const unsigned OpTbl2Addr[][2] = {
{ X86::ADC32ri, X86::ADC32mi },
{ X86::ADC32ri8, X86::ADC32mi8 },
@@ -231,16 +231,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS;
assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
-
+
// If this is not a reversible operation (because there is a many->one)
// mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
continue;
-
+
// Index 0, folded load and store, no alignment requirement.
unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
-
- assert(!MemOp2RegOpTable.count(MemOp) &&
+
+ assert(!MemOp2RegOpTable.count(MemOp) &&
"Duplicated entries in unfolding maps?");
MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
}
@@ -334,12 +334,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
unsigned Align = OpTbl0[i][3];
assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align);
-
+
// If this is not a reversible operation (because there is a many->one)
// mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
if (OpTbl0[i][1] & TB_NOT_REVERSABLE)
continue;
-
+
// Index 0, folded load or store.
unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?");
@@ -369,8 +369,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
{ X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
{ X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
- { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
- { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
{ X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
{ X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
{ X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 },
@@ -461,12 +459,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
unsigned Align = OpTbl1[i][2];
assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries");
RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align);
-
+
// If this is not a reversible operation (because there is a many->one)
// mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
if (OpTbl1[i][1] & TB_NOT_REVERSABLE)
continue;
-
+
// Index 1, folded load
unsigned AuxInfo = 1 | (1 << 4);
assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries");
@@ -568,6 +566,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::IMUL16rr, X86::IMUL16rm, 0 },
{ X86::IMUL32rr, X86::IMUL32rm, 0 },
{ X86::IMUL64rr, X86::IMUL64rm, 0 },
+ { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
+ { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
{ X86::MAXPDrr, X86::MAXPDrm, 16 },
{ X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 },
{ X86::MAXPSrr, X86::MAXPSrm, 16 },
@@ -678,15 +678,15 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
unsigned RegOp = OpTbl2[i][0];
unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS;
unsigned Align = OpTbl2[i][2];
-
+
assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
-
+
// If this is not a reversible operation (because there is a many->one)
// mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
continue;
-
+
// Index 2, folded load
unsigned AuxInfo = 2 | (1 << 4);
assert(!MemOp2RegOpTable.count(MemOp) &&
@@ -808,7 +808,7 @@ static bool isFrameStoreOpcode(int Opcode) {
return false;
}
-unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
if (isFrameLoadOpcode(MI->getOpcode()))
if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
@@ -816,7 +816,7 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
return 0;
}
-unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
if (isFrameLoadOpcode(MI->getOpcode())) {
unsigned Reg;
@@ -946,10 +946,10 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
isPICBase = true;
}
return isPICBase;
- }
+ }
return false;
}
-
+
case X86::LEA32r:
case X86::LEA64r: {
if (MI->getOperand(2).isImm() &&
@@ -1124,9 +1124,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
-
+
// Build and insert into an implicit UNDEF value. This is OK because
- // well be shifting and then extracting the lower 16-bits.
+ // we'll be shifting and then extracting the lower 16 bits.
// This has the potential to cause a partial register stall, e.g.
// movw (%rbp,%rcx,2), %dx
// leal -65(%rdx), %esi
@@ -1162,7 +1162,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
case X86::ADD16ri8:
case X86::ADD16ri_DB:
case X86::ADD16ri8_DB:
- addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
+ addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
break;
case X86::ADD16rr:
case X86::ADD16rr_DB: {
@@ -1177,7 +1177,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
} else {
leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// Build and insert into an implicit UNDEF value. This is OK because
- // well be shifting and then extracting the lower 16-bits.
+ // we'll be shifting and then extracting the lower 16 bits.
BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
InsMI2 =
BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
@@ -1244,7 +1244,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::SHUFPSrri: {
assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
-
+
unsigned B = MI->getOperand(1).getReg();
unsigned C = MI->getOperand(2).getReg();
if (B != C) return 0;
@@ -1392,7 +1392,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
RC = X86::GR32_NOSPRegisterClass;
}
-
+
unsigned Src2 = MI->getOperand(2).getReg();
bool isKill2 = MI->getOperand(2).isKill();
@@ -1471,7 +1471,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
LV->replaceKillInstruction(Dest, MI, NewMI);
}
- MFI->insert(MBBI, NewMI); // Insert the new inst
+ MFI->insert(MBBI, NewMI); // Insert the new inst
return NewMI;
}
@@ -1692,7 +1692,7 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
const TargetInstrDesc &TID = MI->getDesc();
if (!TID.isTerminator()) return false;
-
+
// Conditional branch is a special case.
if (TID.isBranch() && !TID.isBarrier())
return true;
@@ -1701,7 +1701,7 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
return !isPredicated(MI);
}
-bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
@@ -1862,7 +1862,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
I = MBB.end();
++Count;
}
-
+
return Count;
}
@@ -2025,6 +2025,7 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
case X86::GR64_NOREX_NOSPRegClassID:
case X86::GR64_NOSPRegClassID:
case X86::GR64_TCRegClassID:
+ case X86::GR64_TCW64RegClassID:
return load ? X86::MOV64rm : X86::MOV64mr;
case X86::GR32RegClassID:
case X86::GR32_ABCDRegClassID:
@@ -2151,76 +2152,6 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
NewMIs.push_back(MIB);
}
-bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL = MBB.findDebugLoc(MI);
-
- bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
- bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
- unsigned SlotSize = is64Bit ? 8 : 4;
-
- MachineFunction &MF = *MBB.getParent();
- unsigned FPReg = RI.getFrameRegister(MF);
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- unsigned CalleeFrameSize = 0;
-
- unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i-1].getReg();
- // Add the callee-saved register as live-in. It's killed at the spill.
- MBB.addLiveIn(Reg);
- if (Reg == FPReg)
- // X86RegisterInfo::emitPrologue will handle spilling of frame register.
- continue;
- if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
- CalleeFrameSize += SlotSize;
- BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
- } else {
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
- RC, &RI);
- }
- }
-
- X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
- return true;
-}
-
-bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL = MBB.findDebugLoc(MI);
-
- MachineFunction &MF = *MBB.getParent();
- unsigned FPReg = RI.getFrameRegister(MF);
- bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
- bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
- unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (Reg == FPReg)
- // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
- continue;
- if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
- BuildMI(MBB, MI, DL, get(Opc), Reg);
- } else {
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
- RC, &RI);
- }
- }
- return true;
-}
-
MachineInstr*
X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
@@ -2247,7 +2178,7 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
MIB.addOperand(MOs[i]);
if (NumAddrOps < 4) // FrameIndex only
addOffset(MIB, 0);
-
+
// Loop over the rest of the ri operands, converting them over.
unsigned NumOps = MI->getDesc().getNumOperands()-2;
for (unsigned i = 0; i != NumOps; ++i) {
@@ -2268,7 +2199,7 @@ static MachineInstr *FuseInst(MachineFunction &MF,
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
MI->getDebugLoc(), true);
MachineInstrBuilder MIB(NewMI);
-
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (i == OpNo) {
@@ -2317,7 +2248,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
if (isTwoAddr && NumOps >= 2 && i < 2 &&
MI->getOperand(0).isReg() &&
MI->getOperand(1).isReg() &&
- MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
+ MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
isTwoAddrFold = true;
} else if (i == 0) { // If operand 0
@@ -2331,14 +2262,14 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
if (NewMI)
return NewMI;
-
+
OpcodeTablePtr = &RegOp2MemOpTable0;
} else if (i == 1) {
OpcodeTablePtr = &RegOp2MemOpTable1;
} else if (i == 2) {
OpcodeTablePtr = &RegOp2MemOpTable2;
}
-
+
// If table selected...
if (OpcodeTablePtr) {
// Find the Opcode to fuse
@@ -2386,8 +2317,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return NewMI;
}
}
-
- // No fusion
+
+ // No fusion
if (PrintFailedFusing && !MI->isCopy())
dbgs() << "We failed to fuse operand " << i << " in " << *MI;
return NULL;
@@ -2398,7 +2329,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
int FrameIndex) const {
- // Check switch flag
+ // Check switch flag
if (NoFusing) return NULL;
if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
@@ -2450,7 +2381,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) const {
- // Check switch flag
+ // Check switch flag
if (NoFusing) return NULL;
if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
@@ -2490,9 +2421,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = 16;
break;
case X86::FsFLD0SD:
+ case X86::VFsFLD0SD:
Alignment = 8;
break;
case X86::FsFLD0SS:
+ case X86::VFsFLD0SS:
Alignment = 4;
break;
default:
@@ -2556,9 +2489,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineConstantPool &MCP = *MF.getConstantPool();
const Type *Ty;
unsigned Opc = LoadMI->getOpcode();
- if (Opc == X86::FsFLD0SS)
+ if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
- else if (Opc == X86::FsFLD0SD)
+ else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
@@ -2591,13 +2524,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const {
- // Check switch flag
+ // Check switch flag
if (NoFusing) return 0;
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
switch (MI->getOpcode()) {
default: return false;
- case X86::TEST8rr:
+ case X86::TEST8rr:
case X86::TEST16rr:
case X86::TEST32rr:
case X86::TEST64rr:
@@ -2618,7 +2551,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
// instruction is different than folding it other places. It requires
// replacing the *two* registers with the memory location.
const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
- if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
+ if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
} else if (OpNum == 0) { // If operand 0
switch (Opc) {
@@ -2634,7 +2567,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
} else if (OpNum == 2) {
OpcodeTablePtr = &RegOp2MemOpTable2;
}
-
+
if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
return true;
return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
@@ -2704,7 +2637,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
// Emit the data processing instruction.
MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true);
MachineInstrBuilder MIB(DataMI);
-
+
if (FoldedStore)
MIB.addReg(Reg, RegState::Define);
for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
@@ -3152,12 +3085,8 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(X86::NOOP);
}
-bool X86InstrInfo::
-hasHighOperandLatency(const InstrItineraryData *ItinData,
- const MachineRegisterInfo *MRI,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const {
- switch (DefMI->getOpcode()) {
+bool X86InstrInfo::isHighLatencyDef(int opc) const {
+ switch (opc) {
default: return false;
case X86::DIVSDrm:
case X86::DIVSDrm_Int:
@@ -3187,6 +3116,14 @@ hasHighOperandLatency(const InstrItineraryData *ItinData,
}
}
+bool X86InstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ return isHighLatencyDef(DefMI->getOpcode());
+}
+
namespace {
/// CGBR - Create Global Base Reg pass. This initializes the PIC
/// global base register for x86-32.
@@ -3224,11 +3161,11 @@ namespace {
PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
else
PC = GlobalBaseReg;
-
+
// Operand of MovePCtoStack is completely ignored by asm printer. It's
// only used in JIT code emission as displacement to pc.
BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
-
+
// If we're using vanilla 'GOT' PIC style, we should use relative addressing
// not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 46db68e..81bbd3d 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -33,15 +33,15 @@ namespace X86 {
AddrScaleAmt = 1,
AddrIndexReg = 2,
AddrDisp = 3,
-
+
/// AddrSegmentReg - The operand # of the segment in the memory operand.
AddrSegmentReg = 4,
/// AddrNumOperands - Total number of operands in a memory reference.
AddrNumOperands = 5
};
-
-
+
+
// X86 specific condition code. These correspond to X86_*_COND in
// X86InstrInfo.td. They must be kept in synch.
enum CondCode {
@@ -72,16 +72,16 @@ namespace X86 {
COND_INVALID
};
-
+
// Turn condition code into conditional branch opcode.
unsigned GetCondBranchFromCond(CondCode CC);
-
+
/// GetOppositeBranchCondition - Return the inverse of the specified cond,
/// e.g. turning COND_E to COND_NE.
CondCode GetOppositeBranchCondition(X86::CondCode CC);
}
-
+
/// X86II - This namespace holds all of the target specific flags that
/// instruction info tracks.
///
@@ -90,14 +90,14 @@ namespace X86II {
enum TOF {
//===------------------------------------------------------------------===//
// X86 Specific MachineOperand flags.
-
+
MO_NO_FLAG,
-
+
/// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a
/// relocation of:
/// SYMBOL_LABEL + [. - PICBASELABEL]
MO_GOT_ABSOLUTE_ADDRESS,
-
+
/// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the
/// immediate should get the value of the symbol minus the PIC base label:
/// SYMBOL_LABEL - PICBASELABEL
@@ -106,77 +106,77 @@ namespace X86II {
/// MO_GOT - On a symbol operand this indicates that the immediate is the
/// offset to the GOT entry for the symbol name from the base of the GOT.
///
- /// See the X86-64 ELF ABI supplement for more details.
+ /// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @GOT
MO_GOT,
-
+
/// MO_GOTOFF - On a symbol operand this indicates that the immediate is
- /// the offset to the location of the symbol name from the base of the GOT.
+ /// the offset to the location of the symbol name from the base of the GOT.
///
- /// See the X86-64 ELF ABI supplement for more details.
+ /// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @GOTOFF
MO_GOTOFF,
-
+
/// MO_GOTPCREL - On a symbol operand this indicates that the immediate is
/// offset to the GOT entry for the symbol name from the current code
- /// location.
+ /// location.
///
- /// See the X86-64 ELF ABI supplement for more details.
+ /// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @GOTPCREL
MO_GOTPCREL,
-
+
/// MO_PLT - On a symbol operand this indicates that the immediate is
- /// offset to the PLT entry of symbol name from the current code location.
+ /// offset to the PLT entry of symbol name from the current code location.
///
- /// See the X86-64 ELF ABI supplement for more details.
+ /// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @PLT
MO_PLT,
-
+
/// MO_TLSGD - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TLSGD
MO_TLSGD,
-
+
/// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @GOTTPOFF
MO_GOTTPOFF,
-
+
/// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @INDNTPOFF
MO_INDNTPOFF,
-
+
/// MO_TPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TPOFF
MO_TPOFF,
-
+
/// MO_NTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @NTPOFF
MO_NTPOFF,
-
+
/// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the
/// reference is actually to the "__imp_FOO" symbol. This is used for
/// dllimport linkage on windows.
MO_DLLIMPORT,
-
+
/// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
/// reference is actually to the "FOO$stub" symbol. This is used for calls
/// and jumps to external functions on Tiger and earlier.
MO_DARWIN_STUB,
-
+
/// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
/// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a
/// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
@@ -186,19 +186,19 @@ namespace X86II {
/// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is
/// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
MO_DARWIN_NONLAZY_PIC_BASE,
-
+
/// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this
/// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE",
/// which is a PIC-base-relative reference to a hidden dyld lazy pointer
/// stub.
MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE,
-
+
/// MO_TLVP - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
/// This is the TLS offset for the Darwin TLS mechanism.
MO_TLVP,
-
+
/// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate
/// is some TLS offset from the picbase.
///
@@ -239,7 +239,7 @@ inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) {
return false;
}
}
-
+
/// X86II - This namespace holds all of the target specific flags that
/// instruction info tracks.
///
@@ -299,7 +299,7 @@ namespace X86II {
// MRMInitReg - This form is used for instructions whose source and
// destinations are the same register.
MRMInitReg = 32,
-
+
//// MRM_C1 - A mod/rm byte of exactly 0xC1.
MRM_C1 = 33,
MRM_C2 = 34,
@@ -311,12 +311,14 @@ namespace X86II {
MRM_F0 = 40,
MRM_F8 = 41,
MRM_F9 = 42,
+ MRM_D0 = 45,
+ MRM_D1 = 46,
/// RawFrmImm8 - This is used for the ENTER instruction, which has two
/// immediates, the first of which is a 16-bit immediate (specified by
/// the imm encoding) and the second is an 8-bit fixed value.
RawFrmImm8 = 43,
-
+
/// RawFrmImm16 - This is used for CALL FAR instructions, which have two
/// immediates, the first of which is a 16 or 32-bit immediate (specified by
/// the imm encoding) and the second is a 16-bit fixed value. In the AMD
@@ -368,7 +370,7 @@ namespace X86II {
// T8, TA - Prefix after the 0x0F prefix.
T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
-
+
// TF - Prefix before and after 0x0F
TF = 15 << Op0Shift,
@@ -471,7 +473,7 @@ namespace X86II {
/// if a VR256 register is used, but some AVX instructions also have this
/// field marked when using a f256 memory references.
VEX_L = 1U << 4,
-
+
/// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
/// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
/// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
@@ -480,18 +482,18 @@ namespace X86II {
/// this flag to indicate that the encoder should do the wacky 3DNow! thing.
Has3DNow0F0FOpcode = 1U << 5
};
-
+
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
// specified machine instruction.
//
static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
return TSFlags >> X86II::OpcodeShift;
}
-
+
static inline bool hasImm(uint64_t TSFlags) {
return (TSFlags & X86II::ImmMask) != 0;
}
-
+
/// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
/// of the specified instruction.
static inline unsigned getSizeOfImm(uint64_t TSFlags) {
@@ -506,7 +508,7 @@ namespace X86II {
case X86II::Imm64: return 8;
}
}
-
+
/// isImmPCRel - Return true if the immediate of the specified instruction's
/// TSFlags indicates that it is pc relative.
static inline unsigned isImmPCRel(uint64_t TSFlags) {
@@ -523,7 +525,7 @@ namespace X86II {
return false;
}
}
-
+
/// getMemoryOperandNo - The function returns the MCInst operand # for the
/// first field of the memory operand. If the instruction doesn't have a
/// memory operand, this returns -1.
@@ -551,7 +553,7 @@ namespace X86II {
unsigned FirstMemOp = 1;
if (HasVEX_4V)
++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
-
+
// FIXME: Maybe lea should have its own form? This is a horrible hack.
//if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
// Opcode == X86::LEA16r || Opcode == X86::LEA32r)
@@ -577,6 +579,8 @@ namespace X86II {
case X86II::MRM_F0:
case X86II::MRM_F8:
case X86II::MRM_F9:
+ case X86II::MRM_D0:
+ case X86II::MRM_D1:
return -1;
}
}
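MRM_D0 and MRM_D1 denote a ModRM byte fixed at 0xD0/0xD1. Presumably they back xgetbv/xsetbv (encoded 0F 01 D0 and 0F 01 D1); their defs would look roughly like this sketch (instruction names from the Intel manual, def shape assumed):

def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB;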
@@ -609,7 +613,7 @@ inline static bool isMem(const MachineInstr *MI, unsigned Op) {
class X86InstrInfo : public TargetInstrInfoImpl {
X86TargetMachine &TM;
const X86RegisterInfo RI;
-
+
/// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
/// RegOp2MemOpTable2 - Load / store folding opcode maps.
///
@@ -617,7 +621,7 @@ class X86InstrInfo : public TargetInstrInfoImpl {
DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
-
+
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///
DenseMap<unsigned, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
@@ -742,17 +746,6 @@ public:
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
- virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
-
- virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
-
virtual
MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
@@ -802,7 +795,7 @@ public:
virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex = 0) const;
-
+
/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler
/// to determine if two loads are loading from the same base address. It
/// should only return true if the base pointers are the same and the
@@ -836,7 +829,7 @@ public:
return (reg == X86::SPL || reg == X86::BPL ||
reg == X86::SIL || reg == X86::DIL);
}
-
+
static bool isX86_64ExtendedReg(const MachineOperand &MO) {
if (!MO.isReg()) return false;
return isX86_64ExtendedReg(MO.getReg());
@@ -865,11 +858,13 @@ public:
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Alignment) const;
+ bool isHighLatencyDef(int opc) const;
+
bool hasHighOperandLatency(const InstrItineraryData *ItinData,
const MachineRegisterInfo *MRI,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI, unsigned UseIdx) const;
-
+
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 0a60eb7..f832a7c 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -35,6 +35,20 @@ def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+// SDTBinaryArithWithFlagsInOut - RES1, EFLAGS = op LHS, RHS, EFLAGS
+def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<4, i32>]>;
+// RES1, RES2, FLAGS = op LHS, RHS
+def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
def SDTX86BrCond : SDTypeProfile<0, 3,
[SDTCisVT<0, OtherVT>,
SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
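Restating the two profiles added above in node form (illustration only, following their own comments):

// SDTBinaryArithWithFlagsInOut:   (RES, EFLAGS)         = op LHS, RHS, EFLAGS
// SDT2ResultBinaryArithWithFlags: (RES1, RES2, EFLAGS)  = op LHS, RHS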
@@ -78,7 +92,7 @@ def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
@@ -114,10 +128,10 @@ def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
[SDNPHasChain, SDNPMayStore,
@@ -141,7 +155,7 @@ def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
[SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
- [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def X86vastart_save_xmm_regs :
SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
@@ -153,43 +167,45 @@ def X86vaarg64 :
SDNPMemOperand]>;
def X86callseq_start :
SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
- [SDNPHasChain, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOutGlue]>;
def X86callseq_end :
SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
- [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
SDNPVariadic]>;
def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore]>;
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore]>;
def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad]>;
def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void,
- [SDNPHasChain, SDNPOutFlag, SDNPSideEffect]>;
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
[SDNPHasChain]>;
def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
- [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
-def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags,
+def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags,
[SDNPCommutative]>;
+def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>;
+def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>;
def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
@@ -203,10 +219,10 @@ def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
- []>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
//===----------------------------------------------------------------------===//
// X86 Operand Definitions.
@@ -257,6 +273,10 @@ def i8mem_NOREX : Operand<i64> {
let ParserMatchClass = X86MemAsmOperand;
}
+// GPRs available for tail calls.
+// Resolved via getPointerRegClass(2) to GR64_TCW64, GR64_TC, or GR32_TC.
+def ptr_rc_tailcall : PointerLikeRegClass<2>;
+
// Special i32mem for addresses of load folding tail calls. These are not
// allowed to use callee-saved registers since they must be scheduled
// after callee-saved registers are popped.
@@ -271,7 +291,8 @@ def i32mem_TC : Operand<i32> {
// after callee-saved registers are popped.
def i64mem_TC : Operand<i64> {
let PrintMethod = "printi64mem";
- let MIOperandInfo = (ops GR64_TC, i8imm, GR64_TC, i32imm, i8imm);
+ let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
+ ptr_rc_tailcall, i32imm, i8imm);
let ParserMatchClass = X86MemAsmOperand;
}
@@ -394,26 +415,26 @@ def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
def HasCMov : Predicate<"Subtarget->hasCMov()">;
def NoCMov : Predicate<"!Subtarget->hasCMov()">;
-// FIXME: temporary hack to let codegen assert or generate poor code in case
-// no AVX version of the desired intructions is present, this is better for
-// incremental dev (without fallbacks it's easier to spot what's missing)
-def HasMMX : Predicate<"Subtarget->hasMMX() && !Subtarget->hasAVX()">;
+def HasMMX : Predicate<"Subtarget->hasMMX()">;
def Has3DNow : Predicate<"Subtarget->has3DNow()">;
def Has3DNowA : Predicate<"Subtarget->has3DNowA()">;
-def HasSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
-def HasSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;
-def HasSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
-def HasSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">;
-def HasSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">;
-def HasSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">;
-def HasSSE4A : Predicate<"Subtarget->hasSSE4A() && !Subtarget->hasAVX()">;
+def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
+def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
+def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
+def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
+def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
+def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
+def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
def HasAVX : Predicate<"Subtarget->hasAVX()">;
+def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
+
+def HasAES : Predicate<"Subtarget->hasAES()">;
def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
-def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
-def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
+def FPStackf32 : Predicate<"!Subtarget->hasXMM()">;
+def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate;
def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
@@ -430,7 +451,6 @@ def OptForSize : Predicate<"OptForSize">;
def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
-def HasAES : Predicate<"Subtarget->hasAES()">;
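The predicate cleanup above drops the temporary "&& !Subtarget->hasAVX()" guards and keys the FP-stack fallbacks to the new XMM queries. A minimal sketch of what the new accessors are assumed to compute (the names come from this hunk; the bodies are an assumption, chosen so FPStackf32/f64 only fire when no XMM-based FP unit exists):

  // Sketch (assumption): the "XMM" queries are SSE-or-AVX style, so the
  // FP-stack predicates fall back only when neither is available.
  bool X86Subtarget::hasXMM()    const { return hasSSE1() || hasAVX(); }
  bool X86Subtarget::hasXMMInt() const { return hasSSE2() || hasAVX(); }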
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
@@ -1144,6 +1164,12 @@ def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
// Lock instruction prefix
def LOCK_PREFIX : I<0xF0, RawFrm, (outs), (ins), "lock", []>;
+// Rex64 instruction prefix
+def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", []>;
+
+// Data16 instruction prefix
+def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>;
+
// Repeat string operation instruction prefixes
// These use ECX as a count register and the DF flag in EFLAGS for direction
let Defs = [ECX], Uses = [ECX,EFLAGS] in {
@@ -1267,6 +1293,12 @@ def : MnemonicAlias<"cdqe", "cltq">;
// lret maps to lretl, it is not ambiguous with lretq.
def : MnemonicAlias<"lret", "lretl">;
+def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"loopz", "loope">;
+def : MnemonicAlias<"loopnz", "loopne">;
+
def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>;
def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>;
def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>;
@@ -1374,6 +1406,9 @@ defm : IntegerCondCodeMnemonicAlias<"cmov", "q">;
def : InstAlias<"aad", (AAD8i8 10)>;
def : InstAlias<"aam", (AAM8i8 10)>;
+// Disambiguate the mem/imm form of bt-without-a-suffix as btl.
+def : InstAlias<"bt $imm, $mem", (BT32mi8 i32mem:$mem, i32i8imm:$imm)>;
+
// clr aliases.
def : InstAlias<"clrb $reg", (XOR8rr GR8 :$reg, GR8 :$reg)>;
def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg)>;
@@ -1444,14 +1479,15 @@ defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
-// commute. We also allow fdivrp/fsubrp even though they don't commute, solely
-// because gas supports it.
+// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
+// solely because gas supports it.
def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op)>;
def : InstAlias<"fmulp %st(0), $op", (MUL_FPrST0 RST:$op)>;
def : InstAlias<"fsubrp %st(0), $op", (SUB_FPrST0 RST:$op)>;
+def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>;
def : InstAlias<"fdivrp %st(0), $op", (DIV_FPrST0 RST:$op)>;
-// We accepts "fnstsw %eax" even though it only writes %ax.
+// We accept "fnstsw %eax" even though it only writes %ax.
def : InstAlias<"fnstsw %eax", (FNSTSW8r)>;
def : InstAlias<"fnstsw %al" , (FNSTSW8r)>;
def : InstAlias<"fnstsw" , (FNSTSW8r)>;
@@ -1497,6 +1533,10 @@ def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg)>;
// Match 'movq <largeimm>, <reg>' as an alias for movabsq.
def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm)>;
+// Match 'movq GR64, MMX' as an alias for movd.
+def : InstAlias<"movq $src, $dst", (MMX_MOVD64to64rr VR64:$dst, GR64:$src)>;
+def : InstAlias<"movq $src, $dst", (MMX_MOVD64from64rr GR64:$dst, VR64:$src)>;
+
// movsd with no operands (as opposed to the SSE scalar move of a double) is an
// alias for movsl. (as in rep; movsd)
def : InstAlias<"movsd", (MOVSD)>;
@@ -1586,4 +1626,3 @@ def : InstAlias<"xchgb $mem, $val", (XCHG8rm GR8 :$val, i8mem :$mem)>;
def : InstAlias<"xchgw $mem, $val", (XCHG16rm GR16:$val, i16mem:$mem)>;
def : InstAlias<"xchgl $mem, $val", (XCHG32rm GR32:$val, i32mem:$mem)>;
def : InstAlias<"xchgq $mem, $val", (XCHG64rm GR64:$val, i64mem:$mem)>;
-
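The new X86adc_flag/X86sbb_flag nodes use SDTBinaryArithWithFlagsInOut because ADC and SBB both read and write EFLAGS; that flags-in/flags-out shape is what lets wide integer arithmetic lower to an add/adc chain. A small C++ illustration (sketch; exact register assignment varies):

  // The high limb consumes the carry produced by the low limb -- the
  // shape X86ISD::ADC models.
  unsigned __int128 add128(unsigned __int128 a, unsigned __int128 b) {
    return a + b;  // x86-64: addq for the low half, adcq for the high half
  }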
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index e4e92cc..45e9051 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -180,6 +180,12 @@ def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+// Implicitly promote a 32-bit scalar to a vector.
+def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
+ (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
let AddedComplexity = 20 in {
// MOVSSrm zeros the high parts of the register; represent this
@@ -706,6 +712,9 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
}
+def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
+ Requires<[HasAVX]>;
+
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround FR64:$src))]>;
@@ -733,6 +742,9 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
}
+def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>,
+ Requires<[HasAVX]>;
+
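These two patterns let plain scalar conversions select the VEX forms under AVX; the source register is passed twice because the VEX encoding takes three operands. For instance (assuming compilation with -mavx):

  // narrow: vcvtsd2ss %xmm0, %xmm0, %xmm0
  // widen:  vcvtss2sd %xmm0, %xmm0, %xmm0
  float  narrow(double d) { return (float)d; }
  double widen(float f)   { return (double)f; }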
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (fextend FR32:$src))]>, XS,
@@ -1434,6 +1446,12 @@ def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
[(set FR64:$dst, fpimm0)]>,
Requires<[HasSSE2]>, TB, OpSize;
+def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasAVX]>, TB, OpSize, VEX_4V;
}
// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
@@ -1566,19 +1584,13 @@ defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
let isCommutable = 0 in
defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [
// single r+r
- [(set VR128:$dst, (v2i64 (and (xor VR128:$src1,
- (bc_v2i64 (v4i32 immAllOnesV))),
- VR128:$src2)))],
+ [(set VR128:$dst, (X86pandn VR128:$src1, VR128:$src2))],
// double r+r
- [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
- (bc_v2i64 (v2f64 VR128:$src2))))],
+ [],
// single r+m
- [(set VR128:$dst, (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
- (bc_v2i64 (v4i32 immAllOnesV))),
- (memopv2i64 addr:$src2))))],
+ [(set VR128:$dst, (X86pandn VR128:$src1, (memopv2i64 addr:$src2)))],
// double r+m
- [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
- (memopv2i64 addr:$src2)))]]>;
+ []]>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
@@ -2189,6 +2201,10 @@ let neverHasSideEffects = 1 in
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", []>;
+def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ []>, XS, Requires<[HasSSE2]>;
+
let canFoldAsLoad = 1, mayLoad = 1 in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
@@ -2518,15 +2534,11 @@ let ExeDomain = SSEPackedInt in {
}
def PANDNrr : PDI<0xDF, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
- VR128:$src2)))]>;
+ "pandn\t{$src2, $dst|$dst, $src2}", []>;
def PANDNrm : PDI<0xDF, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
- (memopv2i64 addr:$src2))))]>;
+ "pandn\t{$src2, $dst|$dst, $src2}", []>;
}
} // Constraints = "$src1 = $dst"
@@ -3590,6 +3602,13 @@ def : Pat<(X86pshufb VR128:$src, VR128:$mask),
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
(PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignb VR128:$src1, VR128:$src2),
+ (PSIGNBrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignw VR128:$src1, VR128:$src2),
+ (PSIGNWrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignd VR128:$src1, VR128:$src2),
+ (PSIGNDrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//
@@ -3640,10 +3659,27 @@ def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)),
//===---------------------------------------------------------------------===//
// Thread synchronization
-def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor",
- [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>;
-def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait",
- [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
+let usesCustomInserter = 1 in {
+def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
+ [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>;
+def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
+ [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>;
+}
+
+let Uses = [EAX, ECX, EDX] in
+def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB,
+ Requires<[HasSSE3]>;
+let Uses = [ECX, EAX] in
+def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB,
+ Requires<[HasSSE3]>;
+
+def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
+def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
+
+def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>,
+ Requires<[In32BitMode]>;
+def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>,
+ Requires<[In64BitMode]>;
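Turning MONITOR/MWAIT into pseudos with a custom inserter lets selection use a general memory operand for the monitored address and have the inserter copy the pieces into the fixed EAX/ECX/EDX registers the real encodings require. From user code the intrinsics look like this (sketch; assumes CPU and OS MONITOR/MWAIT support):

  #include <pmmintrin.h>   // SSE3 intrinsics

  void wait_on(volatile const int *flag) {
    _mm_monitor((const void *)flag, 0, 0);  // address -> EAX/RAX
    if (!*flag)
      _mm_mwait(0, 0);                      // extensions -> ECX, hints -> EAX
  }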
//===---------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -3659,7 +3695,7 @@ let Predicates = [HasSSE2] in
(CVTSS2SDrm addr:$src)>;
// bit_convert
-let Predicates = [HasSSE2] in {
+let Predicates = [HasXMMInt] in {
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
@@ -3692,6 +3728,10 @@ let Predicates = [HasSSE2] in {
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
}
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
+}
+
// Move scalar to XMM zero-extended
// movd to XMM register zero-extends
let AddedComplexity = 15 in {
@@ -3865,27 +3905,6 @@ def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
-// Some special case pandn patterns.
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
- VR128:$src2)),
- (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
- VR128:$src2)),
- (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
- VR128:$src2)),
- (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
- (memop addr:$src2))),
- (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
- (memop addr:$src2))),
- (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
- (memop addr:$src2))),
- (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-
// vector -> vector casts
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
(Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>;
@@ -4588,22 +4607,25 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
//===----------------------------------------------------------------------===//
def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
-let mayLoad = 1 in
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop GR16:$src))]>, OpSize, XS;
def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop (loadi16 addr:$src)))]>, OpSize, XS;
def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
-let mayLoad = 1 in
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop GR32:$src))]>, XS;
def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop (loadi32 addr:$src)))]>, XS;
def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
-let mayLoad = 1 in
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop GR64:$src))]>, XS;
def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop (loadi64 addr:$src)))]>, XS;
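With real ctpop patterns attached (and the mayLoad-only stubs gone), a plain population count now selects these instructions directly, including the load-folding forms. For example, built with -mpopcnt:

  int bits_reg(unsigned x)        { return __builtin_popcount(x);  }  // popcntl %edi, %eax
  int bits_mem(const unsigned *p) { return __builtin_popcount(*p); }  // popcntl (%rdi), %eax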
@@ -4845,6 +4867,9 @@ defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0),
+ (PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
+
let isAsmParserOnly = 1, Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
@@ -4896,12 +4921,12 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
// Packed Compare Implicit Length Strings, Return Mask
multiclass pseudo_pcmpistrm<string asm> {
- def REG : Ii8<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "rr PSEUDO"),
+ def REG : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
imm:$src3))]>;
- def MEM : Ii8<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "rm PSEUDO"),
+ def MEM : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128
VR128:$src1, (load addr:$src2), imm:$src3))]>;
}
@@ -4932,12 +4957,12 @@ let Defs = [XMM0, EFLAGS] in {
// Packed Compare Explicit Length Strings, Return Mask
multiclass pseudo_pcmpestrm<string asm> {
- def REG : Ii8<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "rr PSEUDO"),
+ def REG : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
[(set VR128:$dst, (int_x86_sse42_pcmpestrm128
VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
- def MEM : Ii8<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "rm PSEUDO"),
+ def MEM : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
[(set VR128:$dst, (int_x86_sse42_pcmpestrm128
VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
}
@@ -5419,6 +5444,23 @@ def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
(VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
@@ -5426,6 +5468,23 @@ def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4f32 (VEXTRACTF128rr
+ (v8f32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2f64 (VEXTRACTF128rr
+ (v4f64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTF128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2i64 (VEXTRACTF128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
(VBROADCASTF128 addr:$src)>;
@@ -5563,11 +5622,15 @@ def : Pat<(X86Movddup (bc_v2f64
// Shuffle with UNPCKLPS
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
(VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
(UNPCKLPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
(VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
(UNPCKLPSrr VR128:$src1, VR128:$src2)>;
@@ -5584,20 +5647,24 @@ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
// Shuffle with UNPCKLPD
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
- (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+ (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
- (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+ (UNPCKLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
(VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
(UNPCKLPDrr VR128:$src1, VR128:$src2)>;
// Shuffle with UNPCKHPD
def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
- (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+ (VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
- (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+ (UNPCKHPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
(VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
@@ -5782,6 +5849,9 @@ def : Pat<(X86Movlps VR128:$src1,
def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
(MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
+def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
+
// Shuffle with MOVLPD
def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>;
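The pandn rework replaces the fragile (and (vnot ...)) source patterns with a dedicated X86pandn node formed during DAG combining; user-visible behavior is unchanged, e.g. the SSE2 andnot intrinsic still selects PANDN:

  #include <emmintrin.h>  // SSE2

  // _mm_andnot_si128 computes (~a) & b and compiles to pandn.
  __m128i andnot128(__m128i a, __m128i b) {
    return _mm_andnot_si128(a, b);
  }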
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 1a58ba0..be7d5ef 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -207,10 +207,15 @@ def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
-def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins),
- "str{w}\t{$dst}", []>, TB;
-def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
- "str{w}\t{$dst}", []>, TB;
+def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins),
+ "str{w}\t{$dst}", []>, TB, OpSize;
+def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins),
+ "str{l}\t{$dst}", []>, TB;
+def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
+ "str{q}\t{$dst}", []>, TB;
+def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
+ "str{w}\t{$dst}", []>, TB;
+
def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
"ltr{w}\t{$src}", []>, TB;
def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
@@ -388,3 +393,8 @@ def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB;
+let Defs = [RDX, RAX], Uses = [RCX] in
+ def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
+
+let Uses = [RDX, RAX, RCX] in
+ def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB;
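XGETBV reads the extended control register selected by ECX into EDX:EAX, hence the Defs/Uses above. The classic use is probing whether the OS context-switches XMM and YMM state before enabling AVX paths (a sketch using GCC-style inline assembly):

  #include <stdint.h>

  static bool os_supports_avx() {
    uint32_t eax, edx;
    __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0u));  // read XCR0
    return (eax & 0x6) == 0x6;  // bits 1 and 2: XMM and YMM state saved
  }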
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index e83f359..3f88fa6 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -19,7 +19,7 @@
#include "llvm/Function.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/System/Valgrind.h"
+#include "llvm/Support/Valgrind.h"
#include <cstdlib>
#include <cstring>
using namespace llvm;
@@ -127,9 +127,17 @@ extern "C" {
"movaps %xmm6, 96(%rsp)\n"
"movaps %xmm7, 112(%rsp)\n"
// JIT callee
+#ifdef _WIN64
+ "subq $32, %rsp\n"
+ "movq %rbp, %rcx\n" // Pass prev frame and return address
+ "movq 8(%rbp), %rdx\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
+ "addq $32, %rsp\n"
+#else
"movq %rbp, %rdi\n" // Pass prev frame and return address
"movq 8(%rbp), %rsi\n"
"call " ASMPREFIX "X86CompilationCallback2\n"
+#endif
// Restore all XMM arg registers
"movaps 112(%rsp), %xmm7\n"
"movaps 96(%rsp), %xmm6\n"
@@ -333,7 +341,7 @@ extern "C" {
extern "C" {
#if !(defined (X86_64_JIT) && defined(_MSC_VER))
// the following function is called only from this translation unit,
- // unless we are under 64bit Windows with MSC, where there is
+ // unless we are under 64bit Windows with MSC, where there is
// no support for inline assembly
static
#endif
@@ -462,7 +470,7 @@ TargetJITInfo::StubLayout X86JITInfo::getStubLayout() {
void *X86JITInfo::emitFunctionStub(const Function* F, void *Target,
JITCodeEmitter &JCE) {
- // Note, we cast to intptr_t here to silence a -pedantic warning that
+ // Note, we cast to intptr_t here to silence a -pedantic warning that
// complains about casting a function pointer to a normal pointer.
#if defined (X86_32_JIT) && !defined (_MSC_VER)
bool NotCC = (Target != (void*)(intptr_t)X86CompilationCallback &&
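The _WIN64 branch exists because the Microsoft x64 convention passes the first two integer arguments in RCX and RDX (SysV uses RDI and RSI) and obliges the caller to reserve 32 bytes of shadow space below the return address, which is what the subq/addq $32 pair provides. The callee's signature, as assumed from the marshalling in the hunk above:

  #include <cstdint>

  // SysV x86-64: StackPtr -> RDI, RetAddr -> RSI
  // Win64:       StackPtr -> RCX, RetAddr -> RDX (+ 32-byte shadow space)
  extern "C" void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);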
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp
index f45fdf5..6686214 100644
--- a/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/X86MCAsmInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
enum AsmWriterFlavorTy {
@@ -68,7 +69,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
DwarfUsesInlineInfoSection = true;
// Exceptions handling
- ExceptionsType = ExceptionHandling::Dwarf;
+ ExceptionsType = ExceptionHandling::DwarfTable;
}
X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
@@ -88,8 +89,8 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
SupportsDebugInformation = true;
// Exceptions handling
- ExceptionsType = ExceptionHandling::Dwarf;
-
+ ExceptionsType = ExceptionHandling::DwarfTable;
+
// OpenBSD has buggy support for .quad in 32-bit mode, just split into two
// .words.
if (T.getOS() == Triple::OpenBSD && T.getArch() == Triple::x86)
@@ -98,13 +99,15 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
const MCSection *X86ELFMCAsmInfo::
getNonexecutableStackSection(MCContext &Ctx) const {
- return Ctx.getELFSection(".note.GNU-stack", MCSectionELF::SHT_PROGBITS,
+ return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
0, SectionKind::getMetadata());
}
X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
- if (Triple.getArch() == Triple::x86_64)
+ if (Triple.getArch() == Triple::x86_64) {
GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ }
AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index fea37f8..0e3b571 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -38,29 +38,6 @@ public:
~X86MCCodeEmitter() {}
- unsigned getNumFixupKinds() const {
- return 7;
- }
-
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
- const static MCFixupKindInfo Infos[] = {
- { "reloc_pcrel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
- { "reloc_pcrel_1byte", 0, 1 * 8, MCFixupKindInfo::FKF_IsPCRel },
- { "reloc_pcrel_2byte", 0, 2 * 8, MCFixupKindInfo::FKF_IsPCRel },
- { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
- { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
- { "reloc_signed_4byte", 0, 4 * 8, 0},
- { "reloc_global_offset_table", 0, 4 * 8, 0}
- };
-
- if (Kind < FirstTargetFixupKind)
- return MCCodeEmitter::getFixupKindInfo(Kind);
-
- assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
- "Invalid kind!");
- return Infos[Kind - FirstTargetFixupKind];
- }
-
static unsigned GetX86RegNum(const MCOperand &MO) {
return X86RegisterInfo::getX86RegNum(MO.getReg());
}
@@ -173,13 +150,7 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
unsigned Size = X86II::getSizeOfImm(TSFlags);
bool isPCRel = X86II::isImmPCRel(TSFlags);
- switch (Size) {
- default: assert(0 && "Unknown immediate size");
- case 1: return isPCRel ? MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1;
- case 2: return isPCRel ? MCFixupKind(X86::reloc_pcrel_2byte) : FK_Data_2;
- case 4: return isPCRel ? MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4;
- case 8: assert(!isPCRel); return FK_Data_8;
- }
+ return MCFixup::getKindForSize(Size, isPCRel);
}
/// Is32BitMemOperand - Return true if the specified instruction with a memory
@@ -218,18 +189,22 @@ void X86MCCodeEmitter::
EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
unsigned &CurByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const {
- // If this is a simple integer displacement that doesn't require a relocation,
- // emit it now.
+ const MCExpr *Expr = NULL;
if (DispOp.isImm()) {
- // FIXME: is this right for pc-rel encoding?? Probably need to emit this as
- // a fixup if so.
- EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
- return;
+ // If this is a simple integer displacement that doesn't require a relocation,
+ // emit it now.
+ if (FixupKind != FK_PCRel_1 &&
+ FixupKind != FK_PCRel_2 &&
+ FixupKind != FK_PCRel_4) {
+ EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
+ return;
+ }
+ Expr = MCConstantExpr::Create(DispOp.getImm(), Ctx);
+ } else {
+ Expr = DispOp.getExpr();
}
// If we have an immoffset, add it to the expression.
- const MCExpr *Expr = DispOp.getExpr();
-
if (FixupKind == FK_Data_4 && StartsWithGlobalOffsetTable(Expr)) {
assert(ImmOffset == 0);
@@ -239,13 +214,13 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
// If the fixup is pc-relative, we need to bias the value to be relative to
// the start of the field, not the end of the field.
- if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) ||
+ if (FixupKind == FK_PCRel_4 ||
FixupKind == MCFixupKind(X86::reloc_riprel_4byte) ||
FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load))
ImmOffset -= 4;
- if (FixupKind == MCFixupKind(X86::reloc_pcrel_2byte))
+ if (FixupKind == FK_PCRel_2)
ImmOffset -= 2;
- if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte))
+ if (FixupKind == FK_PCRel_1)
ImmOffset -= 1;
if (ImmOffset)
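The -4/-2/-1 biasing converts between the two reference points in play: the CPU resolves a pc-relative displacement against the end of the field (the next instruction), while the fixup is applied at the field's start. In arithmetic form (sketch):

  #include <cstdint>

  // Value the writer must install for a pc-relative immediate field.
  int64_t pcRelDisp(uint64_t Target, uint64_t FieldAddr, unsigned FieldSize) {
    // The fixup machinery computes Target - FieldAddr; pre-subtracting the
    // field size yields the end-of-instruction-relative displacement.
    return (int64_t)(Target - FieldAddr) - (int64_t)FieldSize;
  }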
@@ -1004,6 +979,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
EmitByte(BaseOpcode, CurByte, OS);
EmitByte(0xF9, CurByte, OS);
break;
+ case X86II::MRM_D0:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xD0, CurByte, OS);
+ break;
+ case X86II::MRM_D1:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xD1, CurByte, OS);
+ break;
}
// If there is a remaining operand, it must be a trailing immediate. Emit it
@@ -1021,7 +1004,10 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
Fixups);
} else {
unsigned FixupKind;
- if (MI.getOpcode() == X86::MOV64ri32 || MI.getOpcode() == X86::MOV64mi32)
+ // FIXME: Is there a better way to know that we need a signed relocation?
+ if (MI.getOpcode() == X86::MOV64ri32 ||
+ MI.getOpcode() == X86::MOV64mi32 ||
+ MI.getOpcode() == X86::PUSH64i32)
FixupKind = X86::reloc_signed_4byte;
else
FixupKind = getImmFixupKind(TSFlags);
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 05a5de6..cbe6db2 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -374,6 +374,8 @@ ReSimplify:
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
+ case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
@@ -525,6 +527,66 @@ ReSimplify:
}
}
+static void LowerTlsAddr(MCStreamer &OutStreamer,
+ X86MCInstLower &MCInstLowering,
+ const MachineInstr &MI) {
+ bool is64Bits = MI.getOpcode() == X86::TLS_addr64;
+ MCContext &context = OutStreamer.getContext();
+
+ if (is64Bits) {
+ MCInst prefix;
+ prefix.setOpcode(X86::DATA16_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ }
+ MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
+ const MCSymbolRefExpr *symRef =
+ MCSymbolRefExpr::Create(sym, MCSymbolRefExpr::VK_TLSGD, context);
+
+ MCInst LEA;
+ if (is64Bits) {
+ LEA.setOpcode(X86::LEA64r);
+ LEA.addOperand(MCOperand::CreateReg(X86::RDI)); // dest
+ LEA.addOperand(MCOperand::CreateReg(X86::RIP)); // base
+ LEA.addOperand(MCOperand::CreateImm(1)); // scale
+ LEA.addOperand(MCOperand::CreateReg(0)); // index
+ LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ } else {
+ LEA.setOpcode(X86::LEA32r);
+ LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest
+ LEA.addOperand(MCOperand::CreateReg(0)); // base
+ LEA.addOperand(MCOperand::CreateImm(1)); // scale
+ LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // index
+ LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ }
+ OutStreamer.EmitInstruction(LEA);
+
+ if (is64Bits) {
+ MCInst prefix;
+ prefix.setOpcode(X86::DATA16_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ prefix.setOpcode(X86::DATA16_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ prefix.setOpcode(X86::REX64_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ }
+
+ MCInst call;
+ if (is64Bits)
+ call.setOpcode(X86::CALL64pcrel32);
+ else
+ call.setOpcode(X86::CALLpcrel32);
+ StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
+ MCSymbol *tlsGetAddr = context.GetOrCreateSymbol(name);
+ const MCSymbolRefExpr *tlsRef =
+ MCSymbolRefExpr::Create(tlsGetAddr,
+ MCSymbolRefExpr::VK_PLT,
+ context);
+
+ call.addOperand(MCOperand::CreateExpr(tlsRef));
+ OutStreamer.EmitInstruction(call);
+}
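LowerTlsAddr hand-emits the linker-recognized general-dynamic TLS sequence; on 64-bit the data16 and rex64 prefixes are padding that brings the sequence to the canonical 16 bytes linkers can relax to initial-exec or local-exec. A source that produces it (assuming -fPIC on x86-64 ELF):

  // With -fPIC this access lowers to:
  //   .byte 0x66                    # data16
  //   leaq  x@tlsgd(%rip), %rdi
  //   .word 0x6666                  # data16 data16
  //   rex64 call __tls_get_addr@plt
  __thread int x;
  int get() { return x; }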
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(Mang, *MF, *this);
@@ -559,7 +621,11 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Lower these as normal, but add some comments.
OutStreamer.AddComment("TAILCALL");
break;
-
+
+ case X86::TLS_addr32:
+ case X86::TLS_addr64:
+ return LowerTlsAddr(OutStreamer, MCInstLowering, *MI);
+
case X86::MOVPC32r: {
MCInst TmpInst;
// This is a pseudo op for a two instruction sequence with a label, which
diff --git a/lib/Target/X86/X86MachObjectWriter.cpp b/lib/Target/X86/X86MachObjectWriter.cpp
new file mode 100644
index 0000000..8f3dd32
--- /dev/null
+++ b/lib/Target/X86/X86MachObjectWriter.cpp
@@ -0,0 +1,32 @@
+//===-- X86MachObjectWriter.cpp - X86 Mach-O Writer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+using namespace llvm;
+
+namespace {
+class X86MachObjectWriter : public MCMachObjectTargetWriter {
+public:
+ X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/Is64Bit) {}
+};
+}
+
+MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(new X86MachObjectWriter(Is64Bit,
+ CPUType,
+ CPUSubtype),
+ OS, /*IsLittleEndian=*/true);
+}
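A hypothetical call site for the new factory, e.g. from a Darwin asm backend's object-writer hook; the surrounding function is illustrative, but the constants are the standard Mach-O values for 64-bit x86:

  MCObjectWriter *createObjectWriter(raw_ostream &OS) {
    return createX86MachObjectWriter(OS, /*Is64Bit=*/true,
                                     /*CPUType=*/0x01000007,   // CPU_TYPE_X86_64
                                     /*CPUSubtype=*/3);        // CPU_SUBTYPE_X86_64_ALL
  }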
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 0f505d1..2f6bd88 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -31,7 +31,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -60,7 +60,7 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
Is64Bit = Subtarget->is64Bit();
IsWin64 = Subtarget->isTargetWin64();
- StackAlign = TM.getFrameInfo()->getStackAlignment();
+ StackAlign = TM.getFrameLowering()->getStackAlignment();
if (Is64Bit) {
SlotSize = 8;
@@ -316,10 +316,16 @@ X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
if (TM.getSubtarget<X86Subtarget>().is64Bit())
return &X86::GR64RegClass;
return &X86::GR32RegClass;
- case 1: // Normal GRPs except the stack pointer (for encoding reasons).
+ case 1: // Normal GPRs except the stack pointer (for encoding reasons).
if (TM.getSubtarget<X86Subtarget>().is64Bit())
return &X86::GR64_NOSPRegClass;
return &X86::GR32_NOSPRegClass;
+ case 2: // Available for tailcall (not callee-saved GPRs).
+ if (TM.getSubtarget<X86Subtarget>().isTargetWin64())
+ return &X86::GR64_TCW64RegClass;
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return &X86::GR64_TCRegClass;
+ return &X86::GR32_TCRegClass;
}
}
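Case 2 is the target-side half of the ptr_rc_tailcall operand added to X86InstrInfo.td: a PointerLikeRegClass<Kind> operand is resolved during selection by calling getPointerRegClass(Kind), so the same i64mem_TC operand picks GR64_TCW64 on Win64 (where RSI and RDI are callee-saved and thus unusable after the epilogue) and GR64_TC or GR32_TC elsewhere. Sketch:

  // How a PointerLikeRegClass<2> operand is resolved at selection time.
  const TargetRegisterClass *
  resolveTailCallPtrClass(const X86RegisterInfo &TRI) {
    return TRI.getPointerRegClass(/*Kind=*/2);  // GR64_TCW64 / GR64_TC / GR32_TC
  }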
@@ -388,6 +394,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
// Set the stack-pointer register and its aliases as reserved.
Reserved.set(X86::RSP);
Reserved.set(X86::ESP);
@@ -400,7 +408,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(X86::IP);
// Set the frame-pointer register and its aliases as reserved if needed.
- if (hasFP(MF)) {
+ if (TFI->hasFP(MF)) {
Reserved.set(X86::RBP);
Reserved.set(X86::EBP);
Reserved.set(X86::BP);
@@ -425,21 +433,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-/// hasFP - Return true if the specified function should have a dedicated frame
-/// pointer register. This is true if the function has variable sized allocas
-/// or if frame pointer elimination is disabled.
-bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const MachineModuleInfo &MMI = MF.getMMI();
-
- return (DisableFramePointerElim(MF) ||
- needsStackRealignment(MF) ||
- MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken() ||
- MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
- MMI.callsUnwindInit());
-}
-
bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return (RealignStack &&
@@ -458,62 +451,25 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
if (0 && requiresRealignment && MFI->hasVarSizedObjects())
report_fatal_error(
"Stack realignment in presense of dynamic allocas is not supported");
-
+
// If we've requested that we force align the stack do so now.
if (ForceStackAlign)
return canRealignStack(MF);
-
- return requiresRealignment && canRealignStack(MF);
-}
-bool X86RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
- return !MF.getFrameInfo()->hasVarSizedObjects();
+ return requiresRealignment && canRealignStack(MF);
}
bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
unsigned Reg, int &FrameIdx) const {
- if (Reg == FramePtr && hasFP(MF)) {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (Reg == FramePtr && TFI->hasFP(MF)) {
FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
return true;
}
return false;
}
-int
-X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
- const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea();
- uint64_t StackSize = MFI->getStackSize();
-
- if (needsStackRealignment(MF)) {
- if (FI < 0) {
- // Skip the saved EBP.
- Offset += SlotSize;
- } else {
- unsigned Align = MFI->getObjectAlignment(FI);
- assert((-(Offset + StackSize)) % Align == 0);
- Align = 0;
- return Offset + StackSize;
- }
- // FIXME: Support tail calls
- } else {
- if (!hasFP(MF))
- return Offset + StackSize;
-
- // Skip the saved EBP.
- Offset += SlotSize;
-
- // Skip the RETADDR move area
- const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
- if (TailCallReturnAddrDelta < 0)
- Offset -= TailCallReturnAddrDelta;
- }
-
- return Offset;
-}
-
static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
if (is64Bit) {
if (isInt<8>(Imm))
@@ -541,69 +497,70 @@ static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
void X86RegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- if (!hasReservedCallFrame(MF)) {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  bool reserveCallFrame = TFI->hasReservedCallFrame(MF);
+ int Opcode = I->getOpcode();
+ bool isDestroy = Opcode == getCallFrameDestroyOpcode();
+ DebugLoc DL = I->getDebugLoc();
+  uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
+ uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
+ I = MBB.erase(I);
+
+  if (!reserveCallFrame) {
// If the stack pointer can be changed after prologue, turn the
// adjcallstackup instruction into a 'sub ESP, <amt>' and the
// adjcallstackdown instruction into 'add ESP, <amt>'
// TODO: consider using push / pop instead of sub + store / add
- MachineInstr *Old = I;
- uint64_t Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
-
- MachineInstr *New = 0;
- if (Old->getOpcode() == getCallFrameSetupOpcode()) {
- New = BuildMI(MF, Old->getDebugLoc(),
- TII.get(getSUBriOpcode(Is64Bit, Amount)),
- StackPtr)
- .addReg(StackPtr)
- .addImm(Amount);
- } else {
- assert(Old->getOpcode() == getCallFrameDestroyOpcode());
-
- // Factor out the amount the callee already popped.
- uint64_t CalleeAmt = Old->getOperand(1).getImm();
- Amount -= CalleeAmt;
-
- if (Amount) {
- unsigned Opc = getADDriOpcode(Is64Bit, Amount);
- New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(Amount);
- }
- }
+ if (Amount == 0)
+ return;
+
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
+
+ MachineInstr *New = 0;
+ if (Opcode == getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(Amount);
+ } else {
+ assert(Opcode == getCallFrameDestroyOpcode());
- if (New) {
- // The EFLAGS implicit def is dead.
- New->getOperand(3).setIsDead();
+ // Factor out the amount the callee already popped.
+ Amount -= CalleeAmt;
- // Replace the pseudo instruction with a new instruction.
- MBB.insert(I, New);
+ if (Amount) {
+ unsigned Opc = getADDriOpcode(Is64Bit, Amount);
+ New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(Amount);
}
}
- } else if (I->getOpcode() == getCallFrameDestroyOpcode()) {
- // If we are performing frame pointer elimination and if the callee pops
- // something off the stack pointer, add it back. We do this until we have
- // more advanced stack pointer tracking ability.
- if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
- unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
- MachineInstr *Old = I;
- MachineInstr *New =
- BuildMI(MF, Old->getDebugLoc(), TII.get(Opc),
- StackPtr)
- .addReg(StackPtr)
- .addImm(CalleeAmt);
+ if (New) {
// The EFLAGS implicit def is dead.
New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction.
MBB.insert(I, New);
}
+
+ return;
}
- MBB.erase(I);
+ if (Opcode == getCallFrameDestroyOpcode() && CalleeAmt) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back. We do this until we have
+ // more advanced stack pointer tracking ability.
+ unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
+ MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(CalleeAmt);
+
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+ MBB.insert(I, New);
+ }
}
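The rounding step keeps outgoing-argument space a multiple of the stack alignment. A worked instance of the expression used above:

  #include <cstdint>

  // e.g. Amount = 20, StackAlign = 16:
  //   (20 + 16 - 1) / 16 * 16  ->  35 / 16 * 16  ->  2 * 16  ->  32
  uint64_t roundUpToStackAlign(uint64_t Amount, uint64_t StackAlign) {
    return (Amount + StackAlign - 1) / StackAlign * StackAlign;
  }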
void
@@ -614,6 +571,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned i = 0;
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
while (!MI.getOperand(i).isFI()) {
++i;
@@ -630,7 +588,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
else if (AfterFPPop)
BasePtr = StackPtr;
else
- BasePtr = (hasFP(MF) ? FramePtr : StackPtr);
+ BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr);
// This must be part of a four operand memory reference. Replace the
// FrameIndex with base register with EBP. Add an offset to the offset.
@@ -640,11 +598,10 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FIOffset;
if (AfterFPPop) {
// Tail call jmp happens after FP is popped.
- const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
const MachineFrameInfo *MFI = MF.getFrameInfo();
- FIOffset = MFI->getObjectOffset(FrameIndex) - TFI.getOffsetOfLocalArea();
+ FIOffset = MFI->getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea();
} else
- FIOffset = getFrameIndexOffset(MF, FrameIndex);
+ FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
if (MI.getOperand(i+3).isImm()) {
// Offset is a 32-bit integer.
@@ -657,68 +614,14 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
-void
-X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
-
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
-
- if (TailCallReturnAddrDelta < 0) {
- // create RETURNADDR area
- // arg
- // arg
- // RETADDR
- // { ...
- // RETADDR area
- // ...
- // }
- // [EBP]
- MFI->CreateFixedObject(-TailCallReturnAddrDelta,
- (-1U*SlotSize)+TailCallReturnAddrDelta, true);
- }
-
- if (hasFP(MF)) {
- assert((TailCallReturnAddrDelta <= 0) &&
- "The Delta should always be zero or negative");
- const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
-
- // Create a frame entry for the EBP register that must be saved.
- int FrameIdx = MFI->CreateFixedObject(SlotSize,
- -(int)SlotSize +
- TFI.getOffsetOfLocalArea() +
- TailCallReturnAddrDelta,
- true);
- assert(FrameIdx == MFI->getObjectIndexBegin() &&
- "Slot for EBP register must be last in order to be found!");
- FrameIdx = 0;
- }
-}
-
unsigned X86RegisterInfo::getRARegister() const {
return Is64Bit ? X86::RIP // Should have dwarf #16.
: X86::EIP; // Should have dwarf #8.
}
unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- return hasFP(MF) ? FramePtr : StackPtr;
-}
-
-void
-X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const {
- // Calculate amount of bytes used for return address storing
- int stackGrowth = (Is64Bit ? -8 : -4);
-
- // Initial state of the frame pointer is esp+stackGrowth.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(StackPtr, stackGrowth);
- Moves.push_back(MachineMove(0, Dst, Src));
-
- // Add return address to move list
- MachineLocation CSDst(StackPtr, stackGrowth);
- MachineLocation CSSrc(getRARegister());
- Moves.push_back(MachineMove(0, CSDst, CSSrc));
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ return TFI->hasFP(MF) ? FramePtr : StackPtr;
}
unsigned X86RegisterInfo::getEHExceptionRegister() const {
@@ -917,13 +820,13 @@ namespace {
// Be over-conservative: scan over all vreg defs and find whether vector
  // registers are used. If yes, there is a possibility that a vector register
// will be spilled and thus require dynamic stack realignment.
- for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
- RegNum < RI.getLastVirtReg(); ++RegNum)
- if (RI.getRegClass(RegNum)->getAlignment() > StackAlignment) {
+ for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) {
FuncInfo->setReserveFP(true);
return true;
}
-
+ }
// Nothing to do
return false;
}
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 0796191..064be64 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -111,14 +111,10 @@ public:
/// register scavenger to determine what registers are free.
BitVector getReservedRegs(const MachineFunction &MF) const;
- bool hasFP(const MachineFunction &MF) const;
-
bool canRealignStack(const MachineFunction &MF) const;
bool needsStackRealignment(const MachineFunction &MF) const;
- bool hasReservedCallFrame(const MachineFunction &MF) const;
-
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const;
@@ -129,9 +125,6 @@ public:
void eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, RegScavenger *RS = NULL) const;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
-
// Debug information queries.
unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
@@ -139,9 +132,6 @@ public:
  // FIXME: Move to FrameLowering
unsigned getSlotSize() const { return SlotSize; }
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
- void getInitialFrameState(std::vector<MachineMove> &Moves) const;
-
// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 8fc26d7..612fac2 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -1,10 +1,10 @@
//===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 Register file, defining the registers themselves,
@@ -34,8 +34,8 @@ let Namespace = "X86" in {
// because the register file generator is smart enough to figure out that
// AL aliases AX if we tell it that AX aliased AL (for example).
- // Dwarf numbering is different for 32-bit and 64-bit, and there are
- // variations by target as well. Currently the first entry is for X86-64,
+ // Dwarf numbering is different for 32-bit and 64-bit, and there are
+ // variations by target as well. Currently the first entry is for X86-64,
// second - for EH on X86-32/Darwin and third is 'generic' one (X86-32/Linux
// and debug information on X86-32/Darwin)
@@ -81,7 +81,7 @@ let Namespace = "X86" in {
def SP : RegisterWithSubRegs<"sp", [SPL]>, DwarfRegNum<[7, 5, 4]>;
}
def IP : Register<"ip">, DwarfRegNum<[16]>;
-
+
// X86-64 only
let SubRegIndices = [sub_8bit] in {
def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>;
@@ -103,8 +103,8 @@ let Namespace = "X86" in {
def EDI : RegisterWithSubRegs<"edi", [DI]>, DwarfRegNum<[5, 7, 7]>;
def EBP : RegisterWithSubRegs<"ebp", [BP]>, DwarfRegNum<[6, 4, 5]>;
def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[7, 5, 4]>;
- def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>;
-
+ def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>;
+
// X86-64 only
def R8D : RegisterWithSubRegs<"r8d", [R8W]>, DwarfRegNum<[8, -2, -2]>;
def R9D : RegisterWithSubRegs<"r9d", [R9W]>, DwarfRegNum<[9, -2, -2]>;
@@ -208,7 +208,7 @@ let Namespace = "X86" in {
def ST4 : Register<"st(4)">, DwarfRegNum<[37, 16, 15]>;
def ST5 : Register<"st(5)">, DwarfRegNum<[38, 17, 16]>;
def ST6 : Register<"st(6)">, DwarfRegNum<[39, 18, 17]>;
- def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>;
+ def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>;
// Status flags register
def EFLAGS : Register<"flags">;
@@ -220,7 +220,7 @@ let Namespace = "X86" in {
def ES : Register<"es">;
def FS : Register<"fs">;
def GS : Register<"gs">;
-
+
// Debug registers
def DR0 : Register<"dr0">;
def DR1 : Register<"dr1">;
@@ -230,7 +230,7 @@ let Namespace = "X86" in {
def DR5 : Register<"dr5">;
def DR6 : Register<"dr6">;
def DR7 : Register<"dr7">;
-
+
// Control registers
def CR0 : Register<"cr0">;
def CR1 : Register<"cr1">;
@@ -261,10 +261,10 @@ let Namespace = "X86" in {
// implicitly defined to be the register allocation order.
//
-// List call-clobbered registers before callee-save registers. RBX, RBP, (and
+// List call-clobbered registers before callee-save registers. RBX, RBP, (and
// R12, R13, R14, and R15 for X86-64) are callee-save registers.
// In 64-mode, there are 12 additional i8 registers, SIL, DIL, BPL, SPL, and
-// R8B, ... R15B.
+// R8B, ... R15B.
// Allocate R12 and R13 last, as these require an extra byte when
// encoded in x86_64 instructions.
// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
@@ -299,14 +299,14 @@ def GR8 : RegisterClass<"X86", [i8], 8,
GR8Class::iterator
GR8Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP / EBP to being a frame ptr?
if (!Subtarget.is64Bit())
// In 32-mode, none of the 8-bit registers aliases EBP or ESP.
return begin() + 8;
- else if (RI->hasFP(MF) || MFI->getReserveFP())
+ else if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate SPL or BPL.
return array_endof(X86_GR8_AO_64) - 1;
else
@@ -344,12 +344,12 @@ def GR16 : RegisterClass<"X86", [i16], 16,
GR16Class::iterator
GR16Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (Subtarget.is64Bit()) {
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate SP or BP.
return array_endof(X86_GR16_AO_64) - 1;
else
@@ -357,7 +357,7 @@ def GR16 : RegisterClass<"X86", [i16], 16,
return array_endof(X86_GR16_AO_64);
} else {
// Does the function dedicate EBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate SP or BP.
return begin() + 6;
else
@@ -396,12 +396,12 @@ def GR32 : RegisterClass<"X86", [i32], 32,
GR32Class::iterator
GR32Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (Subtarget.is64Bit()) {
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate ESP or EBP.
return array_endof(X86_GR32_AO_64) - 1;
else
@@ -409,7 +409,7 @@ def GR32 : RegisterClass<"X86", [i32], 32,
return array_endof(X86_GR32_AO_64);
} else {
// Does the function dedicate EBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate ESP or EBP.
return begin() + 6;
else
@@ -436,13 +436,13 @@ def GR64 : RegisterClass<"X86", [i64], 64,
GR64Class::iterator
GR64Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (!Subtarget.is64Bit())
return begin(); // None of these are allocatable in 32-bit.
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
return end()-3; // If so, don't allocate RIP, RSP or RBP
else
return end()-2; // If not, just don't allocate RIP or RSP
@@ -496,6 +496,9 @@ def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
(GR32_TC sub_32bit)];
}
+def GR64_TCW64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX,
+ R8, R9, R11]>;
+
// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
[AL, CL, DL, AH, CH, DH, BL, BH]> {
@@ -541,10 +544,10 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16,
GR16_NOREXClass::iterator
GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP / EBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate SP or BP.
return end() - 2;
else
@@ -565,10 +568,10 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32,
GR32_NOREXClass::iterator
GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP / EBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate ESP or EBP.
return end() - 2;
else
@@ -590,10 +593,10 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64,
GR64_NOREXClass::iterator
GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate RIP, RSP or RBP.
return end() - 3;
else
@@ -632,12 +635,12 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32,
GR32_NOSPClass::iterator
GR32_NOSPClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (Subtarget.is64Bit()) {
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate EBP.
return array_endof(X86_GR32_NOSP_AO_64) - 1;
else
@@ -645,7 +648,7 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32,
return array_endof(X86_GR32_NOSP_AO_64);
} else {
// Does the function dedicate EBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate EBP.
return begin() + 6;
else
@@ -670,13 +673,13 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64,
GR64_NOSPClass::iterator
GR64_NOSPClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (!Subtarget.is64Bit())
return begin(); // None of these are allocatable in 32-bit.
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
return end()-1; // If so, don't allocate RBP
else
return end(); // If not, any reg in this class is ok.
@@ -698,10 +701,10 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
GR64_NOREX_NOSPClass::allocation_order_end(const MachineFunction &MF) const
{
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate RBP.
return end() - 1;
else
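
Every allocation_order_end hook in this register file now asks the frame-lowering object, not the register info, whether the function keeps a dedicated frame pointer. A minimal self-contained sketch of that pattern (the hasFP/getReserveFP names match this patch; the Mock type and the toy allocation order are invented for illustration):

    #include <cassert>
    #include <cstddef>

    // Stand-in for llvm::TargetFrameLowering: after this patch the
    // frame-pointer query lives here, not on TargetRegisterInfo.
    struct MockFrameLowering {
      bool HasFP;
      bool hasFP() const { return HasFP; } // real API takes a MachineFunction&
    };

    // Toy 64-bit GPR allocation order; the last three entries model
    // RBP, RSP and RIP, which must never reach the allocator.
    static const char *GR64Order[] = {
      "rax", "rcx", "rdx", "rsi", "rdi", "r8", "rbp", "rsp", "rip"
    };

    // Mirrors GR64Class::allocation_order_end: always trim RIP and RSP,
    // and RBP too when the function dedicates it to frame access.
    static const char *const *gr64End(const MockFrameLowering &TFI,
                                      bool ReserveFP) {
      const char *const *End =
          GR64Order + sizeof(GR64Order) / sizeof(GR64Order[0]);
      return (TFI.hasFP() || ReserveFP) ? End - 3 : End - 2;
    }

    int main() {
      MockFrameLowering NoFP{false}, WithFP{true};
      assert(gr64End(NoFP, false) - GR64Order == 7);   // rbp still allocatable
      assert(gr64End(WithFP, false) - GR64Order == 6); // rbp reserved
      return 0;
    }
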
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 0c5a0ac..42e8193 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -136,7 +136,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
Dst, InFlag);
InFlag = Chain.getValue(1);
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
@@ -150,7 +150,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
X86::ECX,
Left, InFlag);
InFlag = Chain.getValue(1);
- Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
} else if (BytesLeft) {
@@ -230,7 +230,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
Src, InFlag);
InFlag = Chain.getValue(1);
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
array_lengthof(Ops));
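
The only change in this file is mechanical: MVT::Flag, the zero-width value type that glues a producer node to its consumer so the scheduler cannot separate them, is now spelled MVT::Glue. A toy model of the idea (pure illustration; no LLVM types, the SimpleVT enum is invented):

    #include <cstdio>
    #include <string>
    #include <vector>

    // A node result is a value, a chain, or "glue" (the renamed MVT::Flag):
    // a scheduling edge that pins the consumer directly after the producer.
    enum SimpleVT { Other /*chain*/, i32, Glue };

    struct Node {
      std::string Name;
      std::vector<SimpleVT> Results; // the node's VTList
    };

    int main() {
      // Equivalent of DAG.getVTList(MVT::Other, MVT::Glue) in this patch:
      Node RepStos{"X86ISD::REP_STOS", {Other, Glue}};
      for (SimpleVT VT : RepStos.Results)
        std::printf("%s result: %s\n", RepStos.Name.c_str(),
                    VT == Other ? "chain" : VT == Glue ? "glue" : "i32");
      return 0;
    }
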
diff --git a/lib/Target/X86/X86ShuffleDecode.h b/lib/Target/X86/X86ShuffleDecode.h
deleted file mode 100644
index df04052..0000000
--- a/lib/Target/X86/X86ShuffleDecode.h
+++ /dev/null
@@ -1,155 +0,0 @@
-//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Define several functions to decode x86 specific shuffle semantics into a
-// generic vector mask.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86_SHUFFLE_DECODE_H
-#define X86_SHUFFLE_DECODE_H
-
-#include "llvm/ADT/SmallVector.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Vector Mask Decoding
-//===----------------------------------------------------------------------===//
-
-enum {
- SM_SentinelZero = ~0U
-};
-
-static inline
-void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
- // Defaults the copying the dest value.
- ShuffleMask.push_back(0);
- ShuffleMask.push_back(1);
- ShuffleMask.push_back(2);
- ShuffleMask.push_back(3);
-
- // Decode the immediate.
- unsigned ZMask = Imm & 15;
- unsigned CountD = (Imm >> 4) & 3;
- unsigned CountS = (Imm >> 6) & 3;
-
- // CountS selects which input element to use.
- unsigned InVal = 4+CountS;
- // CountD specifies which element of destination to update.
- ShuffleMask[CountD] = InVal;
- // ZMask zaps values, potentially overriding the CountD elt.
- if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
- if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
- if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
- if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
-}
-
-// <3,1> or <6,7,2,3>
-static void DecodeMOVHLPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = NElts/2; i != NElts; ++i)
- ShuffleMask.push_back(NElts+i);
-
- for (unsigned i = NElts/2; i != NElts; ++i)
- ShuffleMask.push_back(i);
-}
-
-// <0,2> or <0,1,4,5>
-static void DecodeMOVLHPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i)
- ShuffleMask.push_back(i);
-
- for (unsigned i = 0; i != NElts/2; ++i)
- ShuffleMask.push_back(NElts+i);
-}
-
-static void DecodePSHUFMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts; ++i) {
- ShuffleMask.push_back(Imm % NElts);
- Imm /= NElts;
- }
-}
-
-static void DecodePSHUFHWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- ShuffleMask.push_back(0);
- ShuffleMask.push_back(1);
- ShuffleMask.push_back(2);
- ShuffleMask.push_back(3);
- for (unsigned i = 0; i != 4; ++i) {
- ShuffleMask.push_back(4+(Imm & 3));
- Imm >>= 2;
- }
-}
-
-static void DecodePSHUFLWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != 4; ++i) {
- ShuffleMask.push_back((Imm & 3));
- Imm >>= 2;
- }
- ShuffleMask.push_back(4);
- ShuffleMask.push_back(5);
- ShuffleMask.push_back(6);
- ShuffleMask.push_back(7);
-}
-
-static void DecodePUNPCKLMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(i);
- ShuffleMask.push_back(i+NElts);
- }
-}
-
-static void DecodePUNPCKHMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(i+NElts/2);
- ShuffleMask.push_back(i+NElts+NElts/2);
- }
-}
-
-static void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- // Part that reads from dest.
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(Imm % NElts);
- Imm /= NElts;
- }
- // Part that reads from src.
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(Imm % NElts + NElts);
- Imm /= NElts;
- }
-}
-
-static void DecodeUNPCKHPMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(i+NElts/2); // Reads from dest
- ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src
- }
-}
-
-
-/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
-/// etc. NElts indicates the number of elements in the vector allowing it to
-/// handle different datatypes and vector widths.
-static void DecodeUNPCKLPMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(i); // Reads from dest
- ShuffleMask.push_back(i+NElts); // Reads from src
- }
-}
-
-#endif
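
The decoders deleted above are not gone: per the diffstat they reappear under lib/Target/X86/Utils/, where both the instruction-comment printer and codegen can share them. The PSHUF decode, for instance, just peels one base-NElts digit per result element off the immediate; here is a standalone version (logic copied from the deleted body, with a small driver added):

    #include <cstdio>
    #include <vector>

    // Same logic as the deleted DecodePSHUFMask: each result element of a
    // pshufd is selected by successive base-NElts digits of the immediate.
    static void DecodePSHUFMask(unsigned NElts, unsigned Imm,
                                std::vector<unsigned> &ShuffleMask) {
      for (unsigned i = 0; i != NElts; ++i) {
        ShuffleMask.push_back(Imm % NElts);
        Imm /= NElts;
      }
    }

    int main() {
      std::vector<unsigned> Mask;
      DecodePSHUFMask(4, 0x1B, Mask); // 0x1B = 0b00011011 -> <3,2,1,0>
      for (unsigned Elt : Mask)
        std::printf("%u ", Elt);      // prints: 3 2 1 0
      std::printf("\n");
      return 0;
    }
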
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 0d02e5e..1ee7312 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -1,4 +1,4 @@
-//===-- X86Subtarget.cpp - X86 Subtarget Information ------------*- C++ -*-===//
+//===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,7 +18,7 @@
#include "llvm/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Host.h"
+#include "llvm/Support/Host.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallVector.h"
@@ -256,13 +256,14 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
if ((ECX >> 9) & 1) X86SSELevel = SSSE3;
if ((ECX >> 19) & 1) X86SSELevel = SSE41;
if ((ECX >> 20) & 1) X86SSELevel = SSE42;
+ // FIXME: AVX codegen support is not ready.
+ //if ((ECX >> 28) & 1) { HasAVX = true; X86SSELevel = NoMMXSSE; }
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);
HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
- HasAVX = ((ECX >> 28) & 0x1);
HasAES = IsIntel && ((ECX >> 25) & 0x1);
if (IsIntel || IsAMD) {
@@ -289,6 +290,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
, X863DNowLevel(NoThreeDNow)
, HasCMov(false)
, HasX86_64(false)
+ , HasPOPCNT(false)
, HasSSE4A(false)
, HasAVX(false)
, HasAES(false)
@@ -315,11 +317,13 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
ParseSubtargetFeatures(FS, CPU);
// All X86-64 CPUs also have SSE2, however user might request no SSE via
// -mattr, so don't force SSELevel here.
+ if (HasAVX)
+ X86SSELevel = NoMMXSSE;
} else {
// Otherwise, use CPUID to auto-detect feature set.
AutoDetectSubtargetFeatures();
// Make sure SSE2 is enabled; it is available on all X86-64 CPUs.
- if (Is64Bit && X86SSELevel < SSE2)
+ if (Is64Bit && !HasAVX && X86SSELevel < SSE2)
X86SSELevel = SSE2;
}
@@ -338,9 +342,10 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
assert((!Is64Bit || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
- // Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64
- // bit targets.
- if (isTargetDarwin() || Is64Bit)
+ // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
+ // 32 and 64 bit) and for all 64-bit targets.
+ if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() ||
+ isTargetSolaris() || Is64Bit)
stackAlignment = 16;
if (StackAlignment)
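
AutoDetectSubtargetFeatures reads CPUID leaf 1 and picks feature bits out of ECX; this hunk turns AVX detection (bit 28) off until codegen is ready, and the header below grows a POPCNT flag. A standalone probe of the same leaf, as a sketch (GCC/Clang only, via <cpuid.h>; bits 20 and 28 are taken from this patch, bit 23 is the architectural POPCNT bit):

    #include <cpuid.h>
    #include <cstdio>

    int main() {
      unsigned EAX, EBX, ECX, EDX;
      if (!__get_cpuid(1, &EAX, &EBX, &ECX, &EDX))
        return 1;                        // CPUID leaf 1 unsupported
      bool HasSSE42  = (ECX >> 20) & 1;  // same test as the hunk above
      bool HasPOPCNT = (ECX >> 23) & 1;  // flag added in X86Subtarget.h below
      bool HasAVX    = (ECX >> 28) & 1;  // detection disabled by this patch
      std::printf("sse4.2=%d popcnt=%d avx=%d\n", HasSSE42, HasPOPCNT, HasAVX);
      return 0;
    }
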
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 2e73124..0a62a02 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -65,6 +65,9 @@ protected:
///
bool HasX86_64;
+ /// HasPOPCNT - True if the processor supports POPCNT.
+ bool HasPOPCNT;
+
/// HasSSE4A - True if the processor supports SSE4A instructions.
bool HasSSE4A;
@@ -100,7 +103,7 @@ protected:
/// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
///
unsigned MaxInlineSizeThreshold;
-
+
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
@@ -150,7 +153,10 @@ public:
bool hasSSE4A() const { return HasSSE4A; }
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+ bool hasPOPCNT() const { return HasPOPCNT; }
bool hasAVX() const { return HasAVX; }
+ bool hasXMM() const { return hasSSE1() || hasAVX(); }
+ bool hasXMMInt() const { return hasSSE2() || hasAVX(); }
bool hasAES() const { return HasAES; }
bool hasCLMUL() const { return HasCLMUL; }
bool hasFMA3() const { return HasFMA3; }
@@ -160,23 +166,23 @@ public:
bool hasVectorUAMem() const { return HasVectorUAMem; }
bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
-
+ bool isTargetFreeBSD() const { return TargetTriple.getOS() == Triple::FreeBSD; }
+ bool isTargetSolaris() const { return TargetTriple.getOS() == Triple::Solaris; }
+
// ELF is a reasonably sane default and the only other X86 targets we
// support are Darwin and Windows. Just use "not those".
- bool isTargetELF() const {
+ bool isTargetELF() const {
return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing();
}
bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; }
- bool isTargetMingw() const {
- return TargetTriple.getOS() == Triple::MinGW32 ||
- TargetTriple.getOS() == Triple::MinGW64; }
+ bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; }
bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; }
bool isTargetCygMing() const {
return isTargetMingw() || isTargetCygwin();
}
-
+
/// isTargetCOFF - Return true if this is any COFF/Windows target variant.
bool isTargetCOFF() const {
return isTargetMingw() || isTargetCygwin() || isTargetWindows();
@@ -186,6 +192,10 @@ public:
return Is64Bit && (isTargetMingw() || isTargetWindows());
}
+ bool isTargetEnvMacho() const {
+ return isTargetDarwin() || (TargetTriple.getEnvironment() == Triple::MachO);
+ }
+
bool isTargetWin32() const {
return !Is64Bit && (isTargetMingw() || isTargetWindows());
}
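
hasXMM and hasXMMInt exist because the constructor change above forces X86SSELevel back to NoMMXSSE whenever AVX is enabled, so a bare hasSSE1()/hasSSE2() test would start failing on AVX machines even though XMM registers are present. A compact model of the three predicates (a sketch; the enum is trimmed, the logic follows this header):

    #include <cassert>

    enum SSELevel { NoMMXSSE, SSE1, SSE2 /* ... */ };

    struct Subtarget {
      SSELevel X86SSELevel;
      bool HasAVX;
      bool hasSSE1() const { return X86SSELevel >= SSE1; }
      bool hasSSE2() const { return X86SSELevel >= SSE2; }
      bool hasAVX() const { return HasAVX; }
      // New in this patch: "XMM registers exist / support integer ops",
      // phrased so AVX (which zeroes X86SSELevel) still satisfies them.
      bool hasXMM() const { return hasSSE1() || hasAVX(); }
      bool hasXMMInt() const { return hasSSE2() || hasAVX(); }
    };

    int main() {
      Subtarget AVXOnly{NoMMXSSE, true}; // the post-patch AVX configuration
      assert(!AVXOnly.hasSSE2() && AVXOnly.hasXMMInt());
      Subtarget SSE2Only{SSE2, false};
      assert(SSE2Only.hasXMM() && SSE2Only.hasXMMInt());
      return 0;
    }
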
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 4c01e60..8fb9470 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -30,10 +30,12 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
case Triple::Darwin:
return new X86MCAsmInfoDarwin(TheTriple);
case Triple::MinGW32:
- case Triple::MinGW64:
case Triple::Cygwin:
case Triple::Win32:
- return new X86MCAsmInfoCOFF(TheTriple);
+ if (TheTriple.getEnvironment() == Triple::MachO)
+ return new X86MCAsmInfoDarwin(TheTriple);
+ else
+ return new X86MCAsmInfoCOFF(TheTriple);
default:
return new X86ELFMCAsmInfo(TheTriple);
}
@@ -43,22 +45,25 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
MCContext &Ctx, TargetAsmBackend &TAB,
raw_ostream &_OS,
MCCodeEmitter *_Emitter,
- bool RelaxAll) {
+ bool RelaxAll,
+ bool NoExecStack) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
case Triple::Darwin:
return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
case Triple::MinGW32:
- case Triple::MinGW64:
case Triple::Cygwin:
case Triple::Win32:
- return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
+ if (TheTriple.getEnvironment() == Triple::MachO)
+ return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+ else
+ return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
default:
- return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+ return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
}
}
-extern "C" void LLVMInitializeX86Target() {
+extern "C" void LLVMInitializeX86Target() {
// Register the target.
RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);
@@ -91,11 +96,11 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: X86TargetMachine(T, TT, FS, false),
DataLayout(getSubtargetImpl()->isTargetDarwin() ?
- "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32" :
+ "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-n8:16:32" :
(getSubtargetImpl()->isTargetCygMing() ||
getSubtargetImpl()->isTargetWindows()) ?
- "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32" :
- "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"),
+ "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-n8:16:32" :
+ "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-n8:16:32"),
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this),
@@ -106,7 +111,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: X86TargetMachine(T, TT, FS, true),
- DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"),
+ DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-n8:16:32:64"),
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this),
@@ -115,11 +120,11 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
/// X86TargetMachine ctor - Create an X86 target.
///
-X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
+X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
const std::string &FS, bool is64Bit)
- : LLVMTargetMachine(T, TT),
+ : LLVMTargetMachine(T, TT),
Subtarget(TT, FS, is64Bit),
- FrameInfo(Subtarget),
+ FrameLowering(*this, Subtarget),
ELFWriterInfo(is64Bit, true) {
DefRelocModel = getRelocationModel();
@@ -227,12 +232,12 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE) {
// FIXME: Move this to TargetJITInfo!
// On Darwin, do not override 64-bit setting made in X86TargetMachine().
- if (DefRelocModel == Reloc::Default &&
+ if (DefRelocModel == Reloc::Default &&
(!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) {
setRelocationModel(Reloc::Static);
Subtarget.setPICStyle(PICStyles::None);
}
-
+
PM.add(createX86JITCodeEmitterPass(*this, JCE));
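
Both factory functions above now dispatch on the triple's environment as well as its OS (and drop the separate MinGW64 case), so a Windows-family triple with a MachO environment gets Darwin-flavoured asm-info and a Mach-O streamer. A reduced model of that decision, with enums standing in for llvm::Triple:

    #include <cstdio>

    enum OSType  { Darwin, MinGW32, Cygwin, Win32, OtherOS };
    enum EnvType { UnknownEnv, MachO };

    // Mirrors the post-patch createMCAsmInfo: Windows-family triples with
    // a MachO environment are treated like Darwin; anything unrecognized
    // falls through to ELF.
    static const char *pickAsmInfo(OSType OS, EnvType Env) {
      switch (OS) {
      case Darwin:
        return "X86MCAsmInfoDarwin";
      case MinGW32:
      case Cygwin:
      case Win32:
        return Env == MachO ? "X86MCAsmInfoDarwin" : "X86MCAsmInfoCOFF";
      default:
        return "X86ELFMCAsmInfo";
      }
    }

    int main() {
      std::printf("%s\n", pickAsmInfo(Win32, MachO));      // X86MCAsmInfoDarwin
      std::printf("%s\n", pickAsmInfo(Win32, UnknownEnv)); // X86MCAsmInfoCOFF
      std::printf("%s\n", pickAsmInfo(OtherOS, UnknownEnv)); // X86ELFMCAsmInfo
      return 0;
    }
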
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 8ba7a71..5973922 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -18,13 +18,13 @@
#include "X86ELFWriterInfo.h"
#include "X86InstrInfo.h"
#include "X86ISelLowering.h"
-#include "X86FrameInfo.h"
+#include "X86FrameLowering.h"
#include "X86JITInfo.h"
#include "X86SelectionDAGInfo.h"
#include "X86Subtarget.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
@@ -32,7 +32,7 @@ class formatted_raw_ostream;
class X86TargetMachine : public LLVMTargetMachine {
X86Subtarget Subtarget;
- X86FrameInfo FrameInfo;
+ X86FrameLowering FrameLowering;
X86ELFWriterInfo ELFWriterInfo;
Reloc::Model DefRelocModel; // Reloc model before it's overridden.
@@ -48,7 +48,9 @@ public:
virtual const X86InstrInfo *getInstrInfo() const {
llvm_unreachable("getInstrInfo not implemented");
}
- virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
virtual X86JITInfo *getJITInfo() {
llvm_unreachable("getJITInfo not implemented");
}
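
This header rename closes the loop on the register-class hunks at the top of this section: the target machine now owns an X86FrameLowering and publishes it through the getFrameLowering hook that the allocation-order code calls. A minimal mock of that ownership-plus-virtual-accessor shape (all names suffixed Mock are invented; only the hook name comes from this patch):

    #include <cstdio>

    // Stand-ins for the renamed classes in this header.
    struct TargetFrameLowering {
      virtual ~TargetFrameLowering() {}
      virtual bool hasFP() const = 0;
    };

    struct X86FrameLoweringMock : TargetFrameLowering {
      bool hasFP() const override { return true; } // toy answer
    };

    // Mirrors X86TargetMachine after this patch: it owns the concrete
    // frame-lowering object and hands out a base-class pointer.
    struct TargetMachineMock {
      X86FrameLoweringMock FrameLowering;
      virtual const TargetFrameLowering *getFrameLowering() const {
        return &FrameLowering;
      }
      virtual ~TargetMachineMock() {}
    };

    int main() {
      TargetMachineMock TM;
      std::printf("hasFP = %d\n", TM.getFrameLowering()->hasFP());
      return 0;
    }
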