Diffstat (limited to 'lib/Target/X86')
-rw-r--r--  lib/Target/X86/CMakeLists.txt               1
-rw-r--r--  lib/Target/X86/X86.h                        3
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp        262
-rw-r--r--  lib/Target/X86/X86ISelLowering.h           35
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp        19
-rw-r--r--  lib/Target/X86/X86TargetMachine.h          20
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp 355
7 files changed, 377 insertions(+), 318 deletions(-)
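For context on what this commit enables: once the passes below are registered, any transform pass can query the X86 cost model through the generic TargetTransformInfo analysis group instead of reaching into TargetLowering, with X86TTI answering first and delegating unhandled queries down the stack (see addAnalysisPasses in the X86TargetMachine.cpp hunk below). A minimal consumer sketch, assuming the LLVM 3.2-era pass and include layout; the pass name VectorCostQuery is illustrative and not part of this commit, and include paths may differ slightly in other revisions:

#include "llvm/Pass.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/TargetTransformInfo.h"

using namespace llvm;

namespace {
// Hypothetical consumer of the TTI analysis group (not part of this commit).
// With the passes registered by X86TargetMachine::addAnalysisPasses(), the
// getAnalysis<TargetTransformInfo>() call resolves to X86TTI first, which
// delegates unhandled queries to BasicTTI/NoTTI beneath it on the stack.
struct VectorCostQuery : public FunctionPass {
  static char ID;
  VectorCostQuery() : FunctionPass(ID) {}

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<TargetTransformInfo>();
    AU.setPreservesAll();
  }

  virtual bool runOnFunction(Function &F) {
    const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
    // Cost of an <8 x i32> add: 4 on AVX1, per the AVX1CostTable below.
    Type *VecTy = VectorType::get(Type::getInt32Ty(F.getContext()), 8);
    unsigned Cost = TTI.getArithmeticInstrCost(Instruction::Add, VecTy);
    (void)Cost;
    return false;
  }
};
}

char VectorCostQuery::ID = 0;
static RegisterPass<VectorCostQuery> X("vec-cost-query",
                                       "Query X86 TTI costs (example)");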
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 19912cc..95f1f22 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -30,6 +30,7 @@ set(sources
   X86Subtarget.cpp
   X86TargetMachine.cpp
   X86TargetObjectFile.cpp
+  X86TargetTransformInfo.cpp
   X86VZeroUpper.cpp
   )
 
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 1e7b98d..2bff7ab 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -63,6 +63,9 @@ FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
 ///
 FunctionPass *createEmitX86CodeToMemory();
 
+/// \brief Creates an X86-specific Target Transformation Info pass.
+ImmutablePass *createX86TargetTransformInfoPass(const X86TargetMachine *TM);
+
 } // End llvm namespace
 
 #endif
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index f482ac9..4b00b46 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -18063,265 +18063,3 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
   return Res;
 }
-
-//===----------------------------------------------------------------------===//
-//
-// X86 cost model.
-//
-//===----------------------------------------------------------------------===//
-
-struct X86CostTblEntry {
-  int ISD;
-  MVT Type;
-  unsigned Cost;
-};
-
-static int
-FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) {
-  for (unsigned int i = 0; i < len; ++i)
-    if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty)
-      return i;
-
-  // Could not find an entry.
-  return -1;
-}
-
-struct X86TypeConversionCostTblEntry {
-  int ISD;
-  MVT Dst;
-  MVT Src;
-  unsigned Cost;
-};
-
-static int
-FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
-                   int ISD, MVT Dst, MVT Src) {
-  for (unsigned int i = 0; i < len; ++i)
-    if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst)
-      return i;
-
-  // Could not find an entry.
-  return -1;
-}
-
-ScalarTargetTransformInfo::PopcntHwSupport
-X86ScalarTargetTransformImpl::getPopcntHwSupport(unsigned TyWidth) const {
-  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
-  const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
-
-  // TODO: Currently the __builtin_popcount() implementation using SSE3
-  // instructions is inefficient. Once the problem is fixed, we should
-  // call ST.hasSSE3() instead of ST.hasSSE4().
-  return ST.hasSSE41() ? Fast : None;
-}
-
-unsigned X86VectorTargetTransformInfo::getNumberOfRegisters(bool Vector) const {
-  const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
-  if (ST.is64Bit())
-    return 16;
-  return 8;
-}
-
-unsigned
-X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
-                                                     Type *Ty) const {
-  // Legalize the type.
-  std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Ty);
-
-  int ISD = InstructionOpcodeToISD(Opcode);
-  assert(ISD && "Invalid opcode");
-
-  const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
-
-  static const X86CostTblEntry AVX1CostTable[] = {
-    // We don't have to scalarize unsupported ops. We can issue two half-sized
-    // operations and we only need to extract the upper YMM half.
-    // Two ops + 1 extract + 1 insert = 4.
-    { ISD::MUL, MVT::v8i32, 4 },
-    { ISD::SUB, MVT::v8i32, 4 },
-    { ISD::ADD, MVT::v8i32, 4 },
-    { ISD::MUL, MVT::v4i64, 4 },
-    { ISD::SUB, MVT::v4i64, 4 },
-    { ISD::ADD, MVT::v4i64, 4 },
-  };
-
-  // Look for AVX1 lowering tricks.
-  if (ST.hasAVX()) {
-    int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD,
-                          LT.second);
-    if (Idx != -1)
-      return LT.first * AVX1CostTable[Idx].Cost;
-  }
-  // Fallback to the default implementation.
-  return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty);
-}
-
-unsigned
-X86VectorTargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
-                                              unsigned Alignment,
-                                              unsigned AddressSpace) const {
-  // Legalize the type.
-  std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Src);
-  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
-         "Invalid Opcode");
-
-  const X86Subtarget &ST =
-    TLI->getTargetMachine().getSubtarget<X86Subtarget>();
-
-  // Each load/store unit costs 1.
-  unsigned Cost = LT.first * 1;
-
-  // On Sandybridge 256bit load/stores are double pumped
-  // (but not on Haswell).
-  if (LT.second.getSizeInBits() > 128 && !ST.hasAVX2())
-    Cost*=2;
-
-  return Cost;
-}
-
-unsigned
-X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
-                                                 unsigned Index) const {
-  assert(Val->isVectorTy() && "This must be a vector type");
-
-  if (Index != -1U) {
-    // Legalize the type.
-    std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Val);
-
-    // This type is legalized to a scalar type.
-    if (!LT.second.isVector())
-      return 0;
-
-    // The type may be split. Normalize the index to the new type.
-    unsigned Width = LT.second.getVectorNumElements();
-    Index = Index % Width;
-
-    // Floating point scalars are already located in index #0.
-    if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
-      return 0;
-  }
-
-  return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index);
-}
-
-unsigned X86VectorTargetTransformInfo::getCmpSelInstrCost(unsigned Opcode,
-                                                          Type *ValTy,
-                                                          Type *CondTy) const {
-  // Legalize the type.
-  std::pair<unsigned, MVT> LT = getTypeLegalizationCost(ValTy);
-
-  MVT MTy = LT.second;
-
-  int ISD = InstructionOpcodeToISD(Opcode);
-  assert(ISD && "Invalid opcode");
-
-  const X86Subtarget &ST =
-    TLI->getTargetMachine().getSubtarget<X86Subtarget>();
-
-  static const X86CostTblEntry SSE42CostTbl[] = {
-    { ISD::SETCC, MVT::v2f64, 1 },
-    { ISD::SETCC, MVT::v4f32, 1 },
-    { ISD::SETCC, MVT::v2i64, 1 },
-    { ISD::SETCC, MVT::v4i32, 1 },
-    { ISD::SETCC, MVT::v8i16, 1 },
-    { ISD::SETCC, MVT::v16i8, 1 },
-  };
-
-  static const X86CostTblEntry AVX1CostTbl[] = {
-    { ISD::SETCC, MVT::v4f64, 1 },
-    { ISD::SETCC, MVT::v8f32, 1 },
-    // AVX1 does not support 8-wide integer compare.
-    { ISD::SETCC, MVT::v4i64, 4 },
-    { ISD::SETCC, MVT::v8i32, 4 },
-    { ISD::SETCC, MVT::v16i16, 4 },
-    { ISD::SETCC, MVT::v32i8, 4 },
-  };
-
-  static const X86CostTblEntry AVX2CostTbl[] = {
-    { ISD::SETCC, MVT::v4i64, 1 },
-    { ISD::SETCC, MVT::v8i32, 1 },
-    { ISD::SETCC, MVT::v16i16, 1 },
-    { ISD::SETCC, MVT::v32i8, 1 },
-  };
-
-  if (ST.hasAVX2()) {
-    int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
-    if (Idx != -1)
-      return LT.first * AVX2CostTbl[Idx].Cost;
-  }
-
-  if (ST.hasAVX()) {
-    int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy);
-    if (Idx != -1)
-      return LT.first * AVX1CostTbl[Idx].Cost;
-  }
-
-  if (ST.hasSSE42()) {
-    int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
-    if (Idx != -1)
-      return LT.first * SSE42CostTbl[Idx].Cost;
-  }
-
-  return VectorTargetTransformImpl::getCmpSelInstrCost(Opcode, ValTy, CondTy);
-}
-
-unsigned X86VectorTargetTransformInfo::getCastInstrCost(unsigned Opcode,
-                                                        Type *Dst,
-                                                        Type *Src) const {
-  int ISD = InstructionOpcodeToISD(Opcode);
-  assert(ISD && "Invalid opcode");
-
-  EVT SrcTy = TLI->getValueType(Src);
-  EVT DstTy = TLI->getValueType(Dst);
-
-  if (!SrcTy.isSimple() || !DstTy.isSimple())
-    return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src);
-
-  const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
-
-  static const X86TypeConversionCostTblEntry AVXConversionTbl[] = {
-    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
-    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
-    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
-    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
-    { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
-    { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
-    { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
-    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
-    { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
-    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
-    { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 },
-    { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
-    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
-    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
-    { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
-  };
-
-  if (ST.hasAVX()) {
-    int Idx = FindInConvertTable(AVXConversionTbl,
-                                 array_lengthof(AVXConversionTbl),
-                                 ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
-    if (Idx != -1)
-      return AVXConversionTbl[Idx].Cost;
-  }
-
-  return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src);
-}
-
-
-unsigned X86VectorTargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp,
-                                                      int Index,
-                                                      Type *SubTp) const {
-  // We only estimate the cost of reverse shuffles.
-  if (Kind != Reverse)
-    return VectorTargetTransformImpl::getShuffleCost(Kind, Tp, Index, SubTp);
-
-  std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Tp);
-  unsigned Cost = 1;
-  if (LT.second.getSizeInBits() > 128)
-    Cost = 3; // Extract + insert + copy.
-
-  // Multiple by the number of parts.
-  return Cost * LT.first;
-}
-
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 86b7764..16ce364 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -23,7 +23,6 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetTransformImpl.h"
 
 namespace llvm {
   namespace X86ISD {
@@ -945,40 +944,6 @@ namespace llvm {
     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                              const TargetLibraryInfo *libInfo);
   }
-
-  class X86ScalarTargetTransformImpl : public ScalarTargetTransformImpl {
-  public:
-    explicit X86ScalarTargetTransformImpl(const TargetLowering *TL) :
-      ScalarTargetTransformImpl(TL) {};
-
-    virtual PopcntHwSupport getPopcntHwSupport(unsigned TyWidth) const;
-  };
-
-  class X86VectorTargetTransformInfo : public VectorTargetTransformImpl {
-  public:
-    explicit X86VectorTargetTransformInfo(const TargetLowering *TL) :
-      VectorTargetTransformImpl(TL) {}
-
-    virtual unsigned getNumberOfRegisters(bool Vector) const;
-
-    virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
-
-    virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
-                                     unsigned Alignment,
-                                     unsigned AddressSpace) const;
-
-    virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
-                                        unsigned Index) const;
-
-    virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                                        Type *CondTy) const;
-
-    virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
-                                      Type *Src) const;
-
-    unsigned getShuffleCost(ShuffleKind Kind,
-                            Type *Tp, int Index, Type *SubTp) const;
-  };
 }
 
 #endif    // X86ISELLOWERING_H
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index ea99796..847e06b 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -48,8 +48,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
     InstrInfo(*this),
     TLInfo(*this),
     TSInfo(*this),
-    JITInfo(*this),
-    STTI(&TLInfo), VTTI(&TLInfo) {
+    JITInfo(*this) {
 }
 
 void X86_64TargetMachine::anchor() { }
@@ -65,8 +64,7 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
     InstrInfo(*this),
     TLInfo(*this),
     TSInfo(*this),
-    JITInfo(*this),
-    STTI(&TLInfo), VTTI(&TLInfo){
+    JITInfo(*this) {
 }
 
 /// X86TargetMachine ctor - Create an X86 target.
@@ -121,6 +119,19 @@ X86EarlyIfConv("x86-early-ifcvt",
                cl::desc("Enable early if-conversion on X86"));
 
 //===----------------------------------------------------------------------===//
+// X86 Analysis Pass Setup
+//===----------------------------------------------------------------------===//
+
+void X86TargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+  // Add first the target-independent BasicTTI pass, then our X86 pass. This
+  // allows the X86 pass to delegate to the target independent layer when
+  // appropriate.
+  PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+  PM.add(createX86TargetTransformInfoPass(this));
+}
+
+
+//===----------------------------------------------------------------------===//
 // Pass Pipeline Configuration
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index a0749f0..174d391 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -24,7 +24,6 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
 
 namespace llvm {
 
@@ -65,6 +64,9 @@ public:
     return &InstrItins;
   }
 
+  /// \brief Register X86 analysis passes with a pass manager.
+  virtual void addAnalysisPasses(PassManagerBase &PM);
+
   // Set up the pass pipeline.
   virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
 
@@ -81,8 +83,6 @@ class X86_32TargetMachine : public X86TargetMachine {
   X86TargetLowering TLInfo;
   X86SelectionDAGInfo TSInfo;
   X86JITInfo JITInfo;
-  ScalarTargetTransformImpl STTI;
-  X86VectorTargetTransformInfo VTTI;
 public:
   X86_32TargetMachine(const Target &T, StringRef TT,
                       StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -101,12 +101,6 @@ public:
   virtual X86JITInfo *getJITInfo() {
     return &JITInfo;
   }
-  virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
-    return &STTI;
-  }
-  virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
-    return &VTTI;
-  }
 };
 
 /// X86_64TargetMachine - X86 64-bit target machine.
@@ -118,8 +112,6 @@ class X86_64TargetMachine : public X86TargetMachine {
   X86TargetLowering TLInfo;
   X86SelectionDAGInfo TSInfo;
   X86JITInfo JITInfo;
-  X86ScalarTargetTransformImpl STTI;
-  X86VectorTargetTransformInfo VTTI;
 public:
   X86_64TargetMachine(const Target &T, StringRef TT,
                       StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -138,12 +130,6 @@ public:
   virtual X86JITInfo *getJITInfo() {
     return &JITInfo;
   }
-  virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
-    return &STTI;
-  }
-  virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
-    return &VTTI;
-  }
 };
 
 } // End llvm namespace
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
new file mode 100644
index 0000000..f5aa577
--- /dev/null
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -0,0 +1,355 @@
+//===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// X86 target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86tti"
+#include "X86.h"
+#include "X86TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/TargetTransformInfo.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeX86TTIPass(PassRegistry &);
+}
+
+namespace {
+
+class X86TTI : public ImmutablePass, public TargetTransformInfo {
+  const X86TargetMachine *TM;
+  const X86Subtarget *ST;
+  const X86TargetLowering *TLI;
+
+  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+  /// are set if the result needs to be inserted and/or extracted from vectors.
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+  X86TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+    llvm_unreachable("This pass cannot be directly constructed");
+  }
+
+  X86TTI(const X86TargetMachine *TM)
+      : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+        TLI(TM->getTargetLowering()) {
+    initializeX86TTIPass(*PassRegistry::getPassRegistry());
+  }
+
+  virtual void initializePass() {
+    pushTTIStack(this);
+  }
+
+  virtual void finalizePass() {
+    popTTIStack();
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    TargetTransformInfo::getAnalysisUsage(AU);
+  }
+
+  /// Pass identification.
+  static char ID;
+
+  /// Provide necessary pointer adjustments for the two base classes.
+  virtual void *getAdjustedAnalysisPointer(const void *ID) {
+    if (ID == &TargetTransformInfo::ID)
+      return (TargetTransformInfo*)this;
+    return this;
+  }
+
+  /// \name Scalar TTI Implementations
+  /// @{
+
+  virtual PopcntHwSupport getPopcntHwSupport(unsigned TyWidth) const;
+
+  /// @}
+
+  /// \name Vector TTI Implementations
+  /// @{
+
+  virtual unsigned getNumberOfRegisters(bool Vector) const;
+  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
+  virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+                                  int Index, Type *SubTp) const;
+  virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+                                    Type *Src) const;
+  virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                                      Type *CondTy) const;
+  virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+                                      unsigned Index) const;
+  virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+                                   unsigned Alignment,
+                                   unsigned AddressSpace) const;
+
+  /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(X86TTI, TargetTransformInfo, "x86tti",
+                   "X86 Target Transform Info", true, true, false)
+char X86TTI::ID = 0;
+
+ImmutablePass *
+llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) {
+  return new X86TTI(TM);
+}
+
+
+//===----------------------------------------------------------------------===//
+//
+// X86 cost model.
+//
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct X86CostTblEntry {
+  int ISD;
+  MVT Type;
+  unsigned Cost;
+};
+}
+
+static int
+FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) {
+  for (unsigned int i = 0; i < len; ++i)
+    if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty)
+      return i;
+
+  // Could not find an entry.
+  return -1;
+}
+
+namespace {
+struct X86TypeConversionCostTblEntry {
+  int ISD;
+  MVT Dst;
+  MVT Src;
+  unsigned Cost;
+};
+}
+
+static int
+FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
+                   int ISD, MVT Dst, MVT Src) {
+  for (unsigned int i = 0; i < len; ++i)
+    if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst)
+      return i;
+
+  // Could not find an entry.
+  return -1;
+}
+
+
+X86TTI::PopcntHwSupport X86TTI::getPopcntHwSupport(unsigned TyWidth) const {
+  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+  // TODO: Currently the __builtin_popcount() implementation using SSE3
+  // instructions is inefficient. Once the problem is fixed, we should
+  // call ST->hasSSE3() instead of ST->hasSSE4().
+  return ST->hasSSE41() ? Fast : None;
+}
+
+unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
+  if (ST->is64Bit())
+    return 16;
+  return 8;
+}
+
+unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
+  // Legalize the type.
+  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  static const X86CostTblEntry AVX1CostTable[] = {
+    // We don't have to scalarize unsupported ops. We can issue two half-sized
+    // operations and we only need to extract the upper YMM half.
+    // Two ops + 1 extract + 1 insert = 4.
+    { ISD::MUL, MVT::v8i32, 4 },
+    { ISD::SUB, MVT::v8i32, 4 },
+    { ISD::ADD, MVT::v8i32, 4 },
+    { ISD::MUL, MVT::v4i64, 4 },
+    { ISD::SUB, MVT::v4i64, 4 },
+    { ISD::ADD, MVT::v4i64, 4 },
+  };
+
+  // Look for AVX1 lowering tricks.
+  if (ST->hasAVX()) {
+    int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD,
+                          LT.second);
+    if (Idx != -1)
+      return LT.first * AVX1CostTable[Idx].Cost;
+  }
+  // Fall back to the default implementation.
+  return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty);
+}
+
+unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+                                Type *SubTp) const {
+  // We only estimate the cost of reverse shuffles.
+  if (Kind != Reverse)
+    return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+
+  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+  unsigned Cost = 1;
+  if (LT.second.getSizeInBits() > 128)
+    Cost = 3; // Extract + insert + copy.
+
+  // Multiply by the number of parts.
+  return Cost * LT.first;
+}
+
+unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  EVT SrcTy = TLI->getValueType(Src);
+  EVT DstTy = TLI->getValueType(Dst);
+
+  if (!SrcTy.isSimple() || !DstTy.isSimple())
+    return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+
+  static const X86TypeConversionCostTblEntry AVXConversionTbl[] = {
+    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
+    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
+    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
+    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
+    { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
+    { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
+    { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
+    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
+    { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
+    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
+    { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 },
+    { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
+    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
+    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
+    { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
+  };
+
+  if (ST->hasAVX()) {
+    int Idx = FindInConvertTable(AVXConversionTbl,
+                                 array_lengthof(AVXConversionTbl),
+                                 ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+    if (Idx != -1)
+      return AVXConversionTbl[Idx].Cost;
+  }
+
+  return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                                    Type *CondTy) const {
+  // Legalize the type.
+  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+
+  MVT MTy = LT.second;
+
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  static const X86CostTblEntry SSE42CostTbl[] = {
+    { ISD::SETCC, MVT::v2f64, 1 },
+    { ISD::SETCC, MVT::v4f32, 1 },
+    { ISD::SETCC, MVT::v2i64, 1 },
+    { ISD::SETCC, MVT::v4i32, 1 },
+    { ISD::SETCC, MVT::v8i16, 1 },
+    { ISD::SETCC, MVT::v16i8, 1 },
+  };
+
+  static const X86CostTblEntry AVX1CostTbl[] = {
+    { ISD::SETCC, MVT::v4f64, 1 },
+    { ISD::SETCC, MVT::v8f32, 1 },
+    // AVX1 does not support 8-wide integer compare.
+    { ISD::SETCC, MVT::v4i64, 4 },
+    { ISD::SETCC, MVT::v8i32, 4 },
+    { ISD::SETCC, MVT::v16i16, 4 },
+    { ISD::SETCC, MVT::v32i8, 4 },
+  };
+
+  static const X86CostTblEntry AVX2CostTbl[] = {
+    { ISD::SETCC, MVT::v4i64, 1 },
+    { ISD::SETCC, MVT::v8i32, 1 },
+    { ISD::SETCC, MVT::v16i16, 1 },
+    { ISD::SETCC, MVT::v32i8, 1 },
+  };
+
+  if (ST->hasAVX2()) {
+    int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
+    if (Idx != -1)
+      return LT.first * AVX2CostTbl[Idx].Cost;
+  }
+
+  if (ST->hasAVX()) {
+    int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy);
+    if (Idx != -1)
+      return LT.first * AVX1CostTbl[Idx].Cost;
+  }
+
+  if (ST->hasSSE42()) {
+    int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
+    if (Idx != -1)
+      return LT.first * SSE42CostTbl[Idx].Cost;
+  }
+
+  return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+                                    unsigned Index) const {
+  assert(Val->isVectorTy() && "This must be a vector type");
+
+  if (Index != -1U) {
+    // Legalize the type.
+    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
+
+    // This type is legalized to a scalar type.
+    if (!LT.second.isVector())
+      return 0;
+
+    // The type may be split. Normalize the index to the new type.
+    unsigned Width = LT.second.getVectorNumElements();
+    Index = Index % Width;
+
+    // Floating point scalars are already located in index #0.
+    if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
+      return 0;
+  }
+
+  return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+}
+
+unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+                                 unsigned AddressSpace) const {
+  // Legalize the type.
+  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+         "Invalid Opcode");
+
+  // Each load/store unit costs 1.
+  unsigned Cost = LT.first * 1;
+
+  // On Sandybridge 256-bit load/stores are double pumped
+  // (but not on Haswell).
+  if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2())
+    Cost *= 2;
+
+  return Cost;
+}
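To make the table-driven costing above concrete: getTypeLegalizationCost returns a (number of parts, legalized type) pair, and the final cost is the per-part table entry multiplied by the part count. The following is a self-contained sketch, not LLVM code; the enums and the hard-coded legalization pair are simplified stand-ins mirroring the AVX1 arithmetic table, where a hypothetical <16 x i32> add splits into two legal v8i32 halves (2 parts) and each half costs 4, so the total is 2 * 4 = 8:

#include <cstdio>
#include <utility>

// Simplified stand-ins for LLVM's ISD opcodes and MVT vector types.
enum ISDOp { ADD, MUL };
enum SimpleVT { v8i32 };

struct CostTblEntry { ISDOp ISD; SimpleVT Type; unsigned Cost; };

// Mirrors the AVX1 arithmetic entries above: two half-width ops plus an
// extract and an insert of the upper YMM half = 4.
static const CostTblEntry AVX1CostTable[] = {
  { MUL, v8i32, 4 },
  { ADD, v8i32, 4 },
};

// Same linear scan as FindInTable in the patch.
static int FindInTable(const CostTblEntry *Tbl, unsigned Len, ISDOp ISD,
                       SimpleVT Ty) {
  for (unsigned i = 0; i < Len; ++i)
    if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty)
      return i;
  return -1; // no entry
}

int main() {
  // Stand-in for getTypeLegalizationCost: a <16 x i32> add on AVX1 splits
  // into two legal v8i32 halves, so the pair is (NumParts = 2, v8i32).
  std::pair<unsigned, SimpleVT> LT(2, v8i32);

  unsigned Len = sizeof(AVX1CostTable) / sizeof(AVX1CostTable[0]);
  int Idx = FindInTable(AVX1CostTable, Len, ADD, LT.second);
  if (Idx != -1)
    std::printf("cost = %u\n", LT.first * AVX1CostTable[Idx].Cost); // cost = 8
  return 0;
}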