diff options
author | Stephen Hines <srhines@google.com> | 2015-04-01 18:49:24 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2015-04-01 18:49:26 +0000 |
commit | 3fa16bd6062e23bcdb82ed4dd965674792e6b761 (patch) | |
tree | 9348fc507292f7e8715d22d64ce5a32131b4f875 /lib/Target/AArch64/AArch64TargetTransformInfo.cpp | |
parent | beed47390a60f6f0c77532b3d3f76bb47ef49423 (diff) | |
parent | ebe69fe11e48d322045d5949c83283927a0d790b (diff) | |
download | external_llvm-3fa16bd6062e23bcdb82ed4dd965674792e6b761.zip external_llvm-3fa16bd6062e23bcdb82ed4dd965674792e6b761.tar.gz external_llvm-3fa16bd6062e23bcdb82ed4dd965674792e6b761.tar.bz2 |
Merge "Update aosp/master LLVM for rebase to r230699."
Diffstat (limited to 'lib/Target/AArch64/AArch64TargetTransformInfo.cpp')
-rw-r--r-- | lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 315 |
1 files changed, 135 insertions, 180 deletions
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index b1a2914..0646d85 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1,4 +1,4 @@ -//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass --------===// +//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===// // // The LLVM Compiler Infrastructure // @@ -6,18 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -/// \file -/// This file implements a TargetTransformInfo analysis pass specific to the -/// AArch64 target machine. It uses the target's detailed information to provide -/// more precise answers to certain TTI queries, while letting the target -/// independent and default TTI implementations handle the rest. -/// -//===----------------------------------------------------------------------===// -#include "AArch64.h" -#include "AArch64TargetMachine.h" +#include "AArch64TargetTransformInfo.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" #include "llvm/Target/TargetLowering.h" @@ -26,130 +19,10 @@ using namespace llvm; #define DEBUG_TYPE "aarch64tti" -// Declare the pass initialization routine locally as target-specific passes -// don't have a target-wide initialization entry point, and so we rely on the -// pass constructor initialization. -namespace llvm { -void initializeAArch64TTIPass(PassRegistry &); -} - -namespace { - -class AArch64TTI final : public ImmutablePass, public TargetTransformInfo { - const AArch64TargetMachine *TM; - const AArch64Subtarget *ST; - const AArch64TargetLowering *TLI; - - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; - -public: - AArch64TTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) { - llvm_unreachable("This pass cannot be directly constructed"); - } - - AArch64TTI(const AArch64TargetMachine *TM) - : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), - TLI(TM->getSubtargetImpl()->getTargetLowering()) { - initializeAArch64TTIPass(*PassRegistry::getPassRegistry()); - } - - void initializePass() override { pushTTIStack(this); } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - TargetTransformInfo::getAnalysisUsage(AU); - } - - /// Pass identification. - static char ID; - - /// Provide necessary pointer adjustments for the two base classes. - void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &TargetTransformInfo::ID) - return (TargetTransformInfo *)this; - return this; - } - - /// \name Scalar TTI Implementations - /// @{ - unsigned getIntImmCost(int64_t Val) const; - unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override; - unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty) const override; - unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty) const override; - PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override; - - /// @} - - /// \name Vector TTI Implementations - /// @{ - - unsigned getNumberOfRegisters(bool Vector) const override { - if (Vector) { - if (ST->hasNEON()) - return 32; - return 0; - } - return 31; - } - - unsigned getRegisterBitWidth(bool Vector) const override { - if (Vector) { - if (ST->hasNEON()) - return 128; - return 0; - } - return 64; - } - - unsigned getMaxInterleaveFactor() const override; - - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const - override; - - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const - override; - - unsigned getArithmeticInstrCost( - unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue, - OperandValueKind Opd2Info = OK_AnyValue, - OperandValueProperties Opd1PropInfo = OP_None, - OperandValueProperties Opd2PropInfo = OP_None) const override; - - unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override; - - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const - override; - - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const override; - - unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override; - - void getUnrollingPreferences(const Function *F, Loop *L, - UnrollingPreferences &UP) const override; - - - /// @} -}; - -} // end anonymous namespace - -INITIALIZE_AG_PASS(AArch64TTI, TargetTransformInfo, "aarch64tti", - "AArch64 Target Transform Info", true, true, false) -char AArch64TTI::ID = 0; - -ImmutablePass * -llvm::createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM) { - return new AArch64TTI(TM); -} - /// \brief Calculate the cost of materializing a 64-bit value. This helper /// method might only calculate a fraction of a larger immediate. Therefore it /// is valid to return a cost of ZERO. -unsigned AArch64TTI::getIntImmCost(int64_t Val) const { +unsigned AArch64TTIImpl::getIntImmCost(int64_t Val) { // Check if the immediate can be encoded within an instruction. if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64)) return 0; @@ -163,7 +36,7 @@ unsigned AArch64TTI::getIntImmCost(int64_t Val) const { } /// \brief Calculate the cost of materializing the given constant. -unsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { +unsigned AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -187,25 +60,25 @@ unsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { return std::max(1U, Cost); } -unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) const { +unsigned AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); // There is no cost model for constants with a bit size of 0. Return TCC_Free // here, so that constant hoisting will ignore this constant. if (BitSize == 0) - return TCC_Free; + return TTI::TCC_Free; unsigned ImmIdx = ~0U; switch (Opcode) { default: - return TCC_Free; + return TTI::TCC_Free; case Instruction::GetElementPtr: // Always hoist the base address of a GetElementPtr. if (Idx == 0) - return 2 * TCC_Basic; - return TCC_Free; + return 2 * TTI::TCC_Basic; + return TTI::TCC_Free; case Instruction::Store: ImmIdx = 0; break; @@ -227,7 +100,7 @@ unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx, case Instruction::LShr: case Instruction::AShr: if (Idx == 1) - return TCC_Free; + return TTI::TCC_Free; break; case Instruction::Trunc: case Instruction::ZExt: @@ -245,26 +118,27 @@ unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx, if (Idx == ImmIdx) { unsigned NumConstants = (BitSize + 63) / 64; - unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty); - return (Cost <= NumConstants * TCC_Basic) - ? static_cast<unsigned>(TCC_Free) : Cost; + unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty); + return (Cost <= NumConstants * TTI::TCC_Basic) + ? static_cast<unsigned>(TTI::TCC_Free) + : Cost; } - return AArch64TTI::getIntImmCost(Imm, Ty); + return AArch64TTIImpl::getIntImmCost(Imm, Ty); } -unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) const { +unsigned AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); // There is no cost model for constants with a bit size of 0. Return TCC_Free // here, so that constant hoisting will ignore this constant. if (BitSize == 0) - return TCC_Free; + return TTI::TCC_Free; switch (IID) { default: - return TCC_Free; + return TTI::TCC_Free; case Intrinsic::sadd_with_overflow: case Intrinsic::uadd_with_overflow: case Intrinsic::ssub_with_overflow: @@ -273,35 +147,36 @@ unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, case Intrinsic::umul_with_overflow: if (Idx == 1) { unsigned NumConstants = (BitSize + 63) / 64; - unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty); - return (Cost <= NumConstants * TCC_Basic) - ? static_cast<unsigned>(TCC_Free) : Cost; + unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty); + return (Cost <= NumConstants * TTI::TCC_Basic) + ? static_cast<unsigned>(TTI::TCC_Free) + : Cost; } break; case Intrinsic::experimental_stackmap: if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) - return TCC_Free; + return TTI::TCC_Free; break; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) - return TCC_Free; + return TTI::TCC_Free; break; } - return AArch64TTI::getIntImmCost(Imm, Ty); + return AArch64TTIImpl::getIntImmCost(Imm, Ty); } -AArch64TTI::PopcntSupportKind -AArch64TTI::getPopcntSupport(unsigned TyWidth) const { +TargetTransformInfo::PopcntSupportKind +AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); if (TyWidth == 32 || TyWidth == 64) - return PSK_FastHardware; + return TTI::PSK_FastHardware; // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount. - return PSK_Software; + return TTI::PSK_Software; } -unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { +unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -309,7 +184,7 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, EVT DstTy = TLI->getValueType(Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) - return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); + return BaseT::getCastInstrCost(Opcode, Dst, Src); static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = { // LowerVectorINT_TO_FP: @@ -380,11 +255,11 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, if (Idx != -1) return ConversionTbl[Idx].Cost; - return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); + return BaseT::getCastInstrCost(Opcode, Dst, Src); } -unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const { +unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) { assert(Val->isVectorTy() && "This must be a vector type"); if (Index != -1U) { @@ -408,10 +283,10 @@ unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val, return 2; } -unsigned AArch64TTI::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, - OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, - OperandValueProperties Opd2PropInfo) const { +unsigned AArch64TTIImpl::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, + TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo) { // Legalize the type. std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); @@ -442,8 +317,8 @@ unsigned AArch64TTI::getArithmeticInstrCost( switch (ISD) { default: - return TargetTransformInfo::getArithmeticInstrCost( - Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); + return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo); case ISD::ADD: case ISD::MUL: case ISD::XOR: @@ -455,7 +330,7 @@ unsigned AArch64TTI::getArithmeticInstrCost( } } -unsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { +unsigned AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { // Address computations in vectorized code with non-consecutive addresses will // likely result in more instructions compared to scalar code where the // computation can more often be merged into the index mode. The resulting @@ -470,8 +345,8 @@ unsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { return 1; } -unsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const { +unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // We don't lower vector selects well that are wider than the register width. @@ -498,12 +373,12 @@ unsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return VectorSelectTbl[Idx].Cost; } } - return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); } -unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { +unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) { std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 && @@ -531,7 +406,7 @@ unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src, return LT.first; } -unsigned AArch64TTI::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const { +unsigned AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { unsigned Cost = 0; for (auto *I : Tys) { if (!I->isVectorTy()) @@ -543,14 +418,94 @@ unsigned AArch64TTI::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const { return Cost; } -unsigned AArch64TTI::getMaxInterleaveFactor() const { +unsigned AArch64TTIImpl::getMaxInterleaveFactor() { if (ST->isCortexA57()) return 4; return 2; } -void AArch64TTI::getUnrollingPreferences(const Function *F, Loop *L, - UnrollingPreferences &UP) const { +void AArch64TTIImpl::getUnrollingPreferences(Loop *L, + TTI::UnrollingPreferences &UP) { // Disable partial & runtime unrolling on -Os. UP.PartialOptSizeThreshold = 0; } + +Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, + Type *ExpectedType) { + switch (Inst->getIntrinsicID()) { + default: + return nullptr; + case Intrinsic::aarch64_neon_st2: + case Intrinsic::aarch64_neon_st3: + case Intrinsic::aarch64_neon_st4: { + // Create a struct type + StructType *ST = dyn_cast<StructType>(ExpectedType); + if (!ST) + return nullptr; + unsigned NumElts = Inst->getNumArgOperands() - 1; + if (ST->getNumElements() != NumElts) + return nullptr; + for (unsigned i = 0, e = NumElts; i != e; ++i) { + if (Inst->getArgOperand(i)->getType() != ST->getElementType(i)) + return nullptr; + } + Value *Res = UndefValue::get(ExpectedType); + IRBuilder<> Builder(Inst); + for (unsigned i = 0, e = NumElts; i != e; ++i) { + Value *L = Inst->getArgOperand(i); + Res = Builder.CreateInsertValue(Res, L, i); + } + return Res; + } + case Intrinsic::aarch64_neon_ld2: + case Intrinsic::aarch64_neon_ld3: + case Intrinsic::aarch64_neon_ld4: + if (Inst->getType() == ExpectedType) + return Inst; + return nullptr; + } +} + +bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, + MemIntrinsicInfo &Info) { + switch (Inst->getIntrinsicID()) { + default: + break; + case Intrinsic::aarch64_neon_ld2: + case Intrinsic::aarch64_neon_ld3: + case Intrinsic::aarch64_neon_ld4: + Info.ReadMem = true; + Info.WriteMem = false; + Info.Vol = false; + Info.NumMemRefs = 1; + Info.PtrVal = Inst->getArgOperand(0); + break; + case Intrinsic::aarch64_neon_st2: + case Intrinsic::aarch64_neon_st3: + case Intrinsic::aarch64_neon_st4: + Info.ReadMem = false; + Info.WriteMem = true; + Info.Vol = false; + Info.NumMemRefs = 1; + Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1); + break; + } + + switch (Inst->getIntrinsicID()) { + default: + return false; + case Intrinsic::aarch64_neon_ld2: + case Intrinsic::aarch64_neon_st2: + Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS; + break; + case Intrinsic::aarch64_neon_ld3: + case Intrinsic::aarch64_neon_st3: + Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS; + break; + case Intrinsic::aarch64_neon_ld4: + case Intrinsic::aarch64_neon_st4: + Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS; + break; + } + return true; +} |