diff options
Diffstat (limited to 'lib/Target/ARM/ARMTargetTransformInfo.cpp')
| -rw-r--r-- | lib/Target/ARM/ARMTargetTransformInfo.cpp | 27 |
1 files changed, 19 insertions, 8 deletions
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 53ece66..34576ba 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -124,7 +124,7 @@ public:
   unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
                               unsigned Index) const;
 
-  unsigned getAddressComputationCost(Type *Val) const;
+  unsigned getAddressComputationCost(Type *Val, bool IsComplex) const;
 
   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                   OperandValueKind Op1Info = OK_AnyValue,
@@ -411,12 +411,14 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
 
     EVT SelCondTy = TLI->getValueType(CondTy);
     EVT SelValTy = TLI->getValueType(ValTy);
-    int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl,
-                                          array_lengthof(NEONVectorSelectTbl),
-                                          ISD, SelCondTy.getSimpleVT(),
-                                          SelValTy.getSimpleVT());
-    if (Idx != -1)
-      return NEONVectorSelectTbl[Idx].Cost;
+    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
+      int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl,
+                                            array_lengthof(NEONVectorSelectTbl),
+                                            ISD, SelCondTy.getSimpleVT(),
+                                            SelValTy.getSimpleVT());
+      if (Idx != -1)
+        return NEONVectorSelectTbl[Idx].Cost;
+    }
 
     std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
     return LT.first;
@@ -425,7 +427,16 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
   return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
 }
 
-unsigned ARMTTI::getAddressComputationCost(Type *Ty) const {
+unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
+  // Address computations in vectorized code with non-consecutive addresses will
+  // likely result in more instructions compared to scalar code where the
+  // computation can more often be merged into the index mode. The resulting
+  // extra micro-ops can significantly decrease throughput.
+  unsigned NumVectorInstToHideOverhead = 10;
+
+  if (Ty->isVectorTy() && IsComplex)
+    return NumVectorInstToHideOverhead;
+
   // In many cases the address computation is not merged into the instruction
   // addressing mode.
   return 1;
