diff options
author | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-04-25 21:16:18 +0000 |
---|---|---|
committer | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-04-25 21:16:18 +0000 |
commit | 45c9e0b412495c2d660918b0e964529bcb5e05b8 (patch) | |
tree | 93c99daadd0a7bf354ff775609f65b417cb263f3 | |
parent | 1c489455ea5fac43a5f20911dfb5486630eb0160 (diff) | |
download | external_llvm-45c9e0b412495c2d660918b0e964529bcb5e05b8.zip external_llvm-45c9e0b412495c2d660918b0e964529bcb5e05b8.tar.gz external_llvm-45c9e0b412495c2d660918b0e964529bcb5e05b8.tar.bz2 |
ARM cost model: Integer div and rem are lowered to a function call
Reflect this in the cost model. I observed this in MiBench/consumer-lame.
radar://13354716
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180576 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/ARM/ARMTargetTransformInfo.cpp | 68 | ||||
-rw-r--r-- | test/Analysis/CostModel/ARM/divrem.ll | 450 |
2 files changed, 518 insertions, 0 deletions
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 3149f19..53ece66 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -125,6 +125,10 @@ public: unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const; unsigned getAddressComputationCost(Type *Val) const; + + unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind Op1Info = OK_AnyValue, + OperandValueKind Op2Info = OK_AnyValue) const; /// @} }; @@ -456,3 +460,67 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, return LT.first * NEONShuffleTbl[Idx].Cost; } + +unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Op1Info, + OperandValueKind Op2Info) const { + + int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + + const unsigned FunctionCallDivCost = 20; + const unsigned ReciprocalDivCost = 10; + static const CostTblEntry<MVT> CostTbl[] = { + // Division. + // These costs are somewhat random. Choose a cost of 20 to indicate that + // vectorizing division (added function call) is going to be very expensive. + // Double register types. 
+ { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost}, + { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost}, + { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost}, + { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost}, + { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v4i16, ReciprocalDivCost}, + { ISD::UDIV, MVT::v4i16, ReciprocalDivCost}, + { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost}, + { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v8i8, ReciprocalDivCost}, + { ISD::UDIV, MVT::v8i8, ReciprocalDivCost}, + { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost}, + { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost}, + // Quad register types. + { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost}, + { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost}, + { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost}, + { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost}, + { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost}, + { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost}, + { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost}, + { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost}, + { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost}, + // Multiplication. 
+ }; + + int Idx = -1; + + if (ST->hasNEON()) + Idx = CostTableLookup<MVT>(CostTbl, array_lengthof(CostTbl), ISDOpcode, + LT.second); + + if (Idx != -1) + return LT.first * CostTbl[Idx].Cost; + + + return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, + Op2Info); +} + diff --git a/test/Analysis/CostModel/ARM/divrem.ll b/test/Analysis/CostModel/ARM/divrem.ll new file mode 100644 index 0000000..c4ac59b --- /dev/null +++ b/test/Analysis/CostModel/ARM/divrem.ll @@ -0,0 +1,450 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a9 | FileCheck %s + +define <2 x i8> @sdiv_v2_i8(<2 x i8> %a, <2 x i8> %b) { + ; CHECK: sdiv_v2_i8 + ; CHECK: cost of 40 {{.*}} sdiv + + %1 = sdiv <2 x i8> %a, %b + ret <2 x i8> %1 +} +define <2 x i16> @sdiv_v2_i16(<2 x i16> %a, <2 x i16> %b) { + ; CHECK: sdiv_v2_i16 + ; CHECK: cost of 40 {{.*}} sdiv + + %1 = sdiv <2 x i16> %a, %b + ret <2 x i16> %1 +} +define <2 x i32> @sdiv_v2_i32(<2 x i32> %a, <2 x i32> %b) { + ; CHECK: sdiv_v2_i32 + ; CHECK: cost of 40 {{.*}} sdiv + + %1 = sdiv <2 x i32> %a, %b + ret <2 x i32> %1 +} +define <2 x i64> @sdiv_v2_i64(<2 x i64> %a, <2 x i64> %b) { + ; CHECK: sdiv_v2_i64 + ; CHECK: cost of 40 {{.*}} sdiv + + %1 = sdiv <2 x i64> %a, %b + ret <2 x i64> %1 +} +define <4 x i8> @sdiv_v4_i8(<4 x i8> %a, <4 x i8> %b) { + ; CHECK: sdiv_v4_i8 + ; CHECK: cost of 10 {{.*}} sdiv + + %1 = sdiv <4 x i8> %a, %b + ret <4 x i8> %1 +} +define <4 x i16> @sdiv_v4_i16(<4 x i16> %a, <4 x i16> %b) { + ; CHECK: sdiv_v4_i16 + ; CHECK: cost of 10 {{.*}} sdiv + + %1 = sdiv <4 x i16> %a, %b + ret <4 x i16> %1 +} +define <4 x i32> @sdiv_v4_i32(<4 x i32> %a, <4 x i32> %b) { + ; CHECK: sdiv_v4_i32 + ; CHECK: cost of 80 {{.*}} sdiv + + %1 = sdiv <4 x i32> %a, %b + ret <4 x i32> %1 +} +define <4 x i64> @sdiv_v4_i64(<4 x i64> %a, <4 x i64> %b) { + ; CHECK: sdiv_v4_i64 + ; CHECK: cost of 80 {{.*}} sdiv + + %1 = sdiv <4 x i64> %a, %b + ret <4 x i64> %1 +} +define <8 x i8> @sdiv_v8_i8(<8 x i8> %a, <8 
x i8> %b) { + ; CHECK: sdiv_v8_i8 + ; CHECK: cost of 10 {{.*}} sdiv + + %1 = sdiv <8 x i8> %a, %b + ret <8 x i8> %1 +} +define <8 x i16> @sdiv_v8_i16(<8 x i16> %a, <8 x i16> %b) { + ; CHECK: sdiv_v8_i16 + ; CHECK: cost of 160 {{.*}} sdiv + + %1 = sdiv <8 x i16> %a, %b + ret <8 x i16> %1 +} +define <8 x i32> @sdiv_v8_i32(<8 x i32> %a, <8 x i32> %b) { + ; CHECK: sdiv_v8_i32 + ; CHECK: cost of 160 {{.*}} sdiv + + %1 = sdiv <8 x i32> %a, %b + ret <8 x i32> %1 +} +define <8 x i64> @sdiv_v8_i64(<8 x i64> %a, <8 x i64> %b) { + ; CHECK: sdiv_v8_i64 + ; CHECK: cost of 160 {{.*}} sdiv + + %1 = sdiv <8 x i64> %a, %b + ret <8 x i64> %1 +} +define <16 x i8> @sdiv_v16_i8(<16 x i8> %a, <16 x i8> %b) { + ; CHECK: sdiv_v16_i8 + ; CHECK: cost of 320 {{.*}} sdiv + + %1 = sdiv <16 x i8> %a, %b + ret <16 x i8> %1 +} +define <16 x i16> @sdiv_v16_i16(<16 x i16> %a, <16 x i16> %b) { + ; CHECK: sdiv_v16_i16 + ; CHECK: cost of 320 {{.*}} sdiv + + %1 = sdiv <16 x i16> %a, %b + ret <16 x i16> %1 +} +define <16 x i32> @sdiv_v16_i32(<16 x i32> %a, <16 x i32> %b) { + ; CHECK: sdiv_v16_i32 + ; CHECK: cost of 320 {{.*}} sdiv + + %1 = sdiv <16 x i32> %a, %b + ret <16 x i32> %1 +} +define <16 x i64> @sdiv_v16_i64(<16 x i64> %a, <16 x i64> %b) { + ; CHECK: sdiv_v16_i64 + ; CHECK: cost of 320 {{.*}} sdiv + + %1 = sdiv <16 x i64> %a, %b + ret <16 x i64> %1 +} +define <2 x i8> @udiv_v2_i8(<2 x i8> %a, <2 x i8> %b) { + ; CHECK: udiv_v2_i8 + ; CHECK: cost of 40 {{.*}} udiv + + %1 = udiv <2 x i8> %a, %b + ret <2 x i8> %1 +} +define <2 x i16> @udiv_v2_i16(<2 x i16> %a, <2 x i16> %b) { + ; CHECK: udiv_v2_i16 + ; CHECK: cost of 40 {{.*}} udiv + + %1 = udiv <2 x i16> %a, %b + ret <2 x i16> %1 +} +define <2 x i32> @udiv_v2_i32(<2 x i32> %a, <2 x i32> %b) { + ; CHECK: udiv_v2_i32 + ; CHECK: cost of 40 {{.*}} udiv + + %1 = udiv <2 x i32> %a, %b + ret <2 x i32> %1 +} +define <2 x i64> @udiv_v2_i64(<2 x i64> %a, <2 x i64> %b) { + ; CHECK: udiv_v2_i64 + ; CHECK: cost of 40 {{.*}} udiv + + %1 = udiv <2 x i64> %a, %b 
+ ret <2 x i64> %1 +} +define <4 x i8> @udiv_v4_i8(<4 x i8> %a, <4 x i8> %b) { + ; CHECK: udiv_v4_i8 + ; CHECK: cost of 10 {{.*}} udiv + + %1 = udiv <4 x i8> %a, %b + ret <4 x i8> %1 +} +define <4 x i16> @udiv_v4_i16(<4 x i16> %a, <4 x i16> %b) { + ; CHECK: udiv_v4_i16 + ; CHECK: cost of 10 {{.*}} udiv + + %1 = udiv <4 x i16> %a, %b + ret <4 x i16> %1 +} +define <4 x i32> @udiv_v4_i32(<4 x i32> %a, <4 x i32> %b) { + ; CHECK: udiv_v4_i32 + ; CHECK: cost of 80 {{.*}} udiv + + %1 = udiv <4 x i32> %a, %b + ret <4 x i32> %1 +} +define <4 x i64> @udiv_v4_i64(<4 x i64> %a, <4 x i64> %b) { + ; CHECK: udiv_v4_i64 + ; CHECK: cost of 80 {{.*}} udiv + + %1 = udiv <4 x i64> %a, %b + ret <4 x i64> %1 +} +define <8 x i8> @udiv_v8_i8(<8 x i8> %a, <8 x i8> %b) { + ; CHECK: udiv_v8_i8 + ; CHECK: cost of 10 {{.*}} udiv + + %1 = udiv <8 x i8> %a, %b + ret <8 x i8> %1 +} +define <8 x i16> @udiv_v8_i16(<8 x i16> %a, <8 x i16> %b) { + ; CHECK: udiv_v8_i16 + ; CHECK: cost of 160 {{.*}} udiv + + %1 = udiv <8 x i16> %a, %b + ret <8 x i16> %1 +} +define <8 x i32> @udiv_v8_i32(<8 x i32> %a, <8 x i32> %b) { + ; CHECK: udiv_v8_i32 + ; CHECK: cost of 160 {{.*}} udiv + + %1 = udiv <8 x i32> %a, %b + ret <8 x i32> %1 +} +define <8 x i64> @udiv_v8_i64(<8 x i64> %a, <8 x i64> %b) { + ; CHECK: udiv_v8_i64 + ; CHECK: cost of 160 {{.*}} udiv + + %1 = udiv <8 x i64> %a, %b + ret <8 x i64> %1 +} +define <16 x i8> @udiv_v16_i8(<16 x i8> %a, <16 x i8> %b) { + ; CHECK: udiv_v16_i8 + ; CHECK: cost of 320 {{.*}} udiv + + %1 = udiv <16 x i8> %a, %b + ret <16 x i8> %1 +} +define <16 x i16> @udiv_v16_i16(<16 x i16> %a, <16 x i16> %b) { + ; CHECK: udiv_v16_i16 + ; CHECK: cost of 320 {{.*}} udiv + + %1 = udiv <16 x i16> %a, %b + ret <16 x i16> %1 +} +define <16 x i32> @udiv_v16_i32(<16 x i32> %a, <16 x i32> %b) { + ; CHECK: udiv_v16_i32 + ; CHECK: cost of 320 {{.*}} udiv + + %1 = udiv <16 x i32> %a, %b + ret <16 x i32> %1 +} +define <16 x i64> @udiv_v16_i64(<16 x i64> %a, <16 x i64> %b) { + ; CHECK: udiv_v16_i64 + 
; CHECK: cost of 320 {{.*}} udiv + + %1 = udiv <16 x i64> %a, %b + ret <16 x i64> %1 +} +define <2 x i8> @srem_v2_i8(<2 x i8> %a, <2 x i8> %b) { + ; CHECK: srem_v2_i8 + ; CHECK: cost of 40 {{.*}} srem + + %1 = srem <2 x i8> %a, %b + ret <2 x i8> %1 +} +define <2 x i16> @srem_v2_i16(<2 x i16> %a, <2 x i16> %b) { + ; CHECK: srem_v2_i16 + ; CHECK: cost of 40 {{.*}} srem + + %1 = srem <2 x i16> %a, %b + ret <2 x i16> %1 +} +define <2 x i32> @srem_v2_i32(<2 x i32> %a, <2 x i32> %b) { + ; CHECK: srem_v2_i32 + ; CHECK: cost of 40 {{.*}} srem + + %1 = srem <2 x i32> %a, %b + ret <2 x i32> %1 +} +define <2 x i64> @srem_v2_i64(<2 x i64> %a, <2 x i64> %b) { + ; CHECK: srem_v2_i64 + ; CHECK: cost of 40 {{.*}} srem + + %1 = srem <2 x i64> %a, %b + ret <2 x i64> %1 +} +define <4 x i8> @srem_v4_i8(<4 x i8> %a, <4 x i8> %b) { + ; CHECK: srem_v4_i8 + ; CHECK: cost of 80 {{.*}} srem + + %1 = srem <4 x i8> %a, %b + ret <4 x i8> %1 +} +define <4 x i16> @srem_v4_i16(<4 x i16> %a, <4 x i16> %b) { + ; CHECK: srem_v4_i16 + ; CHECK: cost of 80 {{.*}} srem + + %1 = srem <4 x i16> %a, %b + ret <4 x i16> %1 +} +define <4 x i32> @srem_v4_i32(<4 x i32> %a, <4 x i32> %b) { + ; CHECK: srem_v4_i32 + ; CHECK: cost of 80 {{.*}} srem + + %1 = srem <4 x i32> %a, %b + ret <4 x i32> %1 +} +define <4 x i64> @srem_v4_i64(<4 x i64> %a, <4 x i64> %b) { + ; CHECK: srem_v4_i64 + ; CHECK: cost of 80 {{.*}} srem + + %1 = srem <4 x i64> %a, %b + ret <4 x i64> %1 +} +define <8 x i8> @srem_v8_i8(<8 x i8> %a, <8 x i8> %b) { + ; CHECK: srem_v8_i8 + ; CHECK: cost of 160 {{.*}} srem + + %1 = srem <8 x i8> %a, %b + ret <8 x i8> %1 +} +define <8 x i16> @srem_v8_i16(<8 x i16> %a, <8 x i16> %b) { + ; CHECK: srem_v8_i16 + ; CHECK: cost of 160 {{.*}} srem + + %1 = srem <8 x i16> %a, %b + ret <8 x i16> %1 +} +define <8 x i32> @srem_v8_i32(<8 x i32> %a, <8 x i32> %b) { + ; CHECK: srem_v8_i32 + ; CHECK: cost of 160 {{.*}} srem + + %1 = srem <8 x i32> %a, %b + ret <8 x i32> %1 +} +define <8 x i64> @srem_v8_i64(<8 x i64> %a, <8 
x i64> %b) { + ; CHECK: srem_v8_i64 + ; CHECK: cost of 160 {{.*}} srem + + %1 = srem <8 x i64> %a, %b + ret <8 x i64> %1 +} +define <16 x i8> @srem_v16_i8(<16 x i8> %a, <16 x i8> %b) { + ; CHECK: srem_v16_i8 + ; CHECK: cost of 320 {{.*}} srem + + %1 = srem <16 x i8> %a, %b + ret <16 x i8> %1 +} +define <16 x i16> @srem_v16_i16(<16 x i16> %a, <16 x i16> %b) { + ; CHECK: srem_v16_i16 + ; CHECK: cost of 320 {{.*}} srem + + %1 = srem <16 x i16> %a, %b + ret <16 x i16> %1 +} +define <16 x i32> @srem_v16_i32(<16 x i32> %a, <16 x i32> %b) { + ; CHECK: srem_v16_i32 + ; CHECK: cost of 320 {{.*}} srem + + %1 = srem <16 x i32> %a, %b + ret <16 x i32> %1 +} +define <16 x i64> @srem_v16_i64(<16 x i64> %a, <16 x i64> %b) { + ; CHECK: srem_v16_i64 + ; CHECK: cost of 320 {{.*}} srem + + %1 = srem <16 x i64> %a, %b + ret <16 x i64> %1 +} +define <2 x i8> @urem_v2_i8(<2 x i8> %a, <2 x i8> %b) { + ; CHECK: urem_v2_i8 + ; CHECK: cost of 40 {{.*}} urem + + %1 = urem <2 x i8> %a, %b + ret <2 x i8> %1 +} +define <2 x i16> @urem_v2_i16(<2 x i16> %a, <2 x i16> %b) { + ; CHECK: urem_v2_i16 + ; CHECK: cost of 40 {{.*}} urem + + %1 = urem <2 x i16> %a, %b + ret <2 x i16> %1 +} +define <2 x i32> @urem_v2_i32(<2 x i32> %a, <2 x i32> %b) { + ; CHECK: urem_v2_i32 + ; CHECK: cost of 40 {{.*}} urem + + %1 = urem <2 x i32> %a, %b + ret <2 x i32> %1 +} +define <2 x i64> @urem_v2_i64(<2 x i64> %a, <2 x i64> %b) { + ; CHECK: urem_v2_i64 + ; CHECK: cost of 40 {{.*}} urem + + %1 = urem <2 x i64> %a, %b + ret <2 x i64> %1 +} +define <4 x i8> @urem_v4_i8(<4 x i8> %a, <4 x i8> %b) { + ; CHECK: urem_v4_i8 + ; CHECK: cost of 80 {{.*}} urem + + %1 = urem <4 x i8> %a, %b + ret <4 x i8> %1 +} +define <4 x i16> @urem_v4_i16(<4 x i16> %a, <4 x i16> %b) { + ; CHECK: urem_v4_i16 + ; CHECK: cost of 80 {{.*}} urem + + %1 = urem <4 x i16> %a, %b + ret <4 x i16> %1 +} +define <4 x i32> @urem_v4_i32(<4 x i32> %a, <4 x i32> %b) { + ; CHECK: urem_v4_i32 + ; CHECK: cost of 80 {{.*}} urem + + %1 = urem <4 x i32> %a, %b + ret 
<4 x i32> %1 +} +define <4 x i64> @urem_v4_i64(<4 x i64> %a, <4 x i64> %b) { + ; CHECK: urem_v4_i64 + ; CHECK: cost of 80 {{.*}} urem + + %1 = urem <4 x i64> %a, %b + ret <4 x i64> %1 +} +define <8 x i8> @urem_v8_i8(<8 x i8> %a, <8 x i8> %b) { + ; CHECK: urem_v8_i8 + ; CHECK: cost of 160 {{.*}} urem + + %1 = urem <8 x i8> %a, %b + ret <8 x i8> %1 +} +define <8 x i16> @urem_v8_i16(<8 x i16> %a, <8 x i16> %b) { + ; CHECK: urem_v8_i16 + ; CHECK: cost of 160 {{.*}} urem + + %1 = urem <8 x i16> %a, %b + ret <8 x i16> %1 +} +define <8 x i32> @urem_v8_i32(<8 x i32> %a, <8 x i32> %b) { + ; CHECK: urem_v8_i32 + ; CHECK: cost of 160 {{.*}} urem + + %1 = urem <8 x i32> %a, %b + ret <8 x i32> %1 +} +define <8 x i64> @urem_v8_i64(<8 x i64> %a, <8 x i64> %b) { + ; CHECK: urem_v8_i64 + ; CHECK: cost of 160 {{.*}} urem + + %1 = urem <8 x i64> %a, %b + ret <8 x i64> %1 +} +define <16 x i8> @urem_v16_i8(<16 x i8> %a, <16 x i8> %b) { + ; CHECK: urem_v16_i8 + ; CHECK: cost of 320 {{.*}} urem + + %1 = urem <16 x i8> %a, %b + ret <16 x i8> %1 +} +define <16 x i16> @urem_v16_i16(<16 x i16> %a, <16 x i16> %b) { + ; CHECK: urem_v16_i16 + ; CHECK: cost of 320 {{.*}} urem + + %1 = urem <16 x i16> %a, %b + ret <16 x i16> %1 +} +define <16 x i32> @urem_v16_i32(<16 x i32> %a, <16 x i32> %b) { + ; CHECK: urem_v16_i32 + ; CHECK: cost of 320 {{.*}} urem + + %1 = urem <16 x i32> %a, %b + ret <16 x i32> %1 +} +define <16 x i64> @urem_v16_i64(<16 x i64> %a, <16 x i64> %b) { + ; CHECK: urem_v16_i64 + ; CHECK: cost of 320 {{.*}} urem + + %1 = urem <16 x i64> %a, %b + ret <16 x i64> %1 +} |