about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- lib/Target/ARM/ARMTargetTransformInfo.cpp 115
-rw-r--r-- test/Analysis/CostModel/ARM/cast.ll 158
2 files changed, 266 insertions, 7 deletions
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 2ded63f..bf83d51 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -177,25 +177,126 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
// Some arithmetic, load and store operations have specific instructions
- // to cast up/down their types automatically at no extra cost
- // TODO: Get these tables to know at least what the related operations are
- static const TypeConversionCostTblEntry<MVT> NEONConversionTbl[] = {
+ // to cast up/down their types automatically at no extra cost.
+ // TODO: Get these tables to know at least what the related operations are.
+ static const TypeConversionCostTblEntry<MVT> NEONVectorConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
+
+ // Vector float <-> i32 conversions.
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+
+ // Vector double <-> i32 conversions.
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }
};
- if (ST->hasNEON()) {
- int Idx = ConvertCostTableLookup<MVT>(NEONConversionTbl,
- array_lengthof(NEONConversionTbl),
+ if (SrcTy.isVector() && ST->hasNEON()) {
+ int Idx = ConvertCostTableLookup<MVT>(NEONVectorConversionTbl,
+ array_lengthof(NEONVectorConversionTbl),
ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
if (Idx != -1)
- return NEONConversionTbl[Idx].Cost;
+ return NEONVectorConversionTbl[Idx].Cost;
+ }
+
+ // Scalar float to integer conversions.
+ static const TypeConversionCostTblEntry<MVT> NEONFloatConversionTbl[] = {
+ { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 },
+ { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
+ };
+ if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
+ int Idx = ConvertCostTableLookup<MVT>(NEONFloatConversionTbl,
+ array_lengthof(NEONFloatConversionTbl),
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONFloatConversionTbl[Idx].Cost;
+ }
+
+
+ // Scalar integer to float conversions.
+ static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = {
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 }
+ };
+
+ if (SrcTy.isInteger() && ST->hasNEON()) {
+ int Idx = ConvertCostTableLookup<MVT>(NEONIntegerConversionTbl,
+ array_lengthof(NEONIntegerConversionTbl),
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONIntegerConversionTbl[Idx].Cost;
}
+ // Scalar integer conversion costs.
+ static const TypeConversionCostTblEntry<MVT> ARMIntegerConversionTbl[] = {
+ // i16 -> i64 requires two dependent operations.
+ { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
+
+ // Truncates on i64 are assumed to be free.
+ { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 },
+ { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 },
+ { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 },
+ { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 }
+ };
+
+ if (SrcTy.isInteger()) {
+ int Idx =
+ ConvertCostTableLookup<MVT>(ARMIntegerConversionTbl,
+ array_lengthof(ARMIntegerConversionTbl),
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return ARMIntegerConversionTbl[Idx].Cost;
+ }
+
+
return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
}
diff --git a/test/Analysis/CostModel/ARM/cast.ll b/test/Analysis/CostModel/ARM/cast.ll
new file mode 100644
index 0000000..464b6ec
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/cast.ll
@@ -0,0 +1,158 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios6.0.0"
+
+define i32 @casts() {
+
+ ; -- scalar integer casts: extensions cost 1 (sext i16 -> i64 costs 2), truncations are free --
+ ; CHECK: cost of 1 {{.*}} sext
+ %r0 = sext i1 undef to i8
+ ; CHECK: cost of 1 {{.*}} zext
+ %r1 = zext i1 undef to i8
+ ; CHECK: cost of 1 {{.*}} sext
+ %r2 = sext i1 undef to i16
+ ; CHECK: cost of 1 {{.*}} zext
+ %r3 = zext i1 undef to i16
+ ; CHECK: cost of 1 {{.*}} sext
+ %r4 = sext i1 undef to i32
+ ; CHECK: cost of 1 {{.*}} zext
+ %r5 = zext i1 undef to i32
+ ; CHECK: cost of 1 {{.*}} sext
+ %r6 = sext i1 undef to i64
+ ; CHECK: cost of 1 {{.*}} zext
+ %r7 = zext i1 undef to i64
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r8 = trunc i8 undef to i1
+ ; CHECK: cost of 1 {{.*}} sext
+ %r9 = sext i8 undef to i16
+ ; CHECK: cost of 1 {{.*}} zext
+ %r10 = zext i8 undef to i16
+ ; CHECK: cost of 1 {{.*}} sext
+ %r11 = sext i8 undef to i32
+ ; CHECK: cost of 1 {{.*}} zext
+ %r12 = zext i8 undef to i32
+ ; CHECK: cost of 1 {{.*}} sext
+ %r13 = sext i8 undef to i64
+ ; CHECK: cost of 1 {{.*}} zext
+ %r14 = zext i8 undef to i64
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r15 = trunc i16 undef to i1
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r16 = trunc i16 undef to i8
+ ; CHECK: cost of 1 {{.*}} sext
+ %r17 = sext i16 undef to i32
+ ; CHECK: cost of 1 {{.*}} zext
+ %r18 = zext i16 undef to i32
+ ; CHECK: cost of 2 {{.*}} sext
+ %r19 = sext i16 undef to i64
+ ; CHECK: cost of 1 {{.*}} zext
+ %r20 = zext i16 undef to i64
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r21 = trunc i32 undef to i1
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r22 = trunc i32 undef to i8
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r23 = trunc i32 undef to i16
+ ; CHECK: cost of 1 {{.*}} sext
+ %r24 = sext i32 undef to i64
+ ; CHECK: cost of 1 {{.*}} zext
+ %r25 = zext i32 undef to i64
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r26 = trunc i64 undef to i1
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r27 = trunc i64 undef to i8
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r28 = trunc i64 undef to i16
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r29 = trunc i64 undef to i32
+
+ ; -- scalar floating point <-> integer conversions --
+ ; Conversions to i32 or narrower cost 2: moves between scalar and NEON registers.
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r30 = fptoui float undef to i1
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r31 = fptosi float undef to i1
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r32 = fptoui float undef to i8
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r33 = fptosi float undef to i8
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r34 = fptoui float undef to i16
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r35 = fptosi float undef to i16
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r36 = fptoui float undef to i32
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r37 = fptosi float undef to i32
+ ; CHECK: cost of 10 {{.*}} fptoui
+ %r38 = fptoui float undef to i64
+ ; CHECK: cost of 10 {{.*}} fptosi
+ %r39 = fptosi float undef to i64
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r40 = fptoui double undef to i1
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r41 = fptosi double undef to i1
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r42 = fptoui double undef to i8
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r43 = fptosi double undef to i8
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r44 = fptoui double undef to i16
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r45 = fptosi double undef to i16
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r46 = fptoui double undef to i32
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r47 = fptosi double undef to i32
+ ; Conversions to i64 are modeled as expensive (cost 10): function call.
+ ; CHECK: cost of 10 {{.*}} fptoui
+ %r48 = fptoui double undef to i64
+ ; CHECK: cost of 10 {{.*}} fptosi
+ %r49 = fptosi double undef to i64
+
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r50 = sitofp i1 undef to float
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r51 = uitofp i1 undef to float
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r52 = sitofp i1 undef to double
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r53 = uitofp i1 undef to double
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r54 = sitofp i8 undef to float
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r55 = uitofp i8 undef to float
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r56 = sitofp i8 undef to double
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r57 = uitofp i8 undef to double
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r58 = sitofp i16 undef to float
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r59 = uitofp i16 undef to float
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r60 = sitofp i16 undef to double
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r61 = uitofp i16 undef to double
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r62 = sitofp i32 undef to float
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r63 = uitofp i32 undef to float
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r64 = sitofp i32 undef to double
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r65 = uitofp i32 undef to double
+ ; Conversions from i64 are modeled as expensive (cost 10): function call.
+ ; CHECK: cost of 10 {{.*}} sitofp
+ %r66 = sitofp i64 undef to float
+ ; CHECK: cost of 10 {{.*}} uitofp
+ %r67 = uitofp i64 undef to float
+ ; CHECK: cost of 10 {{.*}} sitofp
+ %r68 = sitofp i64 undef to double
+ ; CHECK: cost of 10 {{.*}} uitofp
+ %r69 = uitofp i64 undef to double
+
+ ;CHECK: cost of 0 {{.*}} ret
+ ret i32 undef
+}
+