aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArnold Schwaighofer <aschwaighofer@apple.com>2013-03-18 22:47:06 +0000
committerArnold Schwaighofer <aschwaighofer@apple.com>2013-03-18 22:47:06 +0000
commit01f25710148721f9fc2dece5eec17899ca414bcc (patch)
tree0efed68c543968169bb1aaa1a5d98758dbab6782
parente572809aa153f37a7a17726f9aac26598d60e57c (diff)
downloadexternal_llvm-01f25710148721f9fc2dece5eec17899ca414bcc.zip
external_llvm-01f25710148721f9fc2dece5eec17899ca414bcc.tar.gz
external_llvm-01f25710148721f9fc2dece5eec17899ca414bcc.tar.bz2
ARM cost model: Correct cost for some cheap float to integer conversions
Fix cost of some "cheap" cast instructions. Before this patch we used to estimate for example: cost of 16 for instruction: %r = fptoui <4 x float> %v0 to <4 x i16> While we would emit: vcvt.s32.f32 q8, q8 vmovn.i32 d16, q8 vuzp.8 d16, d17 All other costs are left to the values assigned by the fallback logic. Theses costs are mostly reasonable in the sense that they get progressively more expensive as the instruction sequences emitted get longer. radar://13434072 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177333 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp10
-rw-r--r--test/Analysis/CostModel/ARM/cast.ll171
2 files changed, 179 insertions, 2 deletions
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 7a32ffb..3883403 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -224,12 +224,20 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
// Vector double <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 },
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 },
+ { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 }
};
if (SrcTy.isVector() && ST->hasNEON()) {
diff --git a/test/Analysis/CostModel/ARM/cast.ll b/test/Analysis/CostModel/ARM/cast.ll
index 2b58f6c..88b1844 100644
--- a/test/Analysis/CostModel/ARM/cast.ll
+++ b/test/Analysis/CostModel/ARM/cast.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s
+; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a8 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios6.0.0"
@@ -190,6 +190,175 @@ define i32 @casts() {
; CHECK: cost of 16 {{.*}} fpext <16 x float
%r89 = fpext <16 x float> undef to <16 x double>
+ ;; Floating point to integer vector casts.
+ ; CHECK: cost of 1 {{.*}} fptoui
+ %r90 = fptoui <2 x float> undef to <2 x i1>
+ ; CHECK: cost of 1 {{.*}} fptosi
+ %r91 = fptosi <2 x float> undef to <2 x i1>
+ ; CHECK: cost of 1 {{.*}} fptoui
+ %r92 = fptoui <2 x float> undef to <2 x i8>
+ ; CHECK: cost of 1 {{.*}} fptosi
+ %r93 = fptosi <2 x float> undef to <2 x i8>
+ ; CHECK: cost of 1 {{.*}} fptoui
+ %r94 = fptoui <2 x float> undef to <2 x i16>
+ ; CHECK: cost of 1 {{.*}} fptosi
+ %r95 = fptosi <2 x float> undef to <2 x i16>
+ ; CHECK: cost of 1 {{.*}} fptoui
+ %r96 = fptoui <2 x float> undef to <2 x i32>
+ ; CHECK: cost of 1 {{.*}} fptosi
+ %r97 = fptosi <2 x float> undef to <2 x i32>
+ ; CHECK: cost of 24 {{.*}} fptoui
+ %r98 = fptoui <2 x float> undef to <2 x i64>
+ ; CHECK: cost of 24 {{.*}} fptosi
+ %r99 = fptosi <2 x float> undef to <2 x i64>
+
+ ; CHECK: cost of 8 {{.*}} fptoui
+ %r100 = fptoui <2 x double> undef to <2 x i1>
+ ; CHECK: cost of 8 {{.*}} fptosi
+ %r101 = fptosi <2 x double> undef to <2 x i1>
+ ; CHECK: cost of 8 {{.*}} fptoui
+ %r102 = fptoui <2 x double> undef to <2 x i8>
+ ; CHECK: cost of 8 {{.*}} fptosi
+ %r103 = fptosi <2 x double> undef to <2 x i8>
+ ; CHECK: cost of 8 {{.*}} fptoui
+ %r104 = fptoui <2 x double> undef to <2 x i16>
+ ; CHECK: cost of 8 {{.*}} fptosi
+ %r105 = fptosi <2 x double> undef to <2 x i16>
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r106 = fptoui <2 x double> undef to <2 x i32>
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r107 = fptosi <2 x double> undef to <2 x i32>
+ ; CHECK: cost of 24 {{.*}} fptoui
+ %r108 = fptoui <2 x double> undef to <2 x i64>
+ ; CHECK: cost of 24 {{.*}} fptosi
+ %r109 = fptosi <2 x double> undef to <2 x i64>
+
+ ; CHECK: cost of 16 {{.*}} fptoui
+ %r110 = fptoui <4 x float> undef to <4 x i1>
+ ; CHECK: cost of 16 {{.*}} fptosi
+ %r111 = fptosi <4 x float> undef to <4 x i1>
+ ; CHECK: cost of 3 {{.*}} fptoui
+ %r112 = fptoui <4 x float> undef to <4 x i8>
+ ; CHECK: cost of 3 {{.*}} fptosi
+ %r113 = fptosi <4 x float> undef to <4 x i8>
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r114 = fptoui <4 x float> undef to <4 x i16>
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r115 = fptosi <4 x float> undef to <4 x i16>
+ ; CHECK: cost of 1 {{.*}} fptoui
+ %r116 = fptoui <4 x float> undef to <4 x i32>
+ ; CHECK: cost of 1 {{.*}} fptosi
+ %r117 = fptosi <4 x float> undef to <4 x i32>
+ ; CHECK: cost of 48 {{.*}} fptoui
+ %r118 = fptoui <4 x float> undef to <4 x i64>
+ ; CHECK: cost of 48 {{.*}} fptosi
+ %r119 = fptosi <4 x float> undef to <4 x i64>
+
+ ; CHECK: cost of 16 {{.*}} fptoui
+ %r120 = fptoui <4 x double> undef to <4 x i1>
+ ; CHECK: cost of 16 {{.*}} fptosi
+ %r121 = fptosi <4 x double> undef to <4 x i1>
+ ; CHECK: cost of 16 {{.*}} fptoui
+ %r122 = fptoui <4 x double> undef to <4 x i8>
+ ; CHECK: cost of 16 {{.*}} fptosi
+ %r123 = fptosi <4 x double> undef to <4 x i8>
+ ; CHECK: cost of 16 {{.*}} fptoui
+ %r124 = fptoui <4 x double> undef to <4 x i16>
+ ; CHECK: cost of 16 {{.*}} fptosi
+ %r125 = fptosi <4 x double> undef to <4 x i16>
+ ; CHECK: cost of 16 {{.*}} fptoui
+ %r126 = fptoui <4 x double> undef to <4 x i32>
+ ; CHECK: cost of 16 {{.*}} fptosi
+ %r127 = fptosi <4 x double> undef to <4 x i32>
+ ; CHECK: cost of 48 {{.*}} fptoui
+ %r128 = fptoui <4 x double> undef to <4 x i64>
+ ; CHECK: cost of 48 {{.*}} fptosi
+ %r129 = fptosi <4 x double> undef to <4 x i64>
+
+ ; CHECK: cost of 32 {{.*}} fptoui
+ %r130 = fptoui <8 x float> undef to <8 x i1>
+ ; CHECK: cost of 32 {{.*}} fptosi
+ %r131 = fptosi <8 x float> undef to <8 x i1>
+ ; CHECK: cost of 32 {{.*}} fptoui
+ %r132 = fptoui <8 x float> undef to <8 x i8>
+ ; CHECK: cost of 32 {{.*}} fptosi
+ %r133 = fptosi <8 x float> undef to <8 x i8>
+ ; CHECK: cost of 4 {{.*}} fptoui
+ %r134 = fptoui <8 x float> undef to <8 x i16>
+ ; CHECK: cost of 4 {{.*}} fptosi
+ %r135 = fptosi <8 x float> undef to <8 x i16>
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r136 = fptoui <8 x float> undef to <8 x i32>
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r137 = fptosi <8 x float> undef to <8 x i32>
+ ; CHECK: cost of 96 {{.*}} fptoui
+ %r138 = fptoui <8 x float> undef to <8 x i64>
+ ; CHECK: cost of 96 {{.*}} fptosi
+ %r139 = fptosi <8 x float> undef to <8 x i64>
+
+ ; CHECK: cost of 32 {{.*}} fptoui
+ %r140 = fptoui <8 x double> undef to <8 x i1>
+ ; CHECK: cost of 32 {{.*}} fptosi
+ %r141 = fptosi <8 x double> undef to <8 x i1>
+ ; CHECK: cost of 32 {{.*}} fptoui
+ %r142 = fptoui <8 x double> undef to <8 x i8>
+ ; CHECK: cost of 32 {{.*}} fptosi
+ %r143 = fptosi <8 x double> undef to <8 x i8>
+ ; CHECK: cost of 32 {{.*}} fptoui
+ %r144 = fptoui <8 x double> undef to <8 x i16>
+ ; CHECK: cost of 32 {{.*}} fptosi
+ %r145 = fptosi <8 x double> undef to <8 x i16>
+ ; CHECK: cost of 32 {{.*}} fptoui
+ %r146 = fptoui <8 x double> undef to <8 x i32>
+ ; CHECK: cost of 32 {{.*}} fptosi
+ %r147 = fptosi <8 x double> undef to <8 x i32>
+ ; CHECK: cost of 96 {{.*}} fptoui
+ %r148 = fptoui <8 x double> undef to <8 x i64>
+ ; CHECK: cost of 96 {{.*}} fptosi
+ %r149 = fptosi <8 x double> undef to <8 x i64>
+
+ ; CHECK: cost of 64 {{.*}} fptoui
+ %r150 = fptoui <16 x float> undef to <16 x i1>
+ ; CHECK: cost of 64 {{.*}} fptosi
+ %r151 = fptosi <16 x float> undef to <16 x i1>
+ ; CHECK: cost of 64 {{.*}} fptoui
+ %r152 = fptoui <16 x float> undef to <16 x i8>
+ ; CHECK: cost of 64 {{.*}} fptosi
+ %r153 = fptosi <16 x float> undef to <16 x i8>
+ ; CHECK: cost of 8 {{.*}} fptoui
+ %r154 = fptoui <16 x float> undef to <16 x i16>
+ ; CHECK: cost of 8 {{.*}} fptosi
+ %r155 = fptosi <16 x float> undef to <16 x i16>
+ ; CHECK: cost of 4 {{.*}} fptoui
+ %r156 = fptoui <16 x float> undef to <16 x i32>
+ ; CHECK: cost of 4 {{.*}} fptosi
+ %r157 = fptosi <16 x float> undef to <16 x i32>
+ ; CHECK: cost of 192 {{.*}} fptoui
+ %r158 = fptoui <16 x float> undef to <16 x i64>
+ ; CHECK: cost of 192 {{.*}} fptosi
+ %r159 = fptosi <16 x float> undef to <16 x i64>
+
+ ; CHECK: cost of 64 {{.*}} fptoui
+ %r160 = fptoui <16 x double> undef to <16 x i1>
+ ; CHECK: cost of 64 {{.*}} fptosi
+ %r161 = fptosi <16 x double> undef to <16 x i1>
+ ; CHECK: cost of 64 {{.*}} fptoui
+ %r162 = fptoui <16 x double> undef to <16 x i8>
+ ; CHECK: cost of 64 {{.*}} fptosi
+ %r163 = fptosi <16 x double> undef to <16 x i8>
+ ; CHECK: cost of 64 {{.*}} fptoui
+ %r164 = fptoui <16 x double> undef to <16 x i16>
+ ; CHECK: cost of 64 {{.*}} fptosi
+ %r165 = fptosi <16 x double> undef to <16 x i16>
+ ; CHECK: cost of 64 {{.*}} fptoui
+ %r166 = fptoui <16 x double> undef to <16 x i32>
+ ; CHECK: cost of 64 {{.*}} fptosi
+ %r167 = fptosi <16 x double> undef to <16 x i32>
+ ; CHECK: cost of 192 {{.*}} fptoui
+ %r168 = fptoui <16 x double> undef to <16 x i64>
+ ; CHECK: cost of 192 {{.*}} fptosi
+ %r169 = fptosi <16 x double> undef to <16 x i64>
+
;CHECK: cost of 0 {{.*}} ret
ret i32 undef
}