aboutsummaryrefslogtreecommitdiffstats
path: root/test/Analysis/CostModel
diff options
context:
space:
mode:
authorArnold Schwaighofer <aschwaighofer@apple.com>2013-02-08 14:50:48 +0000
committerArnold Schwaighofer <aschwaighofer@apple.com>2013-02-08 14:50:48 +0000
commitfb55a8fd7c38aa09d9c243d48a8a72d890f36a3d (patch)
tree6143816aee69c2cdf8ac4bd2414689216941b036 /test/Analysis/CostModel
parentf64edf8d802824202b50046638696a6b7897d4d6 (diff)
downloadexternal_llvm-fb55a8fd7c38aa09d9c243d48a8a72d890f36a3d.zip
external_llvm-fb55a8fd7c38aa09d9c243d48a8a72d890f36a3d.tar.gz
external_llvm-fb55a8fd7c38aa09d9c243d48a8a72d890f36a3d.tar.bz2
ARM cost model: Address computation in vector mem ops not free
Adds a function to target transform info to query for the cost of address computation. The cost model analysis pass now also queries this interface. The code in LoopVectorize adds the cost of address computation as part of the memory instruction cost calculation. Only there, we know whether the instruction will be scalarized or not. Increase the penality for inserting in to D registers on swift. This becomes necessary because we now always assume that address computation has a cost and three is a closer value to the architecture. radar://13097204 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174713 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Analysis/CostModel')
-rw-r--r--test/Analysis/CostModel/ARM/gep.ll43
-rw-r--r--test/Analysis/CostModel/ARM/insertelement.ll6
-rw-r--r--test/Analysis/CostModel/X86/gep.ll40
3 files changed, 86 insertions, 3 deletions
diff --git a/test/Analysis/CostModel/ARM/gep.ll b/test/Analysis/CostModel/ARM/gep.ll
new file mode 100644
index 0000000..a63b87d
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/gep.ll
@@ -0,0 +1,43 @@
+; RUN: opt -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios6.0.0"
+
+define void @test_geps() {
+ ; Cost of scalar integer geps should be one. We can't always expect it to be
+ ; folded into the instruction addressing mode.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8*
+ %a0 = getelementptr inbounds i8* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16*
+ %a1 = getelementptr inbounds i16* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32*
+ %a2 = getelementptr inbounds i32* undef, i32 0
+
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64*
+ %a3 = getelementptr inbounds i64* undef, i32 0
+
+ ; Cost of scalar floating point geps should be one. We cannot fold the address
+ ; computation.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float*
+ %a4 = getelementptr inbounds float* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double*
+ %a5 = getelementptr inbounds double* undef, i32 0
+
+
+ ; Cost of vector geps should be one. We cannot fold the address computation.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>*
+ %a7 = getelementptr inbounds <4 x i8>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>*
+ %a8 = getelementptr inbounds <4 x i16>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>*
+ %a9 = getelementptr inbounds <4 x i32>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>*
+ %a10 = getelementptr inbounds <4 x i64>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>*
+ %a11 = getelementptr inbounds <4 x float>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>*
+ %a12 = getelementptr inbounds <4 x double>* undef, i32 0
+
+
+ ret void
+}
diff --git a/test/Analysis/CostModel/ARM/insertelement.ll b/test/Analysis/CostModel/ARM/insertelement.ll
index 2d43e4d..f951b08 100644
--- a/test/Analysis/CostModel/ARM/insertelement.ll
+++ b/test/Analysis/CostModel/ARM/insertelement.ll
@@ -12,7 +12,7 @@ define void @insertelement_i8(%T_i8* %saddr,
%T_i8v* %vaddr) {
%v0 = load %T_i8v* %vaddr
%v1 = load %T_i8* %saddr
-;CHECK: estimated cost of 2 for {{.*}} insertelement <8 x i8>
+;CHECK: estimated cost of 3 for {{.*}} insertelement <8 x i8>
%v2 = insertelement %T_i8v %v0, %T_i8 %v1, i32 1
store %T_i8v %v2, %T_i8v* %vaddr
ret void
@@ -26,7 +26,7 @@ define void @insertelement_i16(%T_i16* %saddr,
%T_i16v* %vaddr) {
%v0 = load %T_i16v* %vaddr
%v1 = load %T_i16* %saddr
-;CHECK: estimated cost of 2 for {{.*}} insertelement <4 x i16>
+;CHECK: estimated cost of 3 for {{.*}} insertelement <4 x i16>
%v2 = insertelement %T_i16v %v0, %T_i16 %v1, i32 1
store %T_i16v %v2, %T_i16v* %vaddr
ret void
@@ -39,7 +39,7 @@ define void @insertelement_i32(%T_i32* %saddr,
%T_i32v* %vaddr) {
%v0 = load %T_i32v* %vaddr
%v1 = load %T_i32* %saddr
-;CHECK: estimated cost of 2 for {{.*}} insertelement <2 x i32>
+;CHECK: estimated cost of 3 for {{.*}} insertelement <2 x i32>
%v2 = insertelement %T_i32v %v0, %T_i32 %v1, i32 1
store %T_i32v %v2, %T_i32v* %vaddr
ret void
diff --git a/test/Analysis/CostModel/X86/gep.ll b/test/Analysis/CostModel/X86/gep.ll
new file mode 100644
index 0000000..877184a
--- /dev/null
+++ b/test/Analysis/CostModel/X86/gep.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+
+define void @test_geps() {
+ ; Cost of should be zero. We expect it to be folded into
+ ; the instruction addressing mode.
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8*
+ %a0 = getelementptr inbounds i8* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16*
+ %a1 = getelementptr inbounds i16* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32*
+ %a2 = getelementptr inbounds i32* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64*
+ %a3 = getelementptr inbounds i64* undef, i32 0
+
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float*
+ %a4 = getelementptr inbounds float* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double*
+ %a5 = getelementptr inbounds double* undef, i32 0
+
+ ; Vector geps should also have zero cost.
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>*
+ %a7 = getelementptr inbounds <4 x i8>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>*
+ %a8 = getelementptr inbounds <4 x i16>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>*
+ %a9 = getelementptr inbounds <4 x i32>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>*
+ %a10 = getelementptr inbounds <4 x i64>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>*
+ %a11 = getelementptr inbounds <4 x float>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>*
+ %a12 = getelementptr inbounds <4 x double>* undef, i32 0
+
+
+ ret void
+}