Diffstat (limited to 'test/Analysis/CostModel')
-rw-r--r--  test/Analysis/CostModel/AArch64/store.ll          4
-rw-r--r--  test/Analysis/CostModel/ARM/gep.ll                48
-rw-r--r--  test/Analysis/CostModel/ARM/insertelement.ll      12
-rw-r--r--  test/Analysis/CostModel/PowerPC/load_store.ll     16
-rw-r--r--  test/Analysis/CostModel/X86/gep.ll                48
-rw-r--r--  test/Analysis/CostModel/X86/intrinsic-cost.ll     16
-rw-r--r--  test/Analysis/CostModel/X86/load_store.ll         34
-rw-r--r--  test/Analysis/CostModel/X86/loop_v2.ll            12
-rw-r--r--  test/Analysis/CostModel/X86/testshiftlshr.ll      16
-rw-r--r--  test/Analysis/CostModel/X86/testshiftshl.ll       16
-rw-r--r--  test/Analysis/CostModel/X86/vectorized-loop.ll    16
11 files changed, 119 insertions(+), 119 deletions(-)
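
The diff below updates these CostModel tests for the explicit pointee type that textual IR now carries on load and getelementptr. As a minimal sketch of the migration (a hypothetical function, not taken from any file in this diff), the old and new forms are:

  define i32 @example(i32* %p, i64 %i) {
    ; old form: %q = getelementptr inbounds i32* %p, i64 %i
    %q = getelementptr inbounds i32, i32* %p, i64 %i
    ; old form: %v = load i32* %q, align 4
    %v = load i32, i32* %q, align 4
    ret i32 %v
  }
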
diff --git a/test/Analysis/CostModel/AArch64/store.ll b/test/Analysis/CostModel/AArch64/store.ll
index 0c9883c..307f8f8 100644
--- a/test/Analysis/CostModel/AArch64/store.ll
+++ b/test/Analysis/CostModel/AArch64/store.ll
@@ -14,9 +14,9 @@ define void @store() {
; CHECK: cost of 64 {{.*}} store
store <4 x i8> undef, <4 x i8> * undef
; CHECK: cost of 16 {{.*}} load
- load <2 x i8> * undef
+ load <2 x i8> , <2 x i8> * undef
; CHECK: cost of 64 {{.*}} load
- load <4 x i8> * undef
+ load <4 x i8> , <4 x i8> * undef
ret void
}
diff --git a/test/Analysis/CostModel/ARM/gep.ll b/test/Analysis/CostModel/ARM/gep.ll
index a63b87d..624ca11 100644
--- a/test/Analysis/CostModel/ARM/gep.ll
+++ b/test/Analysis/CostModel/ARM/gep.ll
@@ -6,37 +6,37 @@ target triple = "thumbv7-apple-ios6.0.0"
define void @test_geps() {
; Cost of scalar integer geps should be one. We can't always expect them to be
; folded into the instruction addressing mode.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8*
- %a0 = getelementptr inbounds i8* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16*
- %a1 = getelementptr inbounds i16* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32*
- %a2 = getelementptr inbounds i32* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8*
+ %a0 = getelementptr inbounds i8, i8* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
+ %a1 = getelementptr inbounds i16, i16* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
+ %a2 = getelementptr inbounds i32, i32* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64*
- %a3 = getelementptr inbounds i64* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
+ %a3 = getelementptr inbounds i64, i64* undef, i32 0
; Cost of scalar floating point geps should be one. We cannot fold the address
; computation.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float*
- %a4 = getelementptr inbounds float* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double*
- %a5 = getelementptr inbounds double* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float, float*
+ %a4 = getelementptr inbounds float, float* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double*
+ %a5 = getelementptr inbounds double, double* undef, i32 0
; Cost of vector geps should be one. We cannot fold the address computation.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>*
- %a7 = getelementptr inbounds <4 x i8>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>*
- %a8 = getelementptr inbounds <4 x i16>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>*
- %a9 = getelementptr inbounds <4 x i32>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>*
- %a10 = getelementptr inbounds <4 x i64>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>*
- %a11 = getelementptr inbounds <4 x float>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>*
- %a12 = getelementptr inbounds <4 x double>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
+ %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
+ %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
+ %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
+ %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
+ %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
+ %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
ret void
diff --git a/test/Analysis/CostModel/ARM/insertelement.ll b/test/Analysis/CostModel/ARM/insertelement.ll
index f951b08..bd1467e 100644
--- a/test/Analysis/CostModel/ARM/insertelement.ll
+++ b/test/Analysis/CostModel/ARM/insertelement.ll
@@ -10,8 +10,8 @@ target triple = "thumbv7-apple-ios6.0.0"
; CHECK: insertelement_i8
define void @insertelement_i8(%T_i8* %saddr,
%T_i8v* %vaddr) {
- %v0 = load %T_i8v* %vaddr
- %v1 = load %T_i8* %saddr
+ %v0 = load %T_i8v, %T_i8v* %vaddr
+ %v1 = load %T_i8, %T_i8* %saddr
;CHECK: estimated cost of 3 for {{.*}} insertelement <8 x i8>
%v2 = insertelement %T_i8v %v0, %T_i8 %v1, i32 1
store %T_i8v %v2, %T_i8v* %vaddr
@@ -24,8 +24,8 @@ define void @insertelement_i8(%T_i8* %saddr,
; CHECK: insertelement_i16
define void @insertelement_i16(%T_i16* %saddr,
%T_i16v* %vaddr) {
- %v0 = load %T_i16v* %vaddr
- %v1 = load %T_i16* %saddr
+ %v0 = load %T_i16v, %T_i16v* %vaddr
+ %v1 = load %T_i16, %T_i16* %saddr
;CHECK: estimated cost of 3 for {{.*}} insertelement <4 x i16>
%v2 = insertelement %T_i16v %v0, %T_i16 %v1, i32 1
store %T_i16v %v2, %T_i16v* %vaddr
@@ -37,8 +37,8 @@ define void @insertelement_i16(%T_i16* %saddr,
; CHECK: insertelement_i32
define void @insertelement_i32(%T_i32* %saddr,
%T_i32v* %vaddr) {
- %v0 = load %T_i32v* %vaddr
- %v1 = load %T_i32* %saddr
+ %v0 = load %T_i32v, %T_i32v* %vaddr
+ %v1 = load %T_i32, %T_i32* %saddr
;CHECK: estimated cost of 3 for {{.*}} insertelement <2 x i32>
%v2 = insertelement %T_i32v %v0, %T_i32 %v1, i32 1
store %T_i32v %v2, %T_i32v* %vaddr
diff --git a/test/Analysis/CostModel/PowerPC/load_store.ll b/test/Analysis/CostModel/PowerPC/load_store.ll
index 368f0a7..1e50f16 100644
--- a/test/Analysis/CostModel/PowerPC/load_store.ll
+++ b/test/Analysis/CostModel/PowerPC/load_store.ll
@@ -19,26 +19,26 @@ define i32 @stores(i32 %arg) {
}
define i32 @loads(i32 %arg) {
; CHECK: cost of 1 {{.*}} load
- load i8* undef, align 4
+ load i8, i8* undef, align 4
; CHECK: cost of 1 {{.*}} load
- load i16* undef, align 4
+ load i16, i16* undef, align 4
; CHECK: cost of 1 {{.*}} load
- load i32* undef, align 4
+ load i32, i32* undef, align 4
; CHECK: cost of 2 {{.*}} load
- load i64* undef, align 4
+ load i64, i64* undef, align 4
; CHECK: cost of 4 {{.*}} load
- load i128* undef, align 4
+ load i128, i128* undef, align 4
; FIXME: There actually are sub-vector Altivec loads, and so we could handle
; this with a small expense, but we don't currently.
; CHECK: cost of 48 {{.*}} load
- load <4 x i16>* undef, align 2
+ load <4 x i16>, <4 x i16>* undef, align 2
; CHECK: cost of 1 {{.*}} load
- load <4 x i32>* undef, align 4
+ load <4 x i32>, <4 x i32>* undef, align 4
; CHECK: cost of 46 {{.*}} load
- load <3 x float>* undef, align 1
+ load <3 x float>, <3 x float>* undef, align 1
ret i32 undef
}
diff --git a/test/Analysis/CostModel/X86/gep.ll b/test/Analysis/CostModel/X86/gep.ll
index 877184a..a4488ba 100644
--- a/test/Analysis/CostModel/X86/gep.ll
+++ b/test/Analysis/CostModel/X86/gep.ll
@@ -7,33 +7,33 @@ target triple = "x86_64-apple-macosx10.8.0"
define void @test_geps() {
; Cost of the geps should be zero. We expect them to be folded into
; the instruction addressing mode.
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8*
- %a0 = getelementptr inbounds i8* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16*
- %a1 = getelementptr inbounds i16* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32*
- %a2 = getelementptr inbounds i32* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64*
- %a3 = getelementptr inbounds i64* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
+ %a0 = getelementptr inbounds i8, i8* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
+ %a1 = getelementptr inbounds i16, i16* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
+ %a2 = getelementptr inbounds i32, i32* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
+ %a3 = getelementptr inbounds i64, i64* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float*
- %a4 = getelementptr inbounds float* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double*
- %a5 = getelementptr inbounds double* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float, float*
+ %a4 = getelementptr inbounds float, float* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double, double*
+ %a5 = getelementptr inbounds double, double* undef, i32 0
; Vector geps should also have zero cost.
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>*
- %a7 = getelementptr inbounds <4 x i8>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>*
- %a8 = getelementptr inbounds <4 x i16>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>*
- %a9 = getelementptr inbounds <4 x i32>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>*
- %a10 = getelementptr inbounds <4 x i64>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>*
- %a11 = getelementptr inbounds <4 x float>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>*
- %a12 = getelementptr inbounds <4 x double>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
+ %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
+ %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
+ %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
+ %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
+ %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
+ %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
ret void
diff --git a/test/Analysis/CostModel/X86/intrinsic-cost.ll b/test/Analysis/CostModel/X86/intrinsic-cost.ll
index 3b27b52..efc1263 100644
--- a/test/Analysis/CostModel/X86/intrinsic-cost.ll
+++ b/test/Analysis/CostModel/X86/intrinsic-cost.ll
@@ -9,9 +9,9 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds float* %f, i64 %index
+ %0 = getelementptr inbounds float, float* %f, i64 %index
%1 = bitcast float* %0 to <4 x float>*
- %wide.load = load <4 x float>* %1, align 4
+ %wide.load = load <4 x float>, <4 x float>* %1, align 4
%2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
store <4 x float> %2, <4 x float>* %1, align 4
%index.next = add i64 %index, 4
@@ -22,7 +22,7 @@ for.end: ; preds = %vector.body
ret void
; CORE2: Printing analysis 'Cost Model Analysis' for function 'test1':
-; CORE2: Cost Model: Found an estimated cost of 400 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
+; CORE2: Cost Model: Found an estimated cost of 46 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
; COREI7: Printing analysis 'Cost Model Analysis' for function 'test1':
; COREI7: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
@@ -37,9 +37,9 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds float* %f, i64 %index
+ %0 = getelementptr inbounds float, float* %f, i64 %index
%1 = bitcast float* %0 to <4 x float>*
- %wide.load = load <4 x float>* %1, align 4
+ %wide.load = load <4 x float>, <4 x float>* %1, align 4
%2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
store <4 x float> %2, <4 x float>* %1, align 4
%index.next = add i64 %index, 4
@@ -50,7 +50,7 @@ for.end: ; preds = %vector.body
ret void
; CORE2: Printing analysis 'Cost Model Analysis' for function 'test2':
-; CORE2: Cost Model: Found an estimated cost of 400 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
+; CORE2: Cost Model: Found an estimated cost of 46 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
; COREI7: Printing analysis 'Cost Model Analysis' for function 'test2':
; COREI7: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
@@ -65,9 +65,9 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds float* %f, i64 %index
+ %0 = getelementptr inbounds float, float* %f, i64 %index
%1 = bitcast float* %0 to <4 x float>*
- %wide.load = load <4 x float>* %1, align 4
+ %wide.load = load <4 x float>, <4 x float>* %1, align 4
%2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
store <4 x float> %2, <4 x float>* %1, align 4
%index.next = add i64 %index, 4
diff --git a/test/Analysis/CostModel/X86/load_store.ll b/test/Analysis/CostModel/X86/load_store.ll
index a53d0bd..ccf110a 100644
--- a/test/Analysis/CostModel/X86/load_store.ll
+++ b/test/Analysis/CostModel/X86/load_store.ll
@@ -34,49 +34,49 @@ define i32 @stores(i32 %arg) {
}
define i32 @loads(i32 %arg) {
;CHECK: cost of 1 {{.*}} load
- load i8* undef, align 4
+ load i8, i8* undef, align 4
;CHECK: cost of 1 {{.*}} load
- load i16* undef, align 4
+ load i16, i16* undef, align 4
;CHECK: cost of 1 {{.*}} load
- load i32* undef, align 4
+ load i32, i32* undef, align 4
;CHECK: cost of 1 {{.*}} load
- load i64* undef, align 4
+ load i64, i64* undef, align 4
;CHECK: cost of 2 {{.*}} load
- load i128* undef, align 4
+ load i128, i128* undef, align 4
;CHECK: cost of 1 {{.*}} load
- load <2 x i32>* undef, align 4
+ load <2 x i32>, <2 x i32>* undef, align 4
;CHECK: cost of 1 {{.*}} load
- load <4 x i32>* undef, align 4
+ load <4 x i32>, <4 x i32>* undef, align 4
;CHECK: cost of 2 {{.*}} load
- load <8 x i32>* undef, align 4
+ load <8 x i32>, <8 x i32>* undef, align 4
;CHECK: cost of 1 {{.*}} load
- load <2 x i64>* undef, align 4
+ load <2 x i64>, <2 x i64>* undef, align 4
;CHECK: cost of 2 {{.*}} load
- load <4 x i64>* undef, align 4
+ load <4 x i64>, <4 x i64>* undef, align 4
;CHECK: cost of 4 {{.*}} load
- load <8 x i64>* undef, align 4
+ load <8 x i64>, <8 x i64>* undef, align 4
;CHECK: cost of 3 {{.*}} load
- load <3 x float>* undef, align 4
+ load <3 x float>, <3 x float>* undef, align 4
;CHECK: cost of 3 {{.*}} load
- load <3 x double>* undef, align 4
+ load <3 x double>, <3 x double>* undef, align 4
;CHECK: cost of 3 {{.*}} load
- load <3 x i32>* undef, align 4
+ load <3 x i32>, <3 x i32>* undef, align 4
;CHECK: cost of 3 {{.*}} load
- load <3 x i64>* undef, align 4
+ load <3 x i64>, <3 x i64>* undef, align 4
;CHECK: cost of 10 {{.*}} load
- load <5 x i32>* undef, align 4
+ load <5 x i32>, <5 x i32>* undef, align 4
;CHECK: cost of 10 {{.*}} load
- load <5 x i64>* undef, align 4
+ load <5 x i64>, <5 x i64>* undef, align 4
ret i32 undef
}
diff --git a/test/Analysis/CostModel/X86/loop_v2.ll b/test/Analysis/CostModel/X86/loop_v2.ll
index 348444e..9283310 100644
--- a/test/Analysis/CostModel/X86/loop_v2.ll
+++ b/test/Analysis/CostModel/X86/loop_v2.ll
@@ -10,20 +10,20 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <2 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ]
- %0 = getelementptr inbounds i32* %A, i64 %index
+ %0 = getelementptr inbounds i32, i32* %A, i64 %index
%1 = bitcast i32* %0 to <2 x i32>*
- %2 = load <2 x i32>* %1, align 4
+ %2 = load <2 x i32>, <2 x i32>* %1, align 4
%3 = sext <2 x i32> %2 to <2 x i64>
;CHECK: cost of 1 {{.*}} extract
%4 = extractelement <2 x i64> %3, i32 0
- %5 = getelementptr inbounds i32* %A, i64 %4
+ %5 = getelementptr inbounds i32, i32* %A, i64 %4
;CHECK: cost of 1 {{.*}} extract
%6 = extractelement <2 x i64> %3, i32 1
- %7 = getelementptr inbounds i32* %A, i64 %6
- %8 = load i32* %5, align 4
+ %7 = getelementptr inbounds i32, i32* %A, i64 %6
+ %8 = load i32, i32* %5, align 4
;CHECK: cost of 1 {{.*}} insert
%9 = insertelement <2 x i32> undef, i32 %8, i32 0
- %10 = load i32* %7, align 4
+ %10 = load i32, i32* %7, align 4
;CHECK: cost of 1 {{.*}} insert
%11 = insertelement <2 x i32> %9, i32 %10, i32 1
%12 = add nsw <2 x i32> %11, %vec.phi
diff --git a/test/Analysis/CostModel/X86/testshiftlshr.ll b/test/Analysis/CostModel/X86/testshiftlshr.ll
index 7bc8d89..78bf0a6 100644
--- a/test/Analysis/CostModel/X86/testshiftlshr.ll
+++ b/test/Analysis/CostModel/X86/testshiftlshr.ll
@@ -7,7 +7,7 @@ entry:
; SSE2: shift2i16
; SSE2: cost of 20 {{.*}} lshr
; SSE2-CODEGEN: shift2i16
- ; SSE2-CODEGEN: shrq %cl
+ ; SSE2-CODEGEN: psrlq
%0 = lshr %shifttype %a , %b
ret %shifttype %0
@@ -67,7 +67,7 @@ entry:
; SSE2: shift2i32
; SSE2: cost of 20 {{.*}} lshr
; SSE2-CODEGEN: shift2i32
- ; SSE2-CODEGEN: shrq %cl
+ ; SSE2-CODEGEN: psrlq
%0 = lshr %shifttype2i32 %a , %b
ret %shifttype2i32 %0
@@ -127,7 +127,7 @@ entry:
; SSE2: shift2i64
; SSE2: cost of 20 {{.*}} lshr
; SSE2-CODEGEN: shift2i64
- ; SSE2-CODEGEN: shrq %cl
+ ; SSE2-CODEGEN: psrlq
%0 = lshr %shifttype2i64 %a , %b
ret %shifttype2i64 %0
@@ -139,7 +139,7 @@ entry:
; SSE2: shift4i64
; SSE2: cost of 40 {{.*}} lshr
; SSE2-CODEGEN: shift4i64
- ; SSE2-CODEGEN: shrq %cl
+ ; SSE2-CODEGEN: psrlq
%0 = lshr %shifttype4i64 %a , %b
ret %shifttype4i64 %0
@@ -151,7 +151,7 @@ entry:
; SSE2: shift8i64
; SSE2: cost of 80 {{.*}} lshr
; SSE2-CODEGEN: shift8i64
- ; SSE2-CODEGEN: shrq %cl
+ ; SSE2-CODEGEN: psrlq
%0 = lshr %shifttype8i64 %a , %b
ret %shifttype8i64 %0
@@ -163,7 +163,7 @@ entry:
; SSE2: shift16i64
; SSE2: cost of 160 {{.*}} lshr
; SSE2-CODEGEN: shift16i64
- ; SSE2-CODEGEN: shrq %cl
+ ; SSE2-CODEGEN: psrlq
%0 = lshr %shifttype16i64 %a , %b
ret %shifttype16i64 %0
@@ -175,7 +175,7 @@ entry:
; SSE2: shift32i64
; SSE2: cost of 320 {{.*}} lshr
; SSE2-CODEGEN: shift32i64
- ; SSE2-CODEGEN: shrq %cl
+ ; SSE2-CODEGEN: psrlq
%0 = lshr %shifttype32i64 %a , %b
ret %shifttype32i64 %0
@@ -187,7 +187,7 @@ entry:
; SSE2: shift2i8
; SSE2: cost of 20 {{.*}} lshr
; SSE2-CODEGEN: shift2i8
- ; SSE2-CODEGEN: shrq %cl
+ ; SSE2-CODEGEN: psrlq
%0 = lshr %shifttype2i8 %a , %b
ret %shifttype2i8 %0
diff --git a/test/Analysis/CostModel/X86/testshiftshl.ll b/test/Analysis/CostModel/X86/testshiftshl.ll
index 40effd0..c36e0f5 100644
--- a/test/Analysis/CostModel/X86/testshiftshl.ll
+++ b/test/Analysis/CostModel/X86/testshiftshl.ll
@@ -7,7 +7,7 @@ entry:
; SSE2: shift2i16
; SSE2: cost of 20 {{.*}} shl
; SSE2-CODEGEN: shift2i16
- ; SSE2-CODEGEN: shlq %cl
+ ; SSE2-CODEGEN: psllq
%0 = shl %shifttype %a , %b
ret %shifttype %0
@@ -67,7 +67,7 @@ entry:
; SSE2: shift2i32
; SSE2: cost of 20 {{.*}} shl
; SSE2-CODEGEN: shift2i32
- ; SSE2-CODEGEN: shlq %cl
+ ; SSE2-CODEGEN: psllq
%0 = shl %shifttype2i32 %a , %b
ret %shifttype2i32 %0
@@ -127,7 +127,7 @@ entry:
; SSE2: shift2i64
; SSE2: cost of 20 {{.*}} shl
; SSE2-CODEGEN: shift2i64
- ; SSE2-CODEGEN: shlq %cl
+ ; SSE2-CODEGEN: psllq
%0 = shl %shifttype2i64 %a , %b
ret %shifttype2i64 %0
@@ -139,7 +139,7 @@ entry:
; SSE2: shift4i64
; SSE2: cost of 40 {{.*}} shl
; SSE2-CODEGEN: shift4i64
- ; SSE2-CODEGEN: shlq %cl
+ ; SSE2-CODEGEN: psllq
%0 = shl %shifttype4i64 %a , %b
ret %shifttype4i64 %0
@@ -151,7 +151,7 @@ entry:
; SSE2: shift8i64
; SSE2: cost of 80 {{.*}} shl
; SSE2-CODEGEN: shift8i64
- ; SSE2-CODEGEN: shlq %cl
+ ; SSE2-CODEGEN: psllq
%0 = shl %shifttype8i64 %a , %b
ret %shifttype8i64 %0
@@ -163,7 +163,7 @@ entry:
; SSE2: shift16i64
; SSE2: cost of 160 {{.*}} shl
; SSE2-CODEGEN: shift16i64
- ; SSE2-CODEGEN: shlq %cl
+ ; SSE2-CODEGEN: psllq
%0 = shl %shifttype16i64 %a , %b
ret %shifttype16i64 %0
@@ -175,7 +175,7 @@ entry:
; SSE2: shift32i64
; SSE2: cost of 320 {{.*}} shl
; SSE2-CODEGEN: shift32i64
- ; SSE2-CODEGEN: shlq %cl
+ ; SSE2-CODEGEN: psllq
%0 = shl %shifttype32i64 %a , %b
ret %shifttype32i64 %0
@@ -187,7 +187,7 @@ entry:
; SSE2: shift2i8
; SSE2: cost of 20 {{.*}} shl
; SSE2-CODEGEN: shift2i8
- ; SSE2-CODEGEN: shlq %cl
+ ; SSE2-CODEGEN: psllq
%0 = shl %shifttype2i8 %a , %b
ret %shifttype2i8 %0
diff --git a/test/Analysis/CostModel/X86/vectorized-loop.ll b/test/Analysis/CostModel/X86/vectorized-loop.ll
index af7d1df..2dd52a0 100644
--- a/test/Analysis/CostModel/X86/vectorized-loop.ll
+++ b/test/Analysis/CostModel/X86/vectorized-loop.ll
@@ -25,17 +25,17 @@ for.body.lr.ph: ; preds = %entry
vector.body: ; preds = %for.body.lr.ph, %vector.body
%index = phi i64 [ %index.next, %vector.body ], [ %0, %for.body.lr.ph ]
%3 = add i64 %index, 2
- %4 = getelementptr inbounds i32* %B, i64 %3
+ %4 = getelementptr inbounds i32, i32* %B, i64 %3
;CHECK: cost of 0 {{.*}} bitcast
%5 = bitcast i32* %4 to <8 x i32>*
;CHECK: cost of 2 {{.*}} load
- %6 = load <8 x i32>* %5, align 4
+ %6 = load <8 x i32>, <8 x i32>* %5, align 4
;CHECK: cost of 4 {{.*}} mul
%7 = mul nsw <8 x i32> %6, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
- %8 = getelementptr inbounds i32* %A, i64 %index
+ %8 = getelementptr inbounds i32, i32* %A, i64 %index
%9 = bitcast i32* %8 to <8 x i32>*
;CHECK: cost of 2 {{.*}} load
- %10 = load <8 x i32>* %9, align 4
+ %10 = load <8 x i32>, <8 x i32>* %9, align 4
;CHECK: cost of 4 {{.*}} add
%11 = add nsw <8 x i32> %10, %7
;CHECK: cost of 2 {{.*}} store
@@ -52,14 +52,14 @@ middle.block: ; preds = %vector.body, %for.b
for.body: ; preds = %middle.block, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %end.idx.rnd.down, %middle.block ]
%13 = add nsw i64 %indvars.iv, 2
- %arrayidx = getelementptr inbounds i32* %B, i64 %13
+ %arrayidx = getelementptr inbounds i32, i32* %B, i64 %13
;CHECK: cost of 1 {{.*}} load
- %14 = load i32* %arrayidx, align 4
+ %14 = load i32, i32* %arrayidx, align 4
;CHECK: cost of 1 {{.*}} mul
%mul = mul nsw i32 %14, 5
- %arrayidx2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
;CHECK: cost of 1 {{.*}} load
- %15 = load i32* %arrayidx2, align 4
+ %15 = load i32, i32* %arrayidx2, align 4
%add3 = add nsw i32 %15, %mul
store i32 %add3, i32* %arrayidx2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1