Diffstat (limited to 'test/Analysis/CostModel')
-rw-r--r--  test/Analysis/CostModel/AArch64/store.ll        |  4
-rw-r--r--  test/Analysis/CostModel/ARM/gep.ll              | 48
-rw-r--r--  test/Analysis/CostModel/ARM/insertelement.ll    | 12
-rw-r--r--  test/Analysis/CostModel/PowerPC/load_store.ll   | 16
-rw-r--r--  test/Analysis/CostModel/X86/gep.ll              | 48
-rw-r--r--  test/Analysis/CostModel/X86/intrinsic-cost.ll   | 16
-rw-r--r--  test/Analysis/CostModel/X86/load_store.ll       | 34
-rw-r--r--  test/Analysis/CostModel/X86/loop_v2.ll          | 12
-rw-r--r--  test/Analysis/CostModel/X86/testshiftlshr.ll    | 16
-rw-r--r--  test/Analysis/CostModel/X86/testshiftshl.ll     | 16
-rw-r--r--  test/Analysis/CostModel/X86/vectorized-loop.ll  | 16
11 files changed, 119 insertions, 119 deletions
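
The substance of the change is a syntax migration: LLVM IR's load and getelementptr instructions now spell out the accessed (pointee) type as an explicit leading argument instead of deriving it from the pointer operand, and each test plus its FileCheck lines is rewritten accordingly. A minimal before/after sketch of the two forms (the %p, %v, and %q names are illustrative, not taken from the diff):

  ; old form: the accessed type is implied by the pointer operand
  %v = load i32* %p
  %q = getelementptr inbounds i32* %p, i64 1

  ; new form: the accessed type is written out before the pointer
  %v = load i32, i32* %p
  %q = getelementptr inbounds i32, i32* %p, i64 1

A few X86 expectations change alongside the syntax: the CORE2 cost for ceil/nearbyint drops from 400 to 46, and the SSE2 codegen checks for vector shifts now expect psrlq/psllq rather than the scalar shrq/shlq patterns.
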
diff --git a/test/Analysis/CostModel/AArch64/store.ll b/test/Analysis/CostModel/AArch64/store.ll
index 0c9883c..307f8f8 100644
--- a/test/Analysis/CostModel/AArch64/store.ll
+++ b/test/Analysis/CostModel/AArch64/store.ll
@@ -14,9 +14,9 @@ define void @store() {
   ; CHECK: cost of 64 {{.*}} store
   store <4 x i8> undef, <4 x i8> * undef
   ; CHECK: cost of 16 {{.*}} load
-  load <2 x i8> * undef
+  load <2 x i8> , <2 x i8> * undef
   ; CHECK: cost of 64 {{.*}} load
-  load <4 x i8> * undef
+  load <4 x i8> , <4 x i8> * undef

   ret void
 }
diff --git a/test/Analysis/CostModel/ARM/gep.ll b/test/Analysis/CostModel/ARM/gep.ll
index a63b87d..624ca11 100644
--- a/test/Analysis/CostModel/ARM/gep.ll
+++ b/test/Analysis/CostModel/ARM/gep.ll
@@ -6,37 +6,37 @@ target triple = "thumbv7-apple-ios6.0.0"
 define void @test_geps() {
   ; Cost of scalar integer geps should be one. We can't always expect it to be
   ; folded into the instruction addressing mode.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8*
-  %a0 = getelementptr inbounds i8* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16*
-  %a1 = getelementptr inbounds i16* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32*
-  %a2 = getelementptr inbounds i32* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8*
+  %a0 = getelementptr inbounds i8, i8* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
+  %a1 = getelementptr inbounds i16, i16* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
+  %a2 = getelementptr inbounds i32, i32* undef, i32 0

-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64*
-  %a3 = getelementptr inbounds i64* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
+  %a3 = getelementptr inbounds i64, i64* undef, i32 0

   ; Cost of scalar floating point geps should be one. We cannot fold the address
   ; computation.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float*
-  %a4 = getelementptr inbounds float* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double*
-  %a5 = getelementptr inbounds double* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float, float*
+  %a4 = getelementptr inbounds float, float* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double*
+  %a5 = getelementptr inbounds double, double* undef, i32 0

   ; Cost of vector geps should be one. We cannot fold the address computation.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>*
-  %a7 = getelementptr inbounds <4 x i8>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>*
-  %a8 = getelementptr inbounds <4 x i16>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>*
-  %a9 = getelementptr inbounds <4 x i32>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>*
-  %a10 = getelementptr inbounds <4 x i64>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>*
-  %a11 = getelementptr inbounds <4 x float>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>*
-  %a12 = getelementptr inbounds <4 x double>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
+  %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
+  %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
+  %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
+  %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
+  %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
+  %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0

   ret void
diff --git a/test/Analysis/CostModel/ARM/insertelement.ll b/test/Analysis/CostModel/ARM/insertelement.ll
index f951b08..bd1467e 100644
--- a/test/Analysis/CostModel/ARM/insertelement.ll
+++ b/test/Analysis/CostModel/ARM/insertelement.ll
@@ -10,8 +10,8 @@ target triple = "thumbv7-apple-ios6.0.0"
 ; CHECK: insertelement_i8
 define void @insertelement_i8(%T_i8* %saddr,
                            %T_i8v* %vaddr) {
-  %v0 = load %T_i8v* %vaddr
-  %v1 = load %T_i8* %saddr
+  %v0 = load %T_i8v, %T_i8v* %vaddr
+  %v1 = load %T_i8, %T_i8* %saddr
   ;CHECK: estimated cost of 3 for {{.*}} insertelement <8 x i8>
   %v2 = insertelement %T_i8v %v0, %T_i8 %v1, i32 1
   store %T_i8v %v2, %T_i8v* %vaddr
@@ -24,8 +24,8 @@ define void @insertelement_i8(%T_i8* %saddr,
 ; CHECK: insertelement_i16
 define void @insertelement_i16(%T_i16* %saddr,
                             %T_i16v* %vaddr) {
-  %v0 = load %T_i16v* %vaddr
-  %v1 = load %T_i16* %saddr
+  %v0 = load %T_i16v, %T_i16v* %vaddr
+  %v1 = load %T_i16, %T_i16* %saddr
   ;CHECK: estimated cost of 3 for {{.*}} insertelement <4 x i16>
   %v2 = insertelement %T_i16v %v0, %T_i16 %v1, i32 1
   store %T_i16v %v2, %T_i16v* %vaddr
@@ -37,8 +37,8 @@ define void @insertelement_i16(%T_i16* %saddr,
 ; CHECK: insertelement_i32
 define void @insertelement_i32(%T_i32* %saddr,
                             %T_i32v* %vaddr) {
-  %v0 = load %T_i32v* %vaddr
-  %v1 = load %T_i32* %saddr
+  %v0 = load %T_i32v, %T_i32v* %vaddr
+  %v1 = load %T_i32, %T_i32* %saddr
   ;CHECK: estimated cost of 3 for {{.*}} insertelement <2 x i32>
   %v2 = insertelement %T_i32v %v0, %T_i32 %v1, i32 1
   store %T_i32v %v2, %T_i32v* %vaddr
diff --git a/test/Analysis/CostModel/PowerPC/load_store.ll b/test/Analysis/CostModel/PowerPC/load_store.ll
index 368f0a7..1e50f16 100644
--- a/test/Analysis/CostModel/PowerPC/load_store.ll
+++ b/test/Analysis/CostModel/PowerPC/load_store.ll
@@ -19,26 +19,26 @@ define i32 @stores(i32 %arg) {
 }
 define i32 @loads(i32 %arg) {
   ; CHECK: cost of 1 {{.*}} load
-  load i8* undef, align 4
+  load i8, i8* undef, align 4
   ; CHECK: cost of 1 {{.*}} load
-  load i16* undef, align 4
+  load i16, i16* undef, align 4
   ; CHECK: cost of 1 {{.*}} load
-  load i32* undef, align 4
+  load i32, i32* undef, align 4
   ; CHECK: cost of 2 {{.*}} load
-  load i64* undef, align 4
+  load i64, i64* undef, align 4
   ; CHECK: cost of 4 {{.*}} load
-  load i128* undef, align 4
+  load i128, i128* undef, align 4

   ; FIXME: There actually are sub-vector Altivec loads, and so we could handle
   ; this with a small expense, but we don't currently.
   ; CHECK: cost of 48 {{.*}} load
-  load <4 x i16>* undef, align 2
+  load <4 x i16>, <4 x i16>* undef, align 2

   ; CHECK: cost of 1 {{.*}} load
-  load <4 x i32>* undef, align 4
+  load <4 x i32>, <4 x i32>* undef, align 4

   ; CHECK: cost of 46 {{.*}} load
-  load <3 x float>* undef, align 1
+  load <3 x float>, <3 x float>* undef, align 1

   ret i32 undef
 }
diff --git a/test/Analysis/CostModel/X86/gep.ll b/test/Analysis/CostModel/X86/gep.ll
index 877184a..a4488ba 100644
--- a/test/Analysis/CostModel/X86/gep.ll
+++ b/test/Analysis/CostModel/X86/gep.ll
@@ -7,33 +7,33 @@ target triple = "x86_64-apple-macosx10.8.0"
 define void @test_geps() {
   ; Cost of should be zero. We expect it to be folded into
   ; the instruction addressing mode.
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8*
-  %a0 = getelementptr inbounds i8* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16*
-  %a1 = getelementptr inbounds i16* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32*
-  %a2 = getelementptr inbounds i32* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64*
-  %a3 = getelementptr inbounds i64* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
+  %a0 = getelementptr inbounds i8, i8* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
+  %a1 = getelementptr inbounds i16, i16* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
+  %a2 = getelementptr inbounds i32, i32* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
+  %a3 = getelementptr inbounds i64, i64* undef, i32 0

-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float*
-  %a4 = getelementptr inbounds float* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double*
-  %a5 = getelementptr inbounds double* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float, float*
+  %a4 = getelementptr inbounds float, float* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double, double*
+  %a5 = getelementptr inbounds double, double* undef, i32 0

   ; Vector geps should also have zero cost.
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>*
-  %a7 = getelementptr inbounds <4 x i8>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>*
-  %a8 = getelementptr inbounds <4 x i16>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>*
-  %a9 = getelementptr inbounds <4 x i32>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>*
-  %a10 = getelementptr inbounds <4 x i64>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>*
-  %a11 = getelementptr inbounds <4 x float>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>*
-  %a12 = getelementptr inbounds <4 x double>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
+  %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
+  %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
+  %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
+  %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
+  %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
+  %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0

   ret void
diff --git a/test/Analysis/CostModel/X86/intrinsic-cost.ll b/test/Analysis/CostModel/X86/intrinsic-cost.ll
index 3b27b52..efc1263 100644
--- a/test/Analysis/CostModel/X86/intrinsic-cost.ll
+++ b/test/Analysis/CostModel/X86/intrinsic-cost.ll
@@ -9,9 +9,9 @@ vector.ph:
 vector.body:                                      ; preds = %vector.body, %vector.ph
   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-  %0 = getelementptr inbounds float* %f, i64 %index
+  %0 = getelementptr inbounds float, float* %f, i64 %index
   %1 = bitcast float* %0 to <4 x float>*
-  %wide.load = load <4 x float>* %1, align 4
+  %wide.load = load <4 x float>, <4 x float>* %1, align 4
   %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
   store <4 x float> %2, <4 x float>* %1, align 4
   %index.next = add i64 %index, 4
@@ -22,7 +22,7 @@ for.end:                                          ; preds = %vector.body
   ret void

 ; CORE2: Printing analysis 'Cost Model Analysis' for function 'test1':
-; CORE2: Cost Model: Found an estimated cost of 400 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
+; CORE2: Cost Model: Found an estimated cost of 46 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)

 ; COREI7: Printing analysis 'Cost Model Analysis' for function 'test1':
 ; COREI7: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
@@ -37,9 +37,9 @@ vector.ph:
 vector.body:                                      ; preds = %vector.body, %vector.ph
   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-  %0 = getelementptr inbounds float* %f, i64 %index
+  %0 = getelementptr inbounds float, float* %f, i64 %index
   %1 = bitcast float* %0 to <4 x float>*
-  %wide.load = load <4 x float>* %1, align 4
+  %wide.load = load <4 x float>, <4 x float>* %1, align 4
   %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
   store <4 x float> %2, <4 x float>* %1, align 4
   %index.next = add i64 %index, 4
@@ -50,7 +50,7 @@ for.end:                                          ; preds = %vector.body
   ret void

 ; CORE2: Printing analysis 'Cost Model Analysis' for function 'test2':
-; CORE2: Cost Model: Found an estimated cost of 400 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
+; CORE2: Cost Model: Found an estimated cost of 46 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)

 ; COREI7: Printing analysis 'Cost Model Analysis' for function 'test2':
 ; COREI7: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
@@ -65,9 +65,9 @@ vector.ph:
 vector.body:                                      ; preds = %vector.body, %vector.ph
   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-  %0 = getelementptr inbounds float* %f, i64 %index
+  %0 = getelementptr inbounds float, float* %f, i64 %index
   %1 = bitcast float* %0 to <4 x float>*
-  %wide.load = load <4 x float>* %1, align 4
+  %wide.load = load <4 x float>, <4 x float>* %1, align 4
   %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
   store <4 x float> %2, <4 x float>* %1, align 4
   %index.next = add i64 %index, 4
diff --git a/test/Analysis/CostModel/X86/load_store.ll b/test/Analysis/CostModel/X86/load_store.ll
index a53d0bd..ccf110a 100644
--- a/test/Analysis/CostModel/X86/load_store.ll
+++ b/test/Analysis/CostModel/X86/load_store.ll
@@ -34,49 +34,49 @@ define i32 @stores(i32 %arg) {
 }
 define i32 @loads(i32 %arg) {
   ;CHECK: cost of 1 {{.*}} load
-  load i8* undef, align 4
+  load i8, i8* undef, align 4
   ;CHECK: cost of 1 {{.*}} load
-  load i16* undef, align 4
+  load i16, i16* undef, align 4
   ;CHECK: cost of 1 {{.*}} load
-  load i32* undef, align 4
+  load i32, i32* undef, align 4
   ;CHECK: cost of 1 {{.*}} load
-  load i64* undef, align 4
+  load i64, i64* undef, align 4
   ;CHECK: cost of 2 {{.*}} load
-  load i128* undef, align 4
+  load i128, i128* undef, align 4

   ;CHECK: cost of 1 {{.*}} load
-  load <2 x i32>* undef, align 4
+  load <2 x i32>, <2 x i32>* undef, align 4
   ;CHECK: cost of 1 {{.*}} load
-  load <4 x i32>* undef, align 4
+  load <4 x i32>, <4 x i32>* undef, align 4
   ;CHECK: cost of 2 {{.*}} load
-  load <8 x i32>* undef, align 4
+  load <8 x i32>, <8 x i32>* undef, align 4

   ;CHECK: cost of 1 {{.*}} load
-  load <2 x i64>* undef, align 4
+  load <2 x i64>, <2 x i64>* undef, align 4
   ;CHECK: cost of 2 {{.*}} load
-  load <4 x i64>* undef, align 4
+  load <4 x i64>, <4 x i64>* undef, align 4
   ;CHECK: cost of 4 {{.*}} load
-  load <8 x i64>* undef, align 4
+  load <8 x i64>, <8 x i64>* undef, align 4

   ;CHECK: cost of 3 {{.*}} load
-  load <3 x float>* undef, align 4
+  load <3 x float>, <3 x float>* undef, align 4
   ;CHECK: cost of 3 {{.*}} load
-  load <3 x double>* undef, align 4
+  load <3 x double>, <3 x double>* undef, align 4
   ;CHECK: cost of 3 {{.*}} load
-  load <3 x i32>* undef, align 4
+  load <3 x i32>, <3 x i32>* undef, align 4
   ;CHECK: cost of 3 {{.*}} load
-  load <3 x i64>* undef, align 4
+  load <3 x i64>, <3 x i64>* undef, align 4

   ;CHECK: cost of 10 {{.*}} load
-  load <5 x i32>* undef, align 4
+  load <5 x i32>, <5 x i32>* undef, align 4
   ;CHECK: cost of 10 {{.*}} load
-  load <5 x i64>* undef, align 4
+  load <5 x i64>, <5 x i64>* undef, align 4

   ret i32 undef
 }
diff --git a/test/Analysis/CostModel/X86/loop_v2.ll b/test/Analysis/CostModel/X86/loop_v2.ll
index 348444e..9283310 100644
--- a/test/Analysis/CostModel/X86/loop_v2.ll
+++ b/test/Analysis/CostModel/X86/loop_v2.ll
@@ -10,20 +10,20 @@ vector.ph:
 vector.body:                                      ; preds = %vector.body, %vector.ph
   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   %vec.phi = phi <2 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ]
-  %0 = getelementptr inbounds i32* %A, i64 %index
+  %0 = getelementptr inbounds i32, i32* %A, i64 %index
   %1 = bitcast i32* %0 to <2 x i32>*
-  %2 = load <2 x i32>* %1, align 4
+  %2 = load <2 x i32>, <2 x i32>* %1, align 4
   %3 = sext <2 x i32> %2 to <2 x i64>
   ;CHECK: cost of 1 {{.*}} extract
   %4 = extractelement <2 x i64> %3, i32 0
-  %5 = getelementptr inbounds i32* %A, i64 %4
+  %5 = getelementptr inbounds i32, i32* %A, i64 %4
   ;CHECK: cost of 1 {{.*}} extract
   %6 = extractelement <2 x i64> %3, i32 1
-  %7 = getelementptr inbounds i32* %A, i64 %6
-  %8 = load i32* %5, align 4
+  %7 = getelementptr inbounds i32, i32* %A, i64 %6
+  %8 = load i32, i32* %5, align 4
   ;CHECK: cost of 1 {{.*}} insert
   %9 = insertelement <2 x i32> undef, i32 %8, i32 0
-  %10 = load i32* %7, align 4
+  %10 = load i32, i32* %7, align 4
   ;CHECK: cost of 1 {{.*}} insert
   %11 = insertelement <2 x i32> %9, i32 %10, i32 1
   %12 = add nsw <2 x i32> %11, %vec.phi
diff --git a/test/Analysis/CostModel/X86/testshiftlshr.ll b/test/Analysis/CostModel/X86/testshiftlshr.ll
index 7bc8d89..78bf0a6 100644
--- a/test/Analysis/CostModel/X86/testshiftlshr.ll
+++ b/test/Analysis/CostModel/X86/testshiftlshr.ll
@@ -7,7 +7,7 @@ entry:
   ; SSE2: shift2i16
   ; SSE2: cost of 20 {{.*}} lshr
   ; SSE2-CODEGEN: shift2i16
-  ; SSE2-CODEGEN: shrq %cl
+  ; SSE2-CODEGEN: psrlq

   %0 = lshr %shifttype %a , %b
   ret %shifttype %0
@@ -67,7 +67,7 @@ entry:
   ; SSE2: shift2i32
   ; SSE2: cost of 20 {{.*}} lshr
   ; SSE2-CODEGEN: shift2i32
-  ; SSE2-CODEGEN: shrq %cl
+  ; SSE2-CODEGEN: psrlq

   %0 = lshr %shifttype2i32 %a , %b
   ret %shifttype2i32 %0
@@ -127,7 +127,7 @@ entry:
   ; SSE2: shift2i64
   ; SSE2: cost of 20 {{.*}} lshr
   ; SSE2-CODEGEN: shift2i64
-  ; SSE2-CODEGEN: shrq %cl
+  ; SSE2-CODEGEN: psrlq

   %0 = lshr %shifttype2i64 %a , %b
   ret %shifttype2i64 %0
@@ -139,7 +139,7 @@ entry:
   ; SSE2: shift4i64
   ; SSE2: cost of 40 {{.*}} lshr
   ; SSE2-CODEGEN: shift4i64
-  ; SSE2-CODEGEN: shrq %cl
+  ; SSE2-CODEGEN: psrlq

   %0 = lshr %shifttype4i64 %a , %b
   ret %shifttype4i64 %0
@@ -151,7 +151,7 @@ entry:
   ; SSE2: shift8i64
   ; SSE2: cost of 80 {{.*}} lshr
   ; SSE2-CODEGEN: shift8i64
-  ; SSE2-CODEGEN: shrq %cl
+  ; SSE2-CODEGEN: psrlq

   %0 = lshr %shifttype8i64 %a , %b
   ret %shifttype8i64 %0
@@ -163,7 +163,7 @@ entry:
   ; SSE2: shift16i64
   ; SSE2: cost of 160 {{.*}} lshr
   ; SSE2-CODEGEN: shift16i64
-  ; SSE2-CODEGEN: shrq %cl
+  ; SSE2-CODEGEN: psrlq

   %0 = lshr %shifttype16i64 %a , %b
   ret %shifttype16i64 %0
@@ -175,7 +175,7 @@ entry:
   ; SSE2: shift32i64
   ; SSE2: cost of 320 {{.*}} lshr
   ; SSE2-CODEGEN: shift32i64
-  ; SSE2-CODEGEN: shrq %cl
+  ; SSE2-CODEGEN: psrlq

   %0 = lshr %shifttype32i64 %a , %b
   ret %shifttype32i64 %0
@@ -187,7 +187,7 @@ entry:
   ; SSE2: shift2i8
   ; SSE2: cost of 20 {{.*}} lshr
   ; SSE2-CODEGEN: shift2i8
-  ; SSE2-CODEGEN: shrq %cl
+  ; SSE2-CODEGEN: psrlq

   %0 = lshr %shifttype2i8 %a , %b
   ret %shifttype2i8 %0
diff --git a/test/Analysis/CostModel/X86/testshiftshl.ll b/test/Analysis/CostModel/X86/testshiftshl.ll
index 40effd0..c36e0f5 100644
--- a/test/Analysis/CostModel/X86/testshiftshl.ll
+++ b/test/Analysis/CostModel/X86/testshiftshl.ll
@@ -7,7 +7,7 @@ entry:
   ; SSE2: shift2i16
   ; SSE2: cost of 20 {{.*}} shl
   ; SSE2-CODEGEN: shift2i16
-  ; SSE2-CODEGEN: shlq %cl
+  ; SSE2-CODEGEN: psllq

   %0 = shl %shifttype %a , %b
   ret %shifttype %0
@@ -67,7 +67,7 @@ entry:
   ; SSE2: shift2i32
   ; SSE2: cost of 20 {{.*}} shl
   ; SSE2-CODEGEN: shift2i32
-  ; SSE2-CODEGEN: shlq %cl
+  ; SSE2-CODEGEN: psllq

   %0 = shl %shifttype2i32 %a , %b
   ret %shifttype2i32 %0
@@ -127,7 +127,7 @@ entry:
   ; SSE2: shift2i64
   ; SSE2: cost of 20 {{.*}} shl
   ; SSE2-CODEGEN: shift2i64
-  ; SSE2-CODEGEN: shlq %cl
+  ; SSE2-CODEGEN: psllq

   %0 = shl %shifttype2i64 %a , %b
   ret %shifttype2i64 %0
@@ -139,7 +139,7 @@ entry:
   ; SSE2: shift4i64
   ; SSE2: cost of 40 {{.*}} shl
   ; SSE2-CODEGEN: shift4i64
-  ; SSE2-CODEGEN: shlq %cl
+  ; SSE2-CODEGEN: psllq

   %0 = shl %shifttype4i64 %a , %b
   ret %shifttype4i64 %0
@@ -151,7 +151,7 @@ entry:
   ; SSE2: shift8i64
   ; SSE2: cost of 80 {{.*}} shl
   ; SSE2-CODEGEN: shift8i64
-  ; SSE2-CODEGEN: shlq %cl
+  ; SSE2-CODEGEN: psllq

   %0 = shl %shifttype8i64 %a , %b
   ret %shifttype8i64 %0
@@ -163,7 +163,7 @@ entry:
   ; SSE2: shift16i64
   ; SSE2: cost of 160 {{.*}} shl
   ; SSE2-CODEGEN: shift16i64
-  ; SSE2-CODEGEN: shlq %cl
+  ; SSE2-CODEGEN: psllq

   %0 = shl %shifttype16i64 %a , %b
   ret %shifttype16i64 %0
@@ -175,7 +175,7 @@ entry:
   ; SSE2: shift32i64
   ; SSE2: cost of 320 {{.*}} shl
   ; SSE2-CODEGEN: shift32i64
-  ; SSE2-CODEGEN: shlq %cl
+  ; SSE2-CODEGEN: psllq

   %0 = shl %shifttype32i64 %a , %b
   ret %shifttype32i64 %0
@@ -187,7 +187,7 @@ entry:
   ; SSE2: shift2i8
   ; SSE2: cost of 20 {{.*}} shl
   ; SSE2-CODEGEN: shift2i8
-  ; SSE2-CODEGEN: shlq %cl
+  ; SSE2-CODEGEN: psllq

   %0 = shl %shifttype2i8 %a , %b
   ret %shifttype2i8 %0
diff --git a/test/Analysis/CostModel/X86/vectorized-loop.ll b/test/Analysis/CostModel/X86/vectorized-loop.ll
index af7d1df..2dd52a0 100644
--- a/test/Analysis/CostModel/X86/vectorized-loop.ll
+++ b/test/Analysis/CostModel/X86/vectorized-loop.ll
@@ -25,17 +25,17 @@ for.body.lr.ph:                                   ; preds = %entry
 vector.body:                                      ; preds = %for.body.lr.ph, %vector.body
   %index = phi i64 [ %index.next, %vector.body ], [ %0, %for.body.lr.ph ]
   %3 = add i64 %index, 2
-  %4 = getelementptr inbounds i32* %B, i64 %3
+  %4 = getelementptr inbounds i32, i32* %B, i64 %3
   ;CHECK: cost of 0 {{.*}} bitcast
   %5 = bitcast i32* %4 to <8 x i32>*
   ;CHECK: cost of 2 {{.*}} load
-  %6 = load <8 x i32>* %5, align 4
+  %6 = load <8 x i32>, <8 x i32>* %5, align 4
   ;CHECK: cost of 4 {{.*}} mul
   %7 = mul nsw <8 x i32> %6, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-  %8 = getelementptr inbounds i32* %A, i64 %index
+  %8 = getelementptr inbounds i32, i32* %A, i64 %index
   %9 = bitcast i32* %8 to <8 x i32>*
   ;CHECK: cost of 2 {{.*}} load
-  %10 = load <8 x i32>* %9, align 4
+  %10 = load <8 x i32>, <8 x i32>* %9, align 4
   ;CHECK: cost of 4 {{.*}} add
   %11 = add nsw <8 x i32> %10, %7
   ;CHECK: cost of 2 {{.*}} store
@@ -52,14 +52,14 @@ middle.block:                                     ; preds = %vector.body, %for.b
 for.body:                                         ; preds = %middle.block, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %end.idx.rnd.down, %middle.block ]
   %13 = add nsw i64 %indvars.iv, 2
-  %arrayidx = getelementptr inbounds i32* %B, i64 %13
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %13
   ;CHECK: cost of 1 {{.*}} load
-  %14 = load i32* %arrayidx, align 4
+  %14 = load i32, i32* %arrayidx, align 4
   ;CHECK: cost of 1 {{.*}} mul
   %mul = mul nsw i32 %14, 5
-  %arrayidx2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
   ;CHECK: cost of 1 {{.*}} load
-  %15 = load i32* %arrayidx2, align 4
+  %15 = load i32, i32* %arrayidx2, align 4
   %add3 = add nsw i32 %15, %mul
   store i32 %add3, i32* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1