Diffstat (limited to 'test/Analysis/CostModel')
-rw-r--r--  test/Analysis/CostModel/AArch64/store.ll        |  4
-rw-r--r--  test/Analysis/CostModel/ARM/gep.ll              | 48
-rw-r--r--  test/Analysis/CostModel/ARM/insertelement.ll    | 12
-rw-r--r--  test/Analysis/CostModel/PowerPC/load_store.ll   | 16
-rw-r--r--  test/Analysis/CostModel/X86/gep.ll              | 48
-rw-r--r--  test/Analysis/CostModel/X86/intrinsic-cost.ll   | 16
-rw-r--r--  test/Analysis/CostModel/X86/load_store.ll       | 34
-rw-r--r--  test/Analysis/CostModel/X86/loop_v2.ll          | 12
-rw-r--r--  test/Analysis/CostModel/X86/testshiftlshr.ll    | 16
-rw-r--r--  test/Analysis/CostModel/X86/testshiftshl.ll     | 16
-rw-r--r--  test/Analysis/CostModel/X86/vectorized-loop.ll  | 16
11 files changed, 119 insertions, 119 deletions
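
The substance of the change is a syntax migration: LLVM IR's load and getelementptr instructions now spell out the accessed (pointee) type as an explicit leading argument instead of deriving it from the pointer operand, and each test plus its FileCheck lines is rewritten accordingly. A minimal before/after sketch of the two forms (the %p, %v, and %q names are illustrative, not taken from the diff):

  ; old form: the accessed type is implied by the pointer operand
  %v = load i32* %p
  %q = getelementptr inbounds i32* %p, i64 1

  ; new form: the accessed type is written out before the pointer
  %v = load i32, i32* %p
  %q = getelementptr inbounds i32, i32* %p, i64 1

A few X86 expectations change alongside the syntax: the CORE2 cost for ceil/nearbyint drops from 400 to 46, and the SSE2 codegen checks for vector shifts now expect psrlq/psllq rather than the scalar shrq/shlq patterns.
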
diff --git a/test/Analysis/CostModel/AArch64/store.ll b/test/Analysis/CostModel/AArch64/store.ll
index 0c9883c..307f8f8 100644
--- a/test/Analysis/CostModel/AArch64/store.ll
+++ b/test/Analysis/CostModel/AArch64/store.ll
@@ -14,9 +14,9 @@ define void @store() {
   ; CHECK: cost of 64 {{.*}} store
   store <4 x i8> undef, <4 x i8> * undef
   ; CHECK: cost of 16 {{.*}} load
-  load <2 x i8> * undef
+  load <2 x i8> , <2 x i8> * undef
   ; CHECK: cost of 64 {{.*}} load
-  load <4 x i8> * undef
+  load <4 x i8> , <4 x i8> * undef

   ret void
 }
diff --git a/test/Analysis/CostModel/ARM/gep.ll b/test/Analysis/CostModel/ARM/gep.ll
index a63b87d..624ca11 100644
--- a/test/Analysis/CostModel/ARM/gep.ll
+++ b/test/Analysis/CostModel/ARM/gep.ll
@@ -6,37 +6,37 @@ target triple = "thumbv7-apple-ios6.0.0"
 define void @test_geps() {
   ; Cost of scalar integer geps should be one. We can't always expect it to be
   ; folded into the instruction addressing mode.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8*
-  %a0 = getelementptr inbounds i8* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16*
-  %a1 = getelementptr inbounds i16* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32*
-  %a2 = getelementptr inbounds i32* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8*
+  %a0 = getelementptr inbounds i8, i8* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
+  %a1 = getelementptr inbounds i16, i16* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
+  %a2 = getelementptr inbounds i32, i32* undef, i32 0

-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64*
-  %a3 = getelementptr inbounds i64* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
+  %a3 = getelementptr inbounds i64, i64* undef, i32 0

   ; Cost of scalar floating point geps should be one. We cannot fold the address
   ; computation.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float*
-  %a4 = getelementptr inbounds float* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double*
-  %a5 = getelementptr inbounds double* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float, float*
+  %a4 = getelementptr inbounds float, float* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double*
+  %a5 = getelementptr inbounds double, double* undef, i32 0

   ; Cost of vector geps should be one. We cannot fold the address computation.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>*
-  %a7 = getelementptr inbounds <4 x i8>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>*
-  %a8 = getelementptr inbounds <4 x i16>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>*
-  %a9 = getelementptr inbounds <4 x i32>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>*
-  %a10 = getelementptr inbounds <4 x i64>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>*
-  %a11 = getelementptr inbounds <4 x float>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>*
-  %a12 = getelementptr inbounds <4 x double>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
+  %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
+  %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
+  %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
+  %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
+  %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
+  %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0

   ret void
diff --git a/test/Analysis/CostModel/ARM/insertelement.ll b/test/Analysis/CostModel/ARM/insertelement.ll
index f951b08..bd1467e 100644
--- a/test/Analysis/CostModel/ARM/insertelement.ll
+++ b/test/Analysis/CostModel/ARM/insertelement.ll
@@ -10,8 +10,8 @@ target triple = "thumbv7-apple-ios6.0.0"
 ; CHECK: insertelement_i8
 define void @insertelement_i8(%T_i8* %saddr,
                            %T_i8v* %vaddr) {
-  %v0 = load %T_i8v* %vaddr
-  %v1 = load %T_i8* %saddr
+  %v0 = load %T_i8v, %T_i8v* %vaddr
+  %v1 = load %T_i8, %T_i8* %saddr
   ;CHECK: estimated cost of 3 for {{.*}} insertelement <8 x i8>
   %v2 = insertelement %T_i8v %v0, %T_i8 %v1, i32 1
   store %T_i8v %v2, %T_i8v* %vaddr
@@ -24,8 +24,8 @@ define void @insertelement_i8(%T_i8* %saddr,
 ; CHECK: insertelement_i16
 define void @insertelement_i16(%T_i16* %saddr,
                             %T_i16v* %vaddr) {
-  %v0 = load %T_i16v* %vaddr
-  %v1 = load %T_i16* %saddr
+  %v0 = load %T_i16v, %T_i16v* %vaddr
+  %v1 = load %T_i16, %T_i16* %saddr
   ;CHECK: estimated cost of 3 for {{.*}} insertelement <4 x i16>
   %v2 = insertelement %T_i16v %v0, %T_i16 %v1, i32 1
   store %T_i16v %v2, %T_i16v* %vaddr
@@ -37,8 +37,8 @@ define void @insertelement_i16(%T_i16* %saddr,
 ; CHECK: insertelement_i32
 define void @insertelement_i32(%T_i32* %saddr,
                             %T_i32v* %vaddr) {
-  %v0 = load %T_i32v* %vaddr
-  %v1 = load %T_i32* %saddr
+  %v0 = load %T_i32v, %T_i32v* %vaddr
+  %v1 = load %T_i32, %T_i32* %saddr
   ;CHECK: estimated cost of 3 for {{.*}} insertelement <2 x i32>
   %v2 = insertelement %T_i32v %v0, %T_i32 %v1, i32 1
   store %T_i32v %v2, %T_i32v* %vaddr
diff --git a/test/Analysis/CostModel/PowerPC/load_store.ll b/test/Analysis/CostModel/PowerPC/load_store.ll
index 368f0a7..1e50f16 100644
--- a/test/Analysis/CostModel/PowerPC/load_store.ll
+++ b/test/Analysis/CostModel/PowerPC/load_store.ll
@@ -19,26 +19,26 @@ define i32 @stores(i32 %arg) {
 }
 define i32 @loads(i32 %arg) {
   ; CHECK: cost of 1 {{.*}} load
-  load i8* undef, align 4
+  load i8, i8* undef, align 4
   ; CHECK: cost of 1 {{.*}} load
-  load i16* undef, align 4
+  load i16, i16* undef, align 4
   ; CHECK: cost of 1 {{.*}} load
-  load i32* undef, align 4
+  load i32, i32* undef, align 4
   ; CHECK: cost of 2 {{.*}} load
-  load i64* undef, align 4
+  load i64, i64* undef, align 4
   ; CHECK: cost of 4 {{.*}} load
-  load i128* undef, align 4
+  load i128, i128* undef, align 4

   ; FIXME: There actually are sub-vector Altivec loads, and so we could handle
   ; this with a small expense, but we don't currently.
   ; CHECK: cost of 48 {{.*}} load
-  load <4 x i16>* undef, align 2
+  load <4 x i16>, <4 x i16>* undef, align 2

   ; CHECK: cost of 1 {{.*}} load
-  load <4 x i32>* undef, align 4
+  load <4 x i32>, <4 x i32>* undef, align 4

   ; CHECK: cost of 46 {{.*}} load
-  load <3 x float>* undef, align 1
+  load <3 x float>, <3 x float>* undef, align 1

   ret i32 undef
 }
diff --git a/test/Analysis/CostModel/X86/gep.ll b/test/Analysis/CostModel/X86/gep.ll
index 877184a..a4488ba 100644
--- a/test/Analysis/CostModel/X86/gep.ll
+++ b/test/Analysis/CostModel/X86/gep.ll
@@ -7,33 +7,33 @@ target triple = "x86_64-apple-macosx10.8.0"
 define void @test_geps() {
   ; Cost of should be zero. We expect it to be folded into
   ; the instruction addressing mode.
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8*
-  %a0 = getelementptr inbounds i8* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16*
-  %a1 = getelementptr inbounds i16* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32*
-  %a2 = getelementptr inbounds i32* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64*
-  %a3 = getelementptr inbounds i64* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
+  %a0 = getelementptr inbounds i8, i8* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
+  %a1 = getelementptr inbounds i16, i16* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
+  %a2 = getelementptr inbounds i32, i32* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
+  %a3 = getelementptr inbounds i64, i64* undef, i32 0

-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float*
-  %a4 = getelementptr inbounds float* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double*
-  %a5 = getelementptr inbounds double* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float, float*
+  %a4 = getelementptr inbounds float, float* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double, double*
+  %a5 = getelementptr inbounds double, double* undef, i32 0

   ; Vector geps should also have zero cost.
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>*
-  %a7 = getelementptr inbounds <4 x i8>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>*
-  %a8 = getelementptr inbounds <4 x i16>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>*
-  %a9 = getelementptr inbounds <4 x i32>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>*
-  %a10 = getelementptr inbounds <4 x i64>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>*
-  %a11 = getelementptr inbounds <4 x float>* undef, i32 0
-;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>*
-  %a12 = getelementptr inbounds <4 x double>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
+  %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
+  %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
+  %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
+  %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
+  %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
+  %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0

   ret void
diff --git a/test/Analysis/CostModel/X86/intrinsic-cost.ll b/test/Analysis/CostModel/X86/intrinsic-cost.ll
index 3b27b52..efc1263 100644
--- a/test/Analysis/CostModel/X86/intrinsic-cost.ll
+++ b/test/Analysis/CostModel/X86/intrinsic-cost.ll
@@ -9,9 +9,9 @@ vector.ph:
 vector.body:                                      ; preds = %vector.body, %vector.ph
   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-  %0 = getelementptr inbounds float* %f, i64 %index
+  %0 = getelementptr inbounds float, float* %f, i64 %index
   %1 = bitcast float* %0 to <4 x float>*
-  %wide.load = load <4 x float>* %1, align 4
+  %wide.load = load <4 x float>, <4 x float>* %1, align 4
   %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
   store <4 x float> %2, <4 x float>* %1, align 4
   %index.next = add i64 %index, 4
@@ -22,7 +22,7 @@ for.end:                                          ; preds = %vector.body
   ret void

 ; CORE2: Printing analysis 'Cost Model Analysis' for function 'test1':
-; CORE2: Cost Model: Found an estimated cost of 400 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
+; CORE2: Cost Model: Found an estimated cost of 46 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)

 ; COREI7: Printing analysis 'Cost Model Analysis' for function 'test1':
 ; COREI7: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
@@ -37,9 +37,9 @@ vector.ph:
 vector.body:                                      ; preds = %vector.body, %vector.ph
   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-  %0 = getelementptr inbounds float* %f, i64 %index
+  %0 = getelementptr inbounds float, float* %f, i64 %index
   %1 = bitcast float* %0 to <4 x float>*
-  %wide.load = load <4 x float>* %1, align 4
+  %wide.load = load <4 x float>, <4 x float>* %1, align 4
   %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
   store <4 x float> %2, <4 x float>* %1, align 4
   %index.next = add i64 %index, 4
@@ -50,7 +50,7 @@ for.end:                                          ; preds = %vector.body
   ret void

 ; CORE2: Printing analysis 'Cost Model Analysis' for function 'test2':
-; CORE2: Cost Model: Found an estimated cost of 400 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
+; CORE2: Cost Model: Found an estimated cost of 46 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)

 ; COREI7: Printing analysis 'Cost Model Analysis' for function 'test2':
 ; COREI7: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
@@ -65,9 +65,9 @@ vector.ph:
 vector.body:                                      ; preds = %vector.body, %vector.ph
   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-  %0 = getelementptr inbounds float* %f, i64 %index
+  %0 = getelementptr inbounds float, float* %f, i64 %index
   %1 = bitcast float* %0 to <4 x float>*
-  %wide.load = load <4 x float>* %1, align 4
+  %wide.load = load <4 x float>, <4 x float>* %1, align 4
   %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
   store <4 x float> %2, <4 x float>* %1, align 4
   %index.next = add i64 %index, 4
diff --git a/test/Analysis/CostModel/X86/load_store.ll b/test/Analysis/CostModel/X86/load_store.ll
index a53d0bd..ccf110a 100644
--- a/test/Analysis/CostModel/X86/load_store.ll
+++ b/test/Analysis/CostModel/X86/load_store.ll
@@ -34,49 +34,49 @@ define i32 @stores(i32 %arg) {
 }
 define i32 @loads(i32 %arg) {
   ;CHECK: cost of 1 {{.*}} load
-  load i8* undef, align 4
+  load i8, i8* undef, align 4
   ;CHECK: cost of 1 {{.*}} load
-  load i16* undef, align 4
+  load i16, i16* undef, align 4
   ;CHECK: cost of 1 {{.*}} load
-  load i32* undef, align 4
+  load i32, i32* undef, align 4
   ;CHECK: cost of 1 {{.*}} load
-  load i64* undef, align 4
+  load i64, i64* undef, align 4
   ;CHECK: cost of 2 {{.*}} load
-  load i128* undef, align 4
+  load i128, i128* undef, align 4

   ;CHECK: cost of 1 {{.*}} load
-  load <2 x i32>* undef, align 4
+  load <2 x i32>, <2 x i32>* undef, align 4
   ;CHECK: cost of 1 {{.*}} load
-  load <4 x i32>* undef, align 4
+  load <4 x i32>, <4 x i32>* undef, align 4
   ;CHECK: cost of 2 {{.*}} load
-  load <8 x i32>* undef, align 4
+  load <8 x i32>, <8 x i32>* undef, align 4

   ;CHECK: cost of 1 {{.*}} load
-  load <2 x i64>* undef, align 4
+  load <2 x i64>, <2 x i64>* undef, align 4
   ;CHECK: cost of 2 {{.*}} load
-  load <4 x i64>* undef, align 4
+  load <4 x i64>, <4 x i64>* undef, align 4
   ;CHECK: cost of 4 {{.*}} load
-  load <8 x i64>* undef, align 4
+  load <8 x i64>, <8 x i64>* undef, align 4

   ;CHECK: cost of 3 {{.*}} load
-  load <3 x float>* undef, align 4
+  load <3 x float>, <3 x float>* undef, align 4
   ;CHECK: cost of 3 {{.*}} load
-  load <3 x double>* undef, align 4
+  load <3 x double>, <3 x double>* undef, align 4
   ;CHECK: cost of 3 {{.*}} load
-  load <3 x i32>* undef, align 4
+  load <3 x i32>, <3 x i32>* undef, align 4
   ;CHECK: cost of 3 {{.*}} load
-  load <3 x i64>* undef, align 4
+  load <3 x i64>, <3 x i64>* undef, align 4

   ;CHECK: cost of 10 {{.*}} load
-  load <5 x i32>* undef, align 4
+  load <5 x i32>, <5 x i32>* undef, align 4
   ;CHECK: cost of 10 {{.*}} load
-  load <5 x i64>* undef, align 4
+  load <5 x i64>, <5 x i64>* undef, align 4

   ret i32 undef
 }
diff --git a/test/Analysis/CostModel/X86/loop_v2.ll b/test/Analysis/CostModel/X86/loop_v2.ll
index 348444e..9283310 100644
--- a/test/Analysis/CostModel/X86/loop_v2.ll
+++ b/test/Analysis/CostModel/X86/loop_v2.ll
@@ -10,20 +10,20 @@ vector.ph:
 vector.body:                                      ; preds = %vector.body, %vector.ph
   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   %vec.phi = phi <2 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ]
-  %0 = getelementptr inbounds i32* %A, i64 %index
+  %0 = getelementptr inbounds i32, i32* %A, i64 %index
   %1 = bitcast i32* %0 to <2 x i32>*
-  %2 = load <2 x i32>* %1, align 4
+  %2 = load <2 x i32>, <2 x i32>* %1, align 4
   %3 = sext <2 x i32> %2 to <2 x i64>
   ;CHECK: cost of 1 {{.*}} extract
   %4 = extractelement <2 x i64> %3, i32 0
-  %5 = getelementptr inbounds i32* %A, i64 %4
+  %5 = getelementptr inbounds i32, i32* %A, i64 %4
   ;CHECK: cost of 1 {{.*}} extract
   %6 = extractelement <2 x i64> %3, i32 1
-  %7 = getelementptr inbounds i32* %A, i64 %6
-  %8 = load i32* %5, align 4
+  %7 = getelementptr inbounds i32, i32* %A, i64 %6
+  %8 = load i32, i32* %5, align 4
   ;CHECK: cost of 1 {{.*}} insert
   %9 = insertelement <2 x i32> undef, i32 %8, i32 0
-  %10 = load i32* %7, align 4
+  %10 = load i32, i32* %7, align 4
   ;CHECK: cost of 1 {{.*}} insert
   %11 = insertelement <2 x i32> %9, i32 %10, i32 1
   %12 = add nsw <2 x i32> %11, %vec.phi
diff --git a/test/Analysis/CostModel/X86/testshiftlshr.ll b/test/Analysis/CostModel/X86/testshiftlshr.ll
index 7bc8d89..78bf0a6 100644
--- a/test/Analysis/CostModel/X86/testshiftlshr.ll
+++ b/test/Analysis/CostModel/X86/testshiftlshr.ll
@@ -7,7 +7,7 @@ entry:
   ; SSE2: shift2i16
   ; SSE2: cost of 20 {{.*}} lshr
   ; SSE2-CODEGEN: shift2i16
-  ; SSE2-CODEGEN: shrq %cl
+  ; SSE2-CODEGEN: psrlq

   %0 = lshr %shifttype %a , %b
   ret %shifttype %0
@@ -67,7 +67,7 @@ entry:
   ; SSE2: shift2i32
   ; SSE2: cost of 20 {{.*}} lshr
   ; SSE2-CODEGEN: shift2i32
-  ; SSE2-CODEGEN: shrq %cl
+  ; SSE2-CODEGEN: psrlq

   %0 = lshr %shifttype2i32 %a , %b
   ret %shifttype2i32 %0
@@ -127,7 +127,7 @@ entry:
   ; SSE2: shift2i64
   ; SSE2: cost of 20 {{.*}} lshr
   ; SSE2-CODEGEN: shift2i64
-  ; SSE2-CODEGEN: shrq %cl
+  ; SSE2-CODEGEN: psrlq

   %0 = lshr %shifttype2i64 %a , %b
   ret %shifttype2i64 %0
@@ -139,7 +139,7 @@ entry:
   ; SSE2: shift4i64
   ; SSE2: cost of 40 {{.*}} lshr
   ; SSE2-CODEGEN: shift4i64
-  ; SSE2-CODEGEN: shrq %cl
+  ; SSE2-CODEGEN: psrlq

   %0 = lshr %shifttype4i64 %a , %b
   ret %shifttype4i64 %0
@@ -151,7 +151,7 @@ entry:
   ; SSE2: shift8i64
   ; SSE2: cost of 80 {{.*}} lshr
   ; SSE2-CODEGEN: shift8i64
-  ; SSE2-CODEGEN: shrq %cl
+  ; SSE2-CODEGEN: psrlq

   %0 = lshr %shifttype8i64 %a , %b
   ret %shifttype8i64 %0
@@ -163,7 +163,7 @@ entry:
   ; SSE2: shift16i64
   ; SSE2: cost of 160 {{.*}} lshr
   ; SSE2-CODEGEN: shift16i64
-  ; SSE2-CODEGEN: shrq %cl
+  ; SSE2-CODEGEN: psrlq

   %0 = lshr %shifttype16i64 %a , %b
   ret %shifttype16i64 %0
@@ -175,7 +175,7 @@ entry:
   ; SSE2: shift32i64
   ; SSE2: cost of 320 {{.*}} lshr
   ; SSE2-CODEGEN: shift32i64
-  ; SSE2-CODEGEN: shrq %cl
+  ; SSE2-CODEGEN: psrlq

   %0 = lshr %shifttype32i64 %a , %b
   ret %shifttype32i64 %0
@@ -187,7 +187,7 @@ entry:
   ; SSE2: shift2i8
   ; SSE2: cost of 20 {{.*}} lshr
   ; SSE2-CODEGEN: shift2i8
-  ; SSE2-CODEGEN: shrq %cl
+  ; SSE2-CODEGEN: psrlq

   %0 = lshr %shifttype2i8 %a , %b
   ret %shifttype2i8 %0
diff --git a/test/Analysis/CostModel/X86/testshiftshl.ll b/test/Analysis/CostModel/X86/testshiftshl.ll
index 40effd0..c36e0f5 100644
--- a/test/Analysis/CostModel/X86/testshiftshl.ll
+++ b/test/Analysis/CostModel/X86/testshiftshl.ll
@@ -7,7 +7,7 @@ entry:
   ; SSE2: shift2i16
   ; SSE2: cost of 20 {{.*}} shl
   ; SSE2-CODEGEN: shift2i16
-  ; SSE2-CODEGEN: shlq %cl
+  ; SSE2-CODEGEN: psllq

   %0 = shl %shifttype %a , %b
   ret %shifttype %0
@@ -67,7 +67,7 @@ entry:
   ; SSE2: shift2i32
   ; SSE2: cost of 20 {{.*}} shl
   ; SSE2-CODEGEN: shift2i32
-  ; SSE2-CODEGEN: shlq %cl
+  ; SSE2-CODEGEN: psllq

   %0 = shl %shifttype2i32 %a , %b
   ret %shifttype2i32 %0
@@ -127,7 +127,7 @@ entry:
   ; SSE2: shift2i64
   ; SSE2: cost of 20 {{.*}} shl
   ; SSE2-CODEGEN: shift2i64
-  ; SSE2-CODEGEN: shlq %cl
+  ; SSE2-CODEGEN: psllq

   %0 = shl %shifttype2i64 %a , %b
   ret %shifttype2i64 %0
@@ -139,7 +139,7 @@ entry:
   ; SSE2: shift4i64
   ; SSE2: cost of 40 {{.*}} shl
   ; SSE2-CODEGEN: shift4i64
-  ; SSE2-CODEGEN: shlq %cl
+  ; SSE2-CODEGEN: psllq

   %0 = shl %shifttype4i64 %a , %b
   ret %shifttype4i64 %0
@@ -151,7 +151,7 @@ entry:
   ; SSE2: shift8i64
   ; SSE2: cost of 80 {{.*}} shl
   ; SSE2-CODEGEN: shift8i64
-  ; SSE2-CODEGEN: shlq %cl
+  ; SSE2-CODEGEN: psllq

   %0 = shl %shifttype8i64 %a , %b
   ret %shifttype8i64 %0
@@ -163,7 +163,7 @@ entry:
   ; SSE2: shift16i64
   ; SSE2: cost of 160 {{.*}} shl
   ; SSE2-CODEGEN: shift16i64
-  ; SSE2-CODEGEN: shlq %cl
+  ; SSE2-CODEGEN: psllq

   %0 = shl %shifttype16i64 %a , %b
   ret %shifttype16i64 %0
@@ -175,7 +175,7 @@ entry:
   ; SSE2: shift32i64
   ; SSE2: cost of 320 {{.*}} shl
   ; SSE2-CODEGEN: shift32i64
-  ; SSE2-CODEGEN: shlq %cl
+  ; SSE2-CODEGEN: psllq

   %0 = shl %shifttype32i64 %a , %b
   ret %shifttype32i64 %0
@@ -187,7 +187,7 @@ entry:
   ; SSE2: shift2i8
   ; SSE2: cost of 20 {{.*}} shl
   ; SSE2-CODEGEN: shift2i8
-  ; SSE2-CODEGEN: shlq %cl
+  ; SSE2-CODEGEN: psllq

   %0 = shl %shifttype2i8 %a , %b
   ret %shifttype2i8 %0
diff --git a/test/Analysis/CostModel/X86/vectorized-loop.ll b/test/Analysis/CostModel/X86/vectorized-loop.ll
index af7d1df..2dd52a0 100644
--- a/test/Analysis/CostModel/X86/vectorized-loop.ll
+++ b/test/Analysis/CostModel/X86/vectorized-loop.ll
@@ -25,17 +25,17 @@ for.body.lr.ph:                                   ; preds = %entry
 vector.body:                                      ; preds = %for.body.lr.ph, %vector.body
   %index = phi i64 [ %index.next, %vector.body ], [ %0, %for.body.lr.ph ]
   %3 = add i64 %index, 2
-  %4 = getelementptr inbounds i32* %B, i64 %3
+  %4 = getelementptr inbounds i32, i32* %B, i64 %3
   ;CHECK: cost of 0 {{.*}} bitcast
   %5 = bitcast i32* %4 to <8 x i32>*
   ;CHECK: cost of 2 {{.*}} load
-  %6 = load <8 x i32>* %5, align 4
+  %6 = load <8 x i32>, <8 x i32>* %5, align 4
   ;CHECK: cost of 4 {{.*}} mul
   %7 = mul nsw <8 x i32> %6, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-  %8 = getelementptr inbounds i32* %A, i64 %index
+  %8 = getelementptr inbounds i32, i32* %A, i64 %index
   %9 = bitcast i32* %8 to <8 x i32>*
   ;CHECK: cost of 2 {{.*}} load
-  %10 = load <8 x i32>* %9, align 4
+  %10 = load <8 x i32>, <8 x i32>* %9, align 4
   ;CHECK: cost of 4 {{.*}} add
   %11 = add nsw <8 x i32> %10, %7
   ;CHECK: cost of 2 {{.*}} store
@@ -52,14 +52,14 @@ middle.block:                                     ; preds = %vector.body, %for.b
 for.body:                                         ; preds = %middle.block, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %end.idx.rnd.down, %middle.block ]
   %13 = add nsw i64 %indvars.iv, 2
-  %arrayidx = getelementptr inbounds i32* %B, i64 %13
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %13
   ;CHECK: cost of 1 {{.*}} load
-  %14 = load i32* %arrayidx, align 4
+  %14 = load i32, i32* %arrayidx, align 4
   ;CHECK: cost of 1 {{.*}} mul
   %mul = mul nsw i32 %14, 5
-  %arrayidx2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
   ;CHECK: cost of 1 {{.*}} load
-  %15 = load i32* %arrayidx2, align 4
+  %15 = load i32, i32* %arrayidx2, align 4
   %add3 = add nsw i32 %15, %mul
   store i32 %add3, i32* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1