17 files changed, 236 insertions, 90 deletions
diff --git a/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll b/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll
new file mode 100644
index 0000000..5cfc68d
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+@.str = external constant [36 x i8], align 1      ; <[36 x i8]*> [#uses=0]
+@.str1 = external constant [31 x i8], align 1     ; <[31 x i8]*> [#uses=1]
+@.str2 = external constant [4 x i8], align 1      ; <[4 x i8]*> [#uses=1]
+
+declare i32 @getUnknown(i32, ...) nounwind
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @llvm.va_end(i8*) nounwind
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+define i32 @main() nounwind {
+entry:
+  %0 = tail call  i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) nounwind ; <i32> [#uses=0]
+  %1 = tail call  i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 -128, i32 116, i32 116, i32 -3852, i32 -31232, i32 -1708916736) nounwind ; <i32> [#uses=0]
+  %2 = tail call  i32 (i32, ...)* @getUnknown(i32 undef, i32 116, i32 116, i32 -3852, i32 -31232, i32 30556, i32 -1708916736) nounwind ; <i32> [#uses=1]
+  %3 = tail call  i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str2, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
+  ret i32 0
+}
diff --git a/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll b/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll
new file mode 100644
index 0000000..06a152d
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll
@@ -0,0 +1,106 @@
+; RUN: llc -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-apple-darwin9"
+
+@history = internal global [2 x [56 x i32]] [[56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0], [56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0]] ; <[2 x [56 x i32]]*> [#uses=3]
+@nodes = internal global i64 0                    ; <i64*> [#uses=4]
+@.str = private constant [9 x i8] c"##-<=>+#\00", align 1 ; <[9 x i8]*> [#uses=2]
+@.str1 = private constant [6 x i8] c"%c%d\0A\00", align 1 ; <[6 x i8]*> [#uses=1]
+@.str2 = private constant [16 x i8] c"Fhourstones 2.0\00", align 1 ; <[16 x i8]*> [#uses=1]
+@.str3 = private constant [54 x i8] c"Using %d transposition table entries with %d probes.\0A\00", align 1 ; <[54 x i8]*> [#uses=1]
+@.str4 = private constant [31 x i8] c"Solving %d-ply position after \00", align 1 ; <[31 x i8]*> [#uses=1]
+@.str5 = private constant [7 x i8] c" . . .\00", align 1 ; <[7 x i8]*> [#uses=1]
+@.str6 = private constant [28 x i8] c"score = %d (%c)  work = %d\0A\00", align 1 ; <[28 x i8]*> [#uses=1]
+@.str7 = private constant [36 x i8] c"%lu pos / %lu msec = %.1f Kpos/sec\0A\00", align 1 ; <[36 x i8]*> [#uses=1]
+@plycnt = internal global i32 0                   ; <i32*> [#uses=21]
+@dias = internal global [19 x i32] zeroinitializer ; <[19 x i32]*> [#uses=43]
+@columns = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=18]
+@height = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=21]
+@rows = internal global [8 x i32] zeroinitializer ; <[8 x i32]*> [#uses=20]
+@colthr = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=5]
+@moves = internal global [44 x i32] zeroinitializer ; <[44 x i32]*> [#uses=9]
+@.str8 = private constant [3 x i8] c"%d\00", align 1 ; <[3 x i8]*> [#uses=1]
+@he = internal global i8* null                    ; <i8**> [#uses=9]
+@hits = internal global i64 0                     ; <i64*> [#uses=8]
+@posed = internal global i64 0                    ; <i64*> [#uses=7]
+@ht = internal global i32* null                   ; <i32**> [#uses=5]
+@.str16 = private constant [19 x i8] c"store rate = %.3f\0A\00", align 1 ; <[19 x i8]*> [#uses=1]
+@.str117 = private constant [45 x i8] c"- %5.3f  < %5.3f  = %5.3f  > %5.3f  + %5.3f\0A\00", align 1 ; <[45 x i8]*> [#uses=1]
+@.str218 = private constant [6 x i8] c"%7d%c\00", align 1 ; <[6 x i8]*> [#uses=1]
+@.str319 = private constant [30 x i8] c"Failed to allocate %u bytes.\0A\00", align 1 ; <[30 x i8]*> [#uses=1]
+
+declare i32 @puts(i8* nocapture) nounwind
+
+declare i32 @getchar() nounwind
+
+define internal i32 @transpose() nounwind readonly {
+; CHECK: push
+entry:
+  %0 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 1), align 4 ; <i32> [#uses=1]
+  %1 = shl i32 %0, 7                              ; <i32> [#uses=1]
+  %2 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 2), align 4 ; <i32> [#uses=1]
+  %3 = or i32 %1, %2                              ; <i32> [#uses=1]
+  %4 = shl i32 %3, 7                              ; <i32> [#uses=1]
+  %5 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 3), align 4 ; <i32> [#uses=1]
+  %6 = or i32 %4, %5                              ; <i32> [#uses=3]
+  %7 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 7), align 4 ; <i32> [#uses=1]
+  %8 = shl i32 %7, 7                              ; <i32> [#uses=1]
+  %9 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 6), align 4 ; <i32> [#uses=1]
+  %10 = or i32 %8, %9                             ; <i32> [#uses=1]
+  %11 = shl i32 %10, 7                            ; <i32> [#uses=1]
+  %12 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 5), align 4 ; <i32> [#uses=1]
+  %13 = or i32 %11, %12                           ; <i32> [#uses=3]
+  %14 = icmp ugt i32 %6, %13                      ; <i1> [#uses=2]
+  %.pn2.in.i = select i1 %14, i32 %6, i32 %13     ; <i32> [#uses=1]
+  %.pn1.in.i = select i1 %14, i32 %13, i32 %6     ; <i32> [#uses=1]
+  %.pn2.i = shl i32 %.pn2.in.i, 7                 ; <i32> [#uses=1]
+  %.pn3.i = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 4) ; <i32> [#uses=1]
+  %.pn.in.in.i = or i32 %.pn2.i, %.pn3.i          ; <i32> [#uses=1]
+  %.pn.in.i = zext i32 %.pn.in.in.i to i64        ; <i64> [#uses=1]
+  %.pn.i = shl i64 %.pn.in.i, 21                  ; <i64> [#uses=1]
+  %.pn1.i = zext i32 %.pn1.in.i to i64            ; <i64> [#uses=1]
+  %iftmp.22.0.i = or i64 %.pn.i, %.pn1.i          ; <i64> [#uses=2]
+  %15 = lshr i64 %iftmp.22.0.i, 17                ; <i64> [#uses=1]
+  %16 = trunc i64 %15 to i32                      ; <i32> [#uses=2]
+  %17 = urem i64 %iftmp.22.0.i, 1050011           ; <i64> [#uses=1]
+  %18 = trunc i64 %17 to i32                      ; <i32> [#uses=1]
+  %19 = urem i32 %16, 179                         ; <i32> [#uses=1]
+  %20 = or i32 %19, 131072                        ; <i32> [#uses=1]
+  %21 = load i32** @ht, align 4                   ; <i32*> [#uses=1]
+  br label %bb5
+
+bb:                                               ; preds = %bb5
+  %22 = getelementptr inbounds i32* %21, i32 %x.0 ; <i32*> [#uses=1]
+  %23 = load i32* %22, align 4                    ; <i32> [#uses=1]
+  %24 = icmp eq i32 %23, %16                      ; <i1> [#uses=1]
+  br i1 %24, label %bb1, label %bb2
+
+bb1:                                              ; preds = %bb
+  %25 = load i8** @he, align 4                    ; <i8*> [#uses=1]
+  %26 = getelementptr inbounds i8* %25, i32 %x.0  ; <i8*> [#uses=1]
+  %27 = load i8* %26, align 1                     ; <i8> [#uses=1]
+  %28 = sext i8 %27 to i32                        ; <i32> [#uses=1]
+  ret i32 %28
+
+bb2:                                              ; preds = %bb
+  %29 = add nsw i32 %20, %x.0                     ; <i32> [#uses=3]
+  %30 = add i32 %29, -1050011                     ; <i32> [#uses=1]
+  %31 = icmp sgt i32 %29, 1050010                 ; <i1> [#uses=1]
+  %. = select i1 %31, i32 %30, i32 %29            ; <i32> [#uses=1]
+  %32 = add i32 %33, 1                            ; <i32> [#uses=1]
+  br label %bb5
+
+bb5:                                              ; preds = %bb2, %entry
+  %33 = phi i32 [ 0, %entry ], [ %32, %bb2 ]      ; <i32> [#uses=2]
+  %x.0 = phi i32 [ %18, %entry ], [ %., %bb2 ]    ; <i32> [#uses=3]
+  %34 = icmp sgt i32 %33, 7                       ; <i1> [#uses=1]
+  br i1 %34, label %bb7, label %bb
+
+bb7:                                              ; preds = %bb5
+  ret i32 -128
+}
+
+declare noalias i8* @calloc(i32, i32) nounwind
+
+declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
index 98a5263..45d356c 100644
--- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
+++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
@@ -11,8 +11,8 @@
 define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) {
 ; CHECK: _ZNKSs7compareERKSs:
 ; CHECK:      it  eq
-; CHECK-NEXT: subeq.w r0, r6, r8
-; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r8, r9, pc}
+; CHECK-NEXT: subeq r0, r6, r7
+; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r7, r8, pc}
 entry:
   %0 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
   %1 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3]
diff --git a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll
index e0946c7..2246de3 100644
--- a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll
+++ b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll
@@ -7,19 +7,12 @@
 define void @t() nounwind ssp {
 entry:
 ; CHECK: t:
-; CHECK:  push  {r4, r7}
-; CHECK:  mov r0, sp
-; CHECK:  add r7, sp, #4
-; CHECK:  bic r0, r0, #7
+  %size = mul i32 8, 2
 ; CHECK:  subs  r0, #16
 ; CHECK:  mov sp, r0
-; CHECK:  mov r0, sp
-; CHECK:  bic r0, r0, #7
+  %vla_a = alloca i8, i32 %size, align 8
 ; CHECK:  subs  r0, #16
 ; CHECK:  mov sp, r0
-
-  %size = mul i32 8, 2
-  %vla_a = alloca i8, i32 %size, align 8
   %vla_b = alloca i8, i32 %size, align 8
   unreachable
 }
diff --git a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
index 2675006..9ed6a01 100644
--- a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
+++ b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
@@ -23,8 +23,9 @@ entry:
   %4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2]
 ; Constant pool load followed by add.
 ; Then clobber the loaded register, not the sum.
-; CHECK: vldr.64 [[LDR:d.]]
-; CHECK: vadd.f64 [[ADD:d.]], [[LDR]], [[LDR]]
+; CHECK: vldr.64 [[LDR:d.*]],
+; CHECK: LPC0_0:
+; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]]
 ; CHECK: vmov.f64 [[LDR]]
   %5 = fadd <2 x double> %3, %3                   ; <<2 x double>> [#uses=2]
   %6 = fadd <2 x double> %4, %4                   ; <<2 x double>> [#uses=2]
diff --git a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
index c5fc509..f91e1c9 100644
--- a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
+++ b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic | FileCheck %s
 ; rdar://8115404
 ; Tail merging must not split an IT block.
 
diff --git a/test/CodeGen/Thumb2/buildvector-crash.ll b/test/CodeGen/Thumb2/buildvector-crash.ll
new file mode 100644
index 0000000..01ef472
--- /dev/null
+++ b/test/CodeGen/Thumb2/buildvector-crash.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -O3 -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s
+; Formerly crashed, 3573915.
+
+define void @RotateStarsFP_Vec() nounwind {
+bb.nph372:
+  br label %bb8
+
+bb8:                                              ; preds = %bb8, %bb.nph372
+  %0 = fadd <4 x float> undef, <float 0xBFEE353F80000000, float 0xBFEE353F80000000, float 0xBFEE353F80000000, float 0xBFEE353F80000000>
+  %1 = fmul <4 x float> %0, undef
+  %2 = fmul <4 x float> %1, undef
+  %3 = fadd <4 x float> undef, %2
+  store <4 x float> %3, <4 x float>* undef, align 4
+  br label %bb8
+; CHECK: RotateStarsFP_Vec:
+; CHECK: vldmia
+}
diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll
index f7ec5a3..d06f8a7 100644
--- a/test/CodeGen/Thumb2/cortex-fp.ll
+++ b/test/CodeGen/Thumb2/cortex-fp.ll
@@ -19,6 +19,6 @@ entry:
   %0 = fmul double %a, %b
 ; CORTEXM3: blx ___muldf3
 ; CORTEXM4: blx ___muldf3
-; CORTEXA8: vmul.f64  d0, d1, d0
+; CORTEXA8: vmul.f64  d16, d17, d16
   ret double %0
 }
diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
index 583f405..c169fb3 100644
--- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
+++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
@@ -1,15 +1,23 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 1
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | FileCheck %s
 
 define void @fht(float* nocapture %fz, i16 signext %n) nounwind {
+; CHECK: fht:
 entry:
   br label %bb5
 
 bb5:                                              ; preds = %bb5, %entry
+; CHECK: %bb5
+; CHECK: bne
   br i1 undef, label %bb5, label %bb.nph
 
 bb.nph:                                           ; preds = %bb5
   br label %bb7
 
+; Loop preheader
+; CHECK: vmov.f32
+; CHECK: vsub.f32
+; CHECK: vadd.f32
+; CHECK: vmul.f32
 bb7:                                              ; preds = %bb9, %bb.nph
   %s1.02 = phi float [ undef, %bb.nph ], [ %35, %bb9 ] ; <float> [#uses=3]
   %tmp79 = add i32 undef, undef                   ; <i32> [#uses=1]
@@ -19,6 +27,9 @@ bb7:                                              ; preds = %bb9, %bb.nph
   br label %bb8
 
 bb8:                                              ; preds = %bb8, %bb7
+; CHECK: %bb8
+; CHECK-NOT: vmov.f32
+; CHECK: blt
   %tmp54 = add i32 0, %tmp53                      ; <i32> [#uses=0]
   %fi.1 = getelementptr float* %fz, i32 undef     ; <float*> [#uses=2]
   %tmp80 = add i32 0, %tmp79                      ; <i32> [#uses=1]
@@ -62,6 +73,8 @@ bb8:                                              ; preds = %bb8, %bb7
   br i1 %34, label %bb8, label %bb9
 
 bb9:                                              ; preds = %bb8
+; CHECK: %bb9
+; CHECK: vmov.f32
   %35 = fadd float 0.000000e+00, undef            ; <float> [#uses=1]
   br label %bb7
 }
diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll
index 9729534..763d316 100644
--- a/test/CodeGen/Thumb2/large-stack.ll
+++ b/test/CodeGen/Thumb2/large-stack.ll
@@ -27,7 +27,7 @@ define i32 @test3() {
 ; DARWIN: sub.w sp, sp, #805306368
 ; DARWIN: sub sp, #20
 ; LINUX: test3:
-; LINUX: stmdb   sp!, {r4, r7, r11, lr}
+; LINUX: push {r4, r7, r11, lr}
 ; LINUX: sub.w sp, sp, #805306368
 ; LINUX: sub sp, #16
     %retval = alloca i32, align 4
diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll
index 29b8e75..650d788 100644
--- a/test/CodeGen/Thumb2/ldr-str-imm12.ll
+++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll
@@ -22,7 +22,7 @@
 
 define %union.rec* @Manifest(%union.rec* %x, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind {
 entry:
-; CHECK:       ldr.w	r9, [r7, #28]
+; CHECK:       ldr.w	{{(r[0-9])|(lr)}}, [r7, #28]
   %xgaps.i = alloca [32 x %union.rec*], align 4   ; <[32 x %union.rec*]*> [#uses=0]
   %ycomp.i = alloca [32 x %union.rec*], align 4   ; <[32 x %union.rec*]*> [#uses=0]
   br label %bb20
@@ -46,9 +46,9 @@ bb119:                                            ; preds = %bb20, %bb20
 
 bb420:                                            ; preds = %bb20, %bb20
 ; CHECK: bb420
-; CHECK: str r{{[0-7]}}, [sp]
-; CHECK: str r{{[0-7]}}, [sp, #4]
-; CHECK: str r{{[0-7]}}, [sp, #8]
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp]
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #4]
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #8]
 ; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #24]
   store %union.rec* null, %union.rec** @zz_hold, align 4
   store %union.rec* null, %union.rec** @zz_res, align 4
diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll
index 7fa782f..ad957a1 100644
--- a/test/CodeGen/Thumb2/lsr-deficiency.ll
+++ b/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -21,8 +21,8 @@ entry:
 bb:                                               ; preds = %bb, %entry
 ; CHECK: LBB0_1:
 ; CHECK: cmp r2, #0
-; CHECK: sub.w r9, r2, #1
-; CHECK: mov r2, r9
+; CHECK: sub{{(.w)?}} [[REGISTER:(r[0-9]+)|(lr)]], r2, #1
+; CHECK: mov r2, [[REGISTER]]
 
   %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ]     ; <i32> [#uses=1]
   %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
diff --git a/test/CodeGen/Thumb2/machine-licm-vdup.ll b/test/CodeGen/Thumb2/machine-licm-vdup.ll
deleted file mode 100644
index fde2ee0..0000000
--- a/test/CodeGen/Thumb2/machine-licm-vdup.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-fp-elim                -arm-vdup-splat | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-vdup-splat | FileCheck %s 
-; Modified version of machine-licm.ll with -arm-vdup-splat turned on, 8003375.
-; Eventually this should become the default and be moved into machine-licm.ll.
-; FIXME: the vdup should be hoisted out of the loop, 8248029.
-
-define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
-entry:
-; CHECK: t2:
-; CHECK: mov.w r3, #1065353216
-  br i1 undef, label %bb1, label %bb2
-
-bb1:
-; CHECK-NEXT: %bb1
-; CHECK: vdup.32 q1, r3
-  %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
-  %tmp1 = shl i32 %indvar, 2
-  %gep1 = getelementptr i8* %ptr1, i32 %tmp1
-  %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1)
-  %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2)
-  %gep2 = getelementptr i8* %ptr2, i32 %tmp1
-  call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1)
-  %indvar.next = add i32 %indvar, 1
-  %cond = icmp eq i32 %indvar.next, 10
-  br i1 %cond, label %bb2, label %bb1
-
-bb2:
-  ret void
-}
-
-; CHECK-NOT: LCPI1_0:
-; CHECK: .subsections_via_symbols
-
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
-
-declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
-
-declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index b949b2f..14d04a4 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -55,8 +55,8 @@ return:                                           ; preds = %bb, %entry
 define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
 entry:
 ; CHECK: t2:
-; CHECK: adr r{{.}}, #LCPI1_0
-; CHECK: vldmia r3, {d0, d1}
+; CHECK: mov.w r3, #1065353216
+; CHECK: vdup.32 q{{.*}}, r3
   br i1 undef, label %bb1, label %bb2
 
 bb1:
@@ -76,8 +76,8 @@ bb2:
   ret void
 }
 
-; CHECK: LCPI1_0:
-; CHECK: .section
+; CHECK-NOT: LCPI1_0:
+; CHECK: .subsections_via_symbols
 
 declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
 
diff --git a/test/CodeGen/Thumb2/thumb2-badreg-operands.ll b/test/CodeGen/Thumb2/thumb2-badreg-operands.ll
deleted file mode 100644
index 4df06b8..0000000
--- a/test/CodeGen/Thumb2/thumb2-badreg-operands.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s
-
-define void @b(i32 %x) nounwind optsize {
-entry:
-; CHECK: b
-; CHECK: mov r2, sp
-; CHECK: mls r0, r0, r1, r2
-; CHECK: mov sp, r0
-  %0 = mul i32 %x, 24                             ; <i32> [#uses=1]
-  %vla = alloca i8, i32 %0, align 1               ; <i8*> [#uses=1]
-  call arm_aapcscc  void @a(i8* %vla) nounwind optsize
-  ret void
-}
-
-declare void @a(i8*) optsize
diff --git a/test/CodeGen/Thumb2/thumb2-barrier.ll b/test/CodeGen/Thumb2/thumb2-barrier.ll
index a54d09e6..93ae7c4 100644
--- a/test/CodeGen/Thumb2/thumb2-barrier.ll
+++ b/test/CodeGen/Thumb2/thumb2-barrier.ll
@@ -1,17 +1,31 @@
 ; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s
 
-declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 )
+declare void @llvm.memory.barrier(i1 , i1 , i1 , i1 , i1)
 
-define void @t1() {
-; CHECK: t1:
-; CHECK: dsb
-  call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true )
+define void @t_st() {
+; CHECK: t_st:
+; CHECK: dmb st
+  call void @llvm.memory.barrier(i1 false, i1 false, i1 false, i1 true, i1 true)
   ret void
 }
 
-define void @t2() {
-; CHECK: t2:
-; CHECK: dmb
-  call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false )
+define void @t_sy() {
+; CHECK: t_sy:
+; CHECK: dmb sy
+  call void @llvm.memory.barrier(i1 true, i1 false, i1 false, i1 true, i1 true)
+  ret void
+}
+
+define void @t_ishst() {
+; CHECK: t_ishst:
+; CHECK: dmb ishst
+  call void @llvm.memory.barrier(i1 false, i1 false, i1 false, i1 true, i1 false)
+  ret void
+}
+
+define void @t_ish() {
+; CHECK: t_ish:
+; CHECK: dmb ish
+  call void @llvm.memory.barrier(i1 true, i1 false, i1 false, i1 true, i1 false)
   ret void
 }
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index 4f92c93..0d73fba 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -20,6 +20,26 @@ entry:
   %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
   store float 0.000000e+00, float* undef, align 4
   %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
+  %ld3 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld4 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld5 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld6 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld7 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld8 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld9 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld10 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld11 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
+  %ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+  store float 0.000000e+00, float* undef, align 4
   %val173 = load <4 x float>* undef               ; <<4 x float>> [#uses=1]
   br label %bb4
 
@@ -44,7 +64,16 @@ bb4:                                              ; preds = %bb193, %entry
   %18 = fmul <4 x float> %17, %val173             ; <<4 x float>> [#uses=1]
   %19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
   %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
-  %21 = fadd <4 x float> zeroinitializer, %20     ; <<4 x float>> [#uses=2]
+  %tmp1 = fadd <4 x float> %20, %ld3
+  %tmp2 = fadd <4 x float> %tmp1, %ld4
+  %tmp3 = fadd <4 x float> %tmp2, %ld5
+  %tmp4 = fadd <4 x float> %tmp3, %ld6
+  %tmp5 = fadd <4 x float> %tmp4, %ld7
+  %tmp6 = fadd <4 x float> %tmp5, %ld8
+  %tmp7 = fadd <4 x float> %tmp6, %ld9
+  %tmp8 = fadd <4 x float> %tmp7, %ld10
+  %tmp9 = fadd <4 x float> %tmp8, %ld11
+  %21 = fadd <4 x float> %tmp9, %ld12
   %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
   %tmp = extractelement <4 x i1> %22, i32 0
   br i1 %tmp, label %bb193, label %bb186