diff options
Diffstat (limited to 'test/CodeGen/ARM')
73 files changed, 1532 insertions, 161 deletions
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll index 3694aaa..0bfe331 100644 --- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll +++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6,+vfp2 | FileCheck %s @quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1] @dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1] diff --git a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll index df9dbca..0ae7f84 100644 --- a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll +++ b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll @@ -11,7 +11,7 @@ entry: ; THUMB: t: ; THUMB-NOT: str r0, [r1], r0 -; THUMB: str r2, [r1] +; THUMB: str r1, [r0] %0 = getelementptr inbounds %struct.foo* %this, i32 0, i32 1 ; <i64*> [#uses=1] store i32 0, i32* inttoptr (i32 8 to i32*), align 8 br i1 undef, label %bb.nph96, label %bb3 diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll index a65cf4b..e0f50c9 100644 --- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll +++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon -O0 -regalloc=basic +; RUN: llc < %s -march=arm -mattr=+neon -O0 -optimize-regalloc -regalloc=basic ; This test would crash the rewriter when trying to handle a spill after one of ; the @llvm.arm.neon.vld3.v8i8 defined three parts of a register. diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll index b9d5600..1aee508 100644 --- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll +++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll @@ -12,7 +12,7 @@ target triple = "thumbv7-apple-darwin10" ; CHECK: vld1.64 {d16, d17}, [r{{.}}] ; CHECK-NOT: vld1.64 {d16, d17} -; CHECK: vmov.f64 d19, d16 +; CHECK: vmov.f64 define i32 @test(i8* %arg) nounwind { entry: diff --git a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll index e3c18ce..da4d157 100644 --- a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll +++ b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB2 +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB2 ; rdar://8690640 define i32* @t(i32* %x) nounwind { diff --git a/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/test/CodeGen/ARM/2010-12-07-PEIBug.ll index c65952b..23e1aa1 100644 --- a/test/CodeGen/ARM/2010-12-07-PEIBug.ll +++ b/test/CodeGen/ARM/2010-12-07-PEIBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s ; rdar://8728956 define hidden void @foo() nounwind ssp { diff --git a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll index ccda281..2faa04a 100644 --- a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll +++ b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | 
FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -disable-cgp-delete-dead-blocks -mcpu=cortex-a8 | FileCheck %s ; Do not form Thumb2 ldrd / strd if the offset is not multiple of 4. ; rdar://9133587 diff --git a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll index 7baacfe..3e78c46 100644 --- a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll +++ b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -arm-tail-calls=1 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" -target triple = "thumbv7-apple-darwin10" +target triple = "thumbv7-apple-ios" %struct.A = type <{ i16, i16, i32, i16, i16, i32, i16, [8 x %struct.B], [418 x i8], %struct.C }> %struct.B = type <{ i32, i16, i16 }> diff --git a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll index 17264ee..216057a 100644 --- a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll +++ b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a9 | FileCheck %s ; Test that ldmia_ret preserves implicit operands for return values. ; ; This CFG is reduced from a benchmark miscompile. With current diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll index 86e8712..6fbae19 100644 --- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll +++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll @@ -8,11 +8,11 @@ define void @test_sqrt(<4 x float>* %X) nounwind { ; CHECK: movw r1, :lower16:{{.*}} ; CHECK: movt r1, :upper16:{{.*}} -; CHECK: vldmia r1, {[[short0:s[0-9]+]], [[short1:s[0-9]+]], [[short2:s[0-9]+]], [[short3:s[0-9]+]]} -; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short3]] -; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short2]] -; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short1]] -; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short0]] +; CHECK: vldmia r1 +; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} ; CHECK: vstmia {{.*}} L.entry: diff --git a/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll new file mode 100644 index 0000000..ddb7632 --- /dev/null +++ b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll @@ -0,0 +1,55 @@ +; RUN: llc < %s -O0 -mtriple=thumbv7-apple-ios | FileCheck %s + +; Radar 10567930: Make sure that all the caller-saved registers are saved and +; restored in a function with setjmp/longjmp EH. In particular, r6 was not +; being saved here. 
+; CHECK: push {r4, r5, r6, r7, lr} + +%0 = type opaque +%struct.NSConstantString = type { i32*, i32, i8*, i32 } + +define i32 @asdf(i32 %a, i32 %b, i8** %c, i8* %d) { +bb: + %tmp = alloca i32, align 4 + %tmp1 = alloca i32, align 4 + %tmp2 = alloca i8*, align 4 + %tmp3 = alloca i1 + %myException = alloca %0*, align 4 + %tmp4 = alloca i8* + %tmp5 = alloca i32 + %exception = alloca %0*, align 4 + store i32 %a, i32* %tmp, align 4 + store i32 %b, i32* %tmp1, align 4 + store i8* %d, i8** %tmp2, align 4 + store i1 false, i1* %tmp3 + %tmp7 = load i8** %c + %tmp10 = invoke %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* %tmp7, i8* %d, %0* null) + to label %bb11 unwind label %bb15 + +bb11: ; preds = %bb + store %0* %tmp10, %0** %myException, align 4 + %tmp12 = load %0** %myException, align 4 + %tmp13 = bitcast %0* %tmp12 to i8* + invoke void @objc_exception_throw(i8* %tmp13) noreturn + to label %bb14 unwind label %bb15 + +bb14: ; preds = %bb11 + unreachable + +bb15: ; preds = %bb11, %bb + %tmp16 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) + catch i8* null + %tmp17 = extractvalue { i8*, i32 } %tmp16, 0 + store i8* %tmp17, i8** %tmp4 + %tmp18 = extractvalue { i8*, i32 } %tmp16, 1 + store i32 %tmp18, i32* %tmp5 + store i1 true, i1* %tmp3 + br label %bb56 + +bb56: + unreachable +} + +declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind +declare i32 @__objc_personality_v0(...) +declare void @objc_exception_throw(i8*) diff --git a/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll new file mode 100644 index 0000000..926daaf --- /dev/null +++ b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll @@ -0,0 +1,105 @@ +; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs +; PR11829 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-gnueabi" + +define arm_aapcs_vfpcc void @foo(i8* nocapture %arg) nounwind uwtable align 2 { +bb: + br i1 undef, label %bb1, label %bb2 + +bb1: ; preds = %bb + unreachable + +bb2: ; preds = %bb + br label %bb3 + +bb3: ; preds = %bb4, %bb2 + %tmp = icmp slt i32 undef, undef + br i1 %tmp, label %bb4, label %bb67 + +bb4: ; preds = %bb3 + %tmp5 = load <4 x i32>* undef, align 16, !tbaa !0 + %tmp6 = and <4 x i32> %tmp5, <i32 8388607, i32 8388607, i32 8388607, i32 8388607> + %tmp7 = or <4 x i32> %tmp6, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216> + %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float> + %tmp9 = fsub <4 x float> %tmp8, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128) to i64) to i128)) to <4 x float>) + %tmp10 = fmul <4 x float> undef, %tmp9 + %tmp11 = fadd <4 x float> undef, %tmp10 + %tmp12 = bitcast <4 x float> zeroinitializer to i128 + %tmp13 = lshr i128 %tmp12, 64 + %tmp14 = trunc i128 %tmp13 to i64 + %tmp15 = insertvalue [2 x i64] undef, i64 %tmp14, 1 + %tmp16 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp11) nounwind + %tmp17 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp16, <4 x float> %tmp11) nounwind + %tmp18 = fmul <4 x float> %tmp17, %tmp16 + %tmp19 = call <4 x float> 
@llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp18, <4 x float> %tmp11) nounwind + %tmp20 = fmul <4 x float> %tmp19, %tmp18 + %tmp21 = fmul <4 x float> %tmp20, zeroinitializer + %tmp22 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp21, <4 x float> undef) nounwind + call arm_aapcs_vfpcc void @bar(i8* null, i8* undef, <4 x i32>* undef, [2 x i64] zeroinitializer) nounwind + %tmp23 = bitcast <4 x float> %tmp22 to i128 + %tmp24 = trunc i128 %tmp23 to i64 + %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0 + %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1 + %tmp27 = load float* undef, align 4, !tbaa !2 + %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3 + %tmp29 = load <4 x i32>* undef, align 16, !tbaa !0 + %tmp30 = and <4 x i32> %tmp29, <i32 8388607, i32 8388607, i32 8388607, i32 8388607> + %tmp31 = or <4 x i32> %tmp30, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216> + %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float> + %tmp33 = fsub <4 x float> %tmp32, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128) to i64) to i128)) to <4 x float>) + %tmp34 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> undef, <4 x float> %tmp28) nounwind + %tmp35 = fmul <4 x float> %tmp34, undef + %tmp36 = fmul <4 x float> %tmp35, undef + %tmp37 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind + %tmp38 = load float* undef, align 4, !tbaa !2 + %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0 + %tmp40 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind + %tmp41 = load float* undef, align 4, !tbaa !2 + %tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3 + %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer + %tmp44 = fmul <4 x float> %tmp33, %tmp43 + %tmp45 = fadd <4 x float> %tmp42, %tmp44 + %tmp46 = fsub <4 x float> %tmp45, undef + %tmp47 = fmul <4 x float> %tmp46, %tmp36 + %tmp48 = fadd <4 x float> undef, %tmp47 + %tmp49 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind + %tmp50 = load float* undef, align 4, !tbaa !2 + %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3 + %tmp52 = call arm_aapcs_vfpcc float* null(i8* undef) nounwind + %tmp54 = load float* %tmp52, align 4, !tbaa !2 + %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3 + %tmp56 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp22 + %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) nounwind + %tmp58 = fmul <4 x float> undef, %tmp57 + %tmp59 = fsub <4 x float> %tmp51, %tmp48 + %tmp60 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp58 + %tmp61 = fmul <4 x float> %tmp59, %tmp60 + %tmp62 = fadd <4 x float> %tmp48, %tmp61 + call arm_aapcs_vfpcc void @baz(i8* undef, i8* undef, [2 x i64] %tmp26, <4 x i32>* undef) + %tmp63 = bitcast <4 x float> %tmp62 to i128 + %tmp64 = lshr i128 %tmp63, 64 + %tmp65 = trunc i128 %tmp64 to i64 + %tmp66 = insertvalue [2 x i64] zeroinitializer, i64 %tmp65, 1 + call arm_aapcs_vfpcc void @quux(i8* undef, i8* undef, [2 x i64] undef, i8* undef, [2 x i64] %tmp66, i8* undef, i8* undef, [2 x i64] %tmp26, [2 x i64] %tmp15, <4 x i32>* undef) + br label %bb3 + 
+bb67: ; preds = %bb3 + ret void +} + +declare arm_aapcs_vfpcc void @bar(i8*, i8*, <4 x i32>*, [2 x i64]) + +declare arm_aapcs_vfpcc void @baz(i8*, i8* nocapture, [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2 + +declare arm_aapcs_vfpcc void @quux(i8*, i8*, [2 x i64], i8* nocapture, [2 x i64], i8* nocapture, i8* nocapture, [2 x i64], [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2 + +declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone + +declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone + +declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null} +!2 = metadata !{metadata !"float", metadata !0} diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll new file mode 100644 index 0000000..872eca3 --- /dev/null +++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll @@ -0,0 +1,67 @@ +; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs -verify-coalescing +; PR11841 +; PR11829 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-eabi" + +; This test case is exercising REG_SEQUENCE, and chains of REG_SEQUENCE. +define arm_aapcs_vfpcc void @foo(i8* nocapture %arg, i8* %arg1) nounwind align 2 { +bb: + %tmp = load <2 x float>* undef, align 8, !tbaa !0 + %tmp2 = extractelement <2 x float> %tmp, i32 0 + %tmp3 = insertelement <4 x float> undef, float %tmp2, i32 0 + %tmp4 = insertelement <4 x float> %tmp3, float 0.000000e+00, i32 1 + %tmp5 = insertelement <4 x float> %tmp4, float 0.000000e+00, i32 2 + %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3 + %tmp7 = extractelement <2 x float> %tmp, i32 1 + %tmp8 = insertelement <4 x float> %tmp3, float %tmp7, i32 1 + %tmp9 = insertelement <4 x float> %tmp8, float 0.000000e+00, i32 2 + %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 3 + %tmp11 = bitcast <4 x float> %tmp6 to <2 x i64> + %tmp12 = shufflevector <2 x i64> %tmp11, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp13 = bitcast <1 x i64> %tmp12 to <2 x float> + %tmp14 = shufflevector <2 x float> %tmp13, <2 x float> undef, <4 x i32> zeroinitializer + %tmp15 = bitcast <4 x float> %tmp14 to <2 x i64> + %tmp16 = shufflevector <2 x i64> %tmp15, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp17 = bitcast <1 x i64> %tmp16 to <2 x float> + %tmp18 = extractelement <2 x float> %tmp17, i32 0 + tail call arm_aapcs_vfpcc void @bar(i8* undef, float %tmp18, float undef, float 0.000000e+00) nounwind + %tmp19 = bitcast <4 x float> %tmp10 to <2 x i64> + %tmp20 = shufflevector <2 x i64> %tmp19, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp21 = bitcast <1 x i64> %tmp20 to <2 x float> + %tmp22 = shufflevector <2 x float> %tmp21, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %tmp23 = bitcast <4 x float> %tmp22 to <2 x i64> + %tmp24 = shufflevector <2 x i64> %tmp23, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp25 = bitcast <1 x i64> %tmp24 to <2 x float> + %tmp26 = extractelement <2 x float> %tmp25, i32 0 + tail call arm_aapcs_vfpcc void @bar(i8* undef, float undef, float %tmp26, float 0.000000e+00) nounwind + ret void +} + +define arm_aapcs_vfpcc void @foo2() nounwind uwtable { +entry: + br i1 undef, label %for.end, label 
%cond.end295 + +cond.end295: ; preds = %entry + %shuffle.i39.i.i1035 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i38.i.i1036 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i37.i.i1037 = shufflevector <1 x i64> %shuffle.i39.i.i1035, <1 x i64> %shuffle.i38.i.i1036, <2 x i32> <i32 0, i32 1> + %0 = bitcast <2 x i64> %shuffle.i37.i.i1037 to <4 x float> + %1 = bitcast <4 x float> undef to <2 x i64> + %shuffle.i36.i.i = shufflevector <2 x i64> %1, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i35.i.i = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i34.i.i = shufflevector <1 x i64> %shuffle.i36.i.i, <1 x i64> %shuffle.i35.i.i, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i34.i.i to <4 x float> + tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %0, i32 4) nounwind + tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %2, i32 4) nounwind + unreachable + +for.end: ; preds = %entry + ret void +} + +declare arm_aapcs_vfpcc void @bar(i8*, float, float, float) +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll b/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll new file mode 100644 index 0000000..ec5b2e9 --- /dev/null +++ b/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -verify-coalescing +; PR11861 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-eabi" + +define arm_aapcs_vfpcc void @foo() nounwind uwtable align 2 { + br label %1 + +; <label>:1 ; preds = %1, %0 + %2 = phi <4 x float> [ undef, %0 ], [ %11, %1 ] + %3 = bitcast <4 x float> %2 to <2 x i64> + %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <1 x i32> zeroinitializer + %5 = xor <2 x i32> zeroinitializer, <i32 -1, i32 -1> + %6 = bitcast <2 x i32> zeroinitializer to <2 x float> + %7 = shufflevector <2 x float> zeroinitializer, <2 x float> %6, <2 x i32> <i32 0, i32 2> + %8 = shufflevector <2 x i64> %3, <2 x i64> undef, <1 x i32> <i32 1> + %9 = bitcast <2 x float> %7 to <1 x i64> + %10 = shufflevector <1 x i64> %9, <1 x i64> %8, <2 x i32> <i32 0, i32 1> + %11 = bitcast <2 x i64> %10 to <4 x float> + br label %1 +} diff --git a/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll new file mode 100644 index 0000000..5f24e42 --- /dev/null +++ b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll @@ -0,0 +1,121 @@ +; RUN: llc < %s -mcpu=cortex-a9 -join-liveintervals=0 -verify-machineinstrs +; PR11765 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-gnueabi" + +; This test case exercises the MachineCopyPropagation pass by disabling the +; RegisterCoalescer. 
+ +define arm_aapcs_vfpcc void @foo(i8* %arg) nounwind uwtable align 2 { +bb: + br i1 undef, label %bb1, label %bb2 + +bb1: ; preds = %bb + unreachable + +bb2: ; preds = %bb + br i1 undef, label %bb92, label %bb3 + +bb3: ; preds = %bb2 + %tmp = or <4 x i32> undef, undef + %tmp4 = bitcast <4 x i32> %tmp to <4 x float> + %tmp5 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp4 + %tmp6 = bitcast <4 x i32> zeroinitializer to <4 x float> + %tmp7 = fmul <4 x float> %tmp6, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00> + %tmp8 = bitcast <4 x float> %tmp7 to <2 x i64> + %tmp9 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp10 = bitcast <1 x i64> %tmp9 to <2 x float> + %tmp11 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> <i32 1> + %tmp12 = bitcast <1 x i64> %tmp11 to <2 x float> + %tmp13 = shufflevector <2 x float> %tmp10, <2 x float> %tmp12, <2 x i32> <i32 0, i32 2> + %tmp14 = shufflevector <2 x float> %tmp10, <2 x float> undef, <2 x i32> <i32 1, i32 2> + %tmp15 = bitcast <2 x float> %tmp14 to <1 x i64> + %tmp16 = bitcast <4 x i32> zeroinitializer to <2 x i64> + %tmp17 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp18 = bitcast <1 x i64> %tmp17 to <2 x i32> + %tmp19 = and <2 x i32> %tmp18, <i32 -1, i32 0> + %tmp20 = bitcast <2 x float> %tmp13 to <2 x i32> + %tmp21 = and <2 x i32> %tmp20, <i32 0, i32 -1> + %tmp22 = or <2 x i32> %tmp19, %tmp21 + %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64> + %tmp24 = shufflevector <1 x i64> %tmp23, <1 x i64> undef, <2 x i32> <i32 0, i32 1> + %tmp25 = bitcast <2 x i64> %tmp24 to <4 x float> + %tmp26 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> <i32 1> + %tmp27 = bitcast <1 x i64> %tmp26 to <2 x i32> + %tmp28 = and <2 x i32> %tmp27, <i32 -1, i32 0> + %tmp29 = and <2 x i32> undef, <i32 0, i32 -1> + %tmp30 = or <2 x i32> %tmp28, %tmp29 + %tmp31 = bitcast <2 x i32> %tmp30 to <1 x i64> + %tmp32 = insertelement <4 x float> %tmp25, float 0.000000e+00, i32 3 + %tmp33 = fmul <4 x float> undef, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00> + %tmp34 = fadd <4 x float> %tmp33, %tmp32 + %tmp35 = fmul <4 x float> %tmp33, zeroinitializer + %tmp36 = fadd <4 x float> %tmp35, zeroinitializer + %tmp37 = fadd <4 x float> %tmp35, zeroinitializer + %tmp38 = bitcast <4 x float> %tmp34 to <2 x i64> + %tmp39 = shufflevector <2 x i64> %tmp38, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp40 = bitcast <1 x i64> %tmp39 to <2 x float> + %tmp41 = shufflevector <2 x float> %tmp40, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %tmp42 = load <4 x float>* null, align 16, !tbaa !0 + %tmp43 = fmul <4 x float> %tmp42, %tmp41 + %tmp44 = load <4 x float>* undef, align 16, !tbaa !0 + %tmp45 = fadd <4 x float> undef, %tmp43 + %tmp46 = fadd <4 x float> undef, %tmp45 + %tmp47 = bitcast <4 x float> %tmp36 to <2 x i64> + %tmp48 = shufflevector <2 x i64> %tmp47, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp49 = bitcast <1 x i64> %tmp48 to <2 x float> + %tmp50 = shufflevector <2 x float> %tmp49, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %tmp51 = fmul <4 x float> %tmp42, %tmp50 + %tmp52 = fmul <4 x float> %tmp44, undef + %tmp53 = fadd <4 x float> %tmp52, %tmp51 + %tmp54 = fadd <4 x float> undef, %tmp53 + %tmp55 = bitcast <4 x float> %tmp37 to <2 x i64> + %tmp56 = shufflevector <2 x i64> %tmp55, <2 x i64> undef, <1 x i32> <i32 1> + %tmp57 = bitcast 
<1 x i64> %tmp56 to <2 x float> + %tmp58 = shufflevector <2 x float> %tmp57, <2 x float> undef, <4 x i32> zeroinitializer + %tmp59 = fmul <4 x float> undef, %tmp58 + %tmp60 = fadd <4 x float> %tmp59, undef + %tmp61 = fadd <4 x float> %tmp60, zeroinitializer + %tmp62 = load void (i8*, i8*)** undef, align 4 + call arm_aapcs_vfpcc void %tmp62(i8* sret undef, i8* undef) nounwind + %tmp63 = bitcast <4 x float> %tmp46 to i128 + %tmp64 = bitcast <4 x float> %tmp54 to i128 + %tmp65 = bitcast <4 x float> %tmp61 to i128 + %tmp66 = lshr i128 %tmp63, 64 + %tmp67 = trunc i128 %tmp66 to i64 + %tmp68 = insertvalue [8 x i64] undef, i64 %tmp67, 1 + %tmp69 = insertvalue [8 x i64] %tmp68, i64 undef, 2 + %tmp70 = lshr i128 %tmp64, 64 + %tmp71 = trunc i128 %tmp70 to i64 + %tmp72 = insertvalue [8 x i64] %tmp69, i64 %tmp71, 3 + %tmp73 = trunc i128 %tmp65 to i64 + %tmp74 = insertvalue [8 x i64] %tmp72, i64 %tmp73, 4 + %tmp75 = insertvalue [8 x i64] %tmp74, i64 undef, 5 + %tmp76 = insertvalue [8 x i64] %tmp75, i64 undef, 6 + %tmp77 = insertvalue [8 x i64] %tmp76, i64 undef, 7 + call arm_aapcs_vfpcc void @bar(i8* sret null, [8 x i64] %tmp77) nounwind + %tmp78 = call arm_aapcs_vfpcc i8* null(i8* null) nounwind + %tmp79 = bitcast i8* %tmp78 to i512* + %tmp80 = load i512* %tmp79, align 16 + %tmp81 = lshr i512 %tmp80, 128 + %tmp82 = trunc i512 %tmp80 to i128 + %tmp83 = trunc i512 %tmp81 to i128 + %tmp84 = bitcast i128 %tmp83 to <4 x float> + %tmp85 = bitcast <4 x float> %tmp84 to <2 x i64> + %tmp86 = shufflevector <2 x i64> %tmp85, <2 x i64> undef, <1 x i32> <i32 1> + %tmp87 = bitcast <1 x i64> %tmp86 to <2 x float> + %tmp88 = shufflevector <2 x float> %tmp87, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %tmp89 = fmul <4 x float> undef, %tmp88 + %tmp90 = fadd <4 x float> %tmp89, undef + %tmp91 = fadd <4 x float> undef, %tmp90 + store <4 x float> %tmp91, <4 x float>* undef, align 16, !tbaa !0 + unreachable + +bb92: ; preds = %bb2 + ret void +} + +declare arm_aapcs_vfpcc void @bar(i8* noalias nocapture sret, [8 x i64]) nounwind uwtable inlinehint + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll new file mode 100644 index 0000000..6c7aaad --- /dev/null +++ b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll @@ -0,0 +1,26 @@ +; RUN: llc -verify-coalescing < %s +; PR11868 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-gnueabi" + +%0 = type { <4 x float> } +%1 = type { <4 x float> } + +@foo = external global %0, align 16 + +define arm_aapcs_vfpcc void @bar(float, i1 zeroext, i1 zeroext) nounwind { + %4 = load <4 x float>* getelementptr inbounds (%0* @foo, i32 0, i32 0), align 16 + %5 = extractelement <4 x float> %4, i32 0 + %6 = extractelement <4 x float> %4, i32 1 + %7 = extractelement <4 x float> %4, i32 2 + %8 = insertelement <4 x float> undef, float %5, i32 0 + %9 = insertelement <4 x float> %8, float %6, i32 1 + %10 = insertelement <4 x float> %9, float %7, i32 2 + %11 = insertelement <4 x float> %10, float 0.000000e+00, i32 3 + store <4 x float> %11, <4 x float>* undef, align 16 + call arm_aapcs_vfpcc void @baz(%1* undef, float 0.000000e+00) nounwind + ret void +} + +declare arm_aapcs_vfpcc void @baz(%1*, float) diff --git a/test/CodeGen/ARM/arm-returnaddr.ll b/test/CodeGen/ARM/arm-returnaddr.ll index 95edaad..1272e8e 
100644 --- a/test/CodeGen/ARM/arm-returnaddr.ll +++ b/test/CodeGen/ARM/arm-returnaddr.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s -; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic | FileCheck %s -; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=thumbv6-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios -regalloc=basic | FileCheck %s +; RUN: llc < %s -mtriple=thumbv6-apple-ios -regalloc=basic | FileCheck %s ; rdar://8015977 ; rdar://8020118 diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll index 02ce5a1..8967730 100644 --- a/test/CodeGen/ARM/atomic-op.ll +++ b/test/CodeGen/ARM/atomic-op.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin10 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s define void @func(i32 %argc, i8** %argv) nounwind { entry: @@ -61,7 +61,7 @@ entry: ; CHECK: strex %7 = atomicrmw min i32* %val2, i32 16 monotonic store i32 %7, i32* %old - %neg = sub i32 0, 1 ; <i32> [#uses=1] + %neg = sub i32 0, 1 ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex @@ -77,5 +77,85 @@ entry: ; CHECK: strex %10 = atomicrmw max i32* %val2, i32 0 monotonic store i32 %10, i32* %old - ret void + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %11 = atomicrmw umin i32* %val2, i32 16 monotonic + store i32 %11, i32* %old + %uneg = sub i32 0, 1 + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic + store i32 %12, i32* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %13 = atomicrmw umax i32* %val2, i32 1 monotonic + store i32 %13, i32* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %14 = atomicrmw umax i32* %val2, i32 0 monotonic + store i32 %14, i32* %old + + ret void +} + +define void @func2() nounwind { +entry: + %val = alloca i16 + %old = alloca i16 + store i16 31, i16* %val + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %0 = atomicrmw umin i16* %val, i16 16 monotonic + store i16 %0, i16* %old + %uneg = sub i16 0, 1 + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %1 = atomicrmw umin i16* %val, i16 %uneg monotonic + store i16 %1, i16* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %2 = atomicrmw umax i16* %val, i16 1 monotonic + store i16 %2, i16* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %3 = atomicrmw umax i16* %val, i16 0 monotonic + store i16 %3, i16* %old + ret void +} + +define void @func3() nounwind { +entry: + %val = alloca i8 + %old = alloca i8 + store i8 31, i8* %val + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %0 = atomicrmw umin i8* %val, i8 16 monotonic + store i8 %0, i8* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %uneg = sub i8 0, 1 + %1 = atomicrmw umin i8* %val, i8 %uneg monotonic + store i8 %1, i8* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %2 = atomicrmw umax i8* %val, i8 1 monotonic + store i8 %2, i8* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %3 = atomicrmw umax i8* %val, i8 0 monotonic + store i8 %3, i8* %old + ret void } diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll index 877ec18..1b385ab 100644 --- 
a/test/CodeGen/ARM/avoid-cpsr-rmw.ll +++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll @@ -6,9 +6,9 @@ define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone { entry: ; CHECK: t1: -; CHECK: muls [[REG:(r[0-9]+)]], r2, r3 -; CHECK-NEXT: mul [[REG2:(r[0-9]+)]], r0, r1 -; CHECK-NEXT: muls r0, [[REG2]], [[REG]] +; CHECK: muls [[REG:(r[0-9]+)]], r3, r2 +; CHECK-NEXT: mul [[REG2:(r[0-9]+)]], r1, r0 +; CHECK-NEXT: muls r0, [[REG]], [[REG2]] %0 = mul nsw i32 %a, %b %1 = mul nsw i32 %c, %d %2 = mul nsw i32 %0, %1 diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll index f78d998..be3e105 100644 --- a/test/CodeGen/ARM/call-tc.ll +++ b/test/CodeGen/ARM/call-tc.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -mtriple=armv6-apple-darwin -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6 +; RUN: llc < %s -mtriple=armv6-apple-ios -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D +; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D ; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D ; Enable tailcall optimization for iOS 5.0 diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll index 91ef659..487ec69 100644 --- a/test/CodeGen/ARM/code-placement.ll +++ b/test/CodeGen/ARM/code-placement.ll @@ -12,9 +12,9 @@ entry: br i1 %0, label %bb2, label %bb bb: -; CHECK: LBB0_2: -; CHECK: bne LBB0_2 -; CHECK-NOT: b LBB0_2 +; CHECK: LBB0_1: +; CHECK: bne LBB0_1 +; CHECK-NOT: b LBB0_1 ; CHECK: bx lr %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ] %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ] diff --git a/test/CodeGen/ARM/cse-call.ll b/test/CodeGen/ARM/cse-call.ll new file mode 100644 index 0000000..eff5de5 --- /dev/null +++ b/test/CodeGen/ARM/cse-call.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mcpu=arm1136jf-s -verify-machineinstrs | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "armv6-apple-ios0.0.0" + +; Don't CSE a cmp across a call that clobbers CPSR. +; +; CHECK: cmp +; CHECK: S_trimzeros +; CHECK: cmp +; CHECK: strlen + +@F_floatmul.man1 = external global [200 x i8], align 1 +@F_floatmul.man2 = external global [200 x i8], align 1 + +declare i32 @strlen(i8* nocapture) nounwind readonly +declare void @S_trimzeros(...) + +define i8* @F_floatmul(i8* %f1, i8* %f2) nounwind ssp { +entry: + br i1 undef, label %while.end42, label %while.body37 + +while.body37: ; preds = %while.body37, %entry + br i1 false, label %while.end42, label %while.body37 + +while.end42: ; preds = %while.body37, %entry + %. = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0) + %.92 = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0) + tail call void bitcast (void (...)* @S_trimzeros to void (i8*)*)(i8* %.92) nounwind + %call47 = tail call i32 @strlen(i8* %.) 
nounwind + unreachable +} diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll index 0dcf9dd..1d011be 100644 --- a/test/CodeGen/ARM/cse-libcalls.ll +++ b/test/CodeGen/ARM/cse-libcalls.ll @@ -4,7 +4,7 @@ target triple = "i386-apple-darwin8" ; Without CSE of libcalls, there are two calls in the output instead of one. -define i32 @u_f_nonbon(double %lambda) nounwind { +define double @u_f_nonbon(double %lambda) nounwind { entry: %tmp19.i.i = load double* null, align 4 ; <double> [#uses=2] %tmp6.i = fcmp olt double %tmp19.i.i, 1.000000e+00 ; <i1> [#uses=1] @@ -26,5 +26,5 @@ bb502.loopexit.i: ; preds = %bb28.i br i1 false, label %bb.nph53.i, label %bb508.i bb508.i: ; preds = %bb502.loopexit.i, %entry - ret i32 1 + ret double %tmp10.i4 } diff --git a/test/CodeGen/ARM/ctor_order.ll b/test/CodeGen/ARM/ctor_order.ll index 7f00eb3..6419292 100644 --- a/test/CodeGen/ARM/ctor_order.ll +++ b/test/CodeGen/ARM/ctor_order.ll @@ -6,13 +6,15 @@ ; DARWIN: .long _f151 ; DARWIN-NEXT: .long _f152 -; ELF: .section .ctors,"aw",%progbits +; ELF: .section .ctors.65384,"aw",%progbits +; ELF: .long f151 +; ELF: .section .ctors.65383,"aw",%progbits ; ELF: .long f152 -; ELF-NEXT: .long f151 -; GNUEABI: .section .init_array,"aw",%init_array +; GNUEABI: .section .init_array.151,"aw",%init_array ; GNUEABI: .long f151 -; GNUEABI-NEXT: .long f152 +; GNUEABI: .section .init_array.152,"aw",%init_array +; GNUEABI: .long f152 @llvm.global_ctors = appending global [2 x { i32, void ()* }] [ { i32, void ()* } { i32 151, void ()* @f151 }, { i32, void ()* } { i32 152, void ()* @f152 } ] diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll index b0270f9..a7b44e6 100644 --- a/test/CodeGen/ARM/debug-info-arg.ll +++ b/test/CodeGen/ARM/debug-info-arg.ll @@ -2,7 +2,7 @@ ; Test to check argument y's debug info uses FI ; Radar 10048772 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32" -target triple = "thumbv7-apple-macosx10.7.0" +target triple = "thumbv7-apple-ios" %struct.tag_s = type { i32, i32, i32 } diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll index 00e6cb0..0ad0a15 100644 --- a/test/CodeGen/ARM/debug-info-blocks.ll +++ b/test/CodeGen/ARM/debug-info-blocks.ll @@ -2,7 +2,7 @@ ; CHECK: @DEBUG_VALUE: mydata <- [sp+#{{[0-9]+}}]+#0 ; Radar 9331779 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" -target triple = "thumbv7-apple-macosx10.7.0" +target triple = "thumbv7-apple-ios" %0 = type opaque %1 = type { [4 x i32] } diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll index 3972e68..ae7af0a 100644 --- a/test/CodeGen/ARM/debug-info-sreg2.ll +++ b/test/CodeGen/ARM/debug-info-sreg2.ll @@ -4,11 +4,11 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32- target triple = "thumbv7-apple-macosx10.6.7" ;CHECK: Ldebug_loc0: +;CHECK-NEXT: .long Ltmp0 ;CHECK-NEXT: .long Ltmp1 -;CHECK-NEXT: .long Ltmp2 -;CHECK-NEXT: Lset[[N:[0-9]+]] = Ltmp10-Ltmp9 @ Loc expr size +;CHECK-NEXT: Lset[[N:[0-9]+]] = Ltmp{{[0-9]+}}-Ltmp[[M:[0-9]+]] @ Loc expr size ;CHECK-NEXT: .short Lset[[N]] -;CHECK-NEXT: Ltmp9: +;CHECK-NEXT: Ltmp[[M]]: ;CHECK-NEXT: .byte 144 @ DW_OP_regx for S register define void @_Z3foov() optsize ssp { diff --git a/test/CodeGen/ARM/dg.exp b/test/CodeGen/ARM/dg.exp deleted file mode 100644 index 3ff359a..0000000 --- 
a/test/CodeGen/ARM/dg.exp +++ /dev/null @@ -1,5 +0,0 @@ -load_lib llvm.exp - -if { [llvm_supports_target ARM] } { - RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] -} diff --git a/test/CodeGen/ARM/ehabi-unwind.ll b/test/CodeGen/ARM/ehabi-unwind.ll new file mode 100644 index 0000000..fd7d0e6 --- /dev/null +++ b/test/CodeGen/ARM/ehabi-unwind.ll @@ -0,0 +1,16 @@ +; Test that the EHABI unwind instruction generator does not encounter any +; unfamiliar instructions. +; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -disable-fp-elim +; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi +; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -arm-enable-ehabi-descriptors + +define void @_Z1fv() nounwind { +entry: + ret void +} + +define void @_Z1gv() nounwind { +entry: + call void @_Z1fv() + ret void +} diff --git a/test/CodeGen/ARM/fast-isel-binary.ll b/test/CodeGen/ARM/fast-isel-binary.ll new file mode 100644 index 0000000..723383e --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-binary.ll @@ -0,0 +1,116 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB + +; Test add with non-legal types + +define void @add_i1(i1 %a, i1 %b) nounwind ssp { +entry: +; ARM: add_i1 +; THUMB: add_i1 + %a.addr = alloca i1, align 4 + %0 = add i1 %a, %b +; ARM: add r0, r0, r1 +; THUMB: add r0, r1 + store i1 %0, i1* %a.addr, align 4 + ret void +} + +define void @add_i8(i8 %a, i8 %b) nounwind ssp { +entry: +; ARM: add_i8 +; THUMB: add_i8 + %a.addr = alloca i8, align 4 + %0 = add i8 %a, %b +; ARM: add r0, r0, r1 +; THUMB: add r0, r1 + store i8 %0, i8* %a.addr, align 4 + ret void +} + +define void @add_i16(i16 %a, i16 %b) nounwind ssp { +entry: +; ARM: add_i16 +; THUMB: add_i16 + %a.addr = alloca i16, align 4 + %0 = add i16 %a, %b +; ARM: add r0, r0, r1 +; THUMB: add r0, r1 + store i16 %0, i16* %a.addr, align 4 + ret void +} + +; Test or with non-legal types + +define void @or_i1(i1 %a, i1 %b) nounwind ssp { +entry: +; ARM: or_i1 +; THUMB: or_i1 + %a.addr = alloca i1, align 4 + %0 = or i1 %a, %b +; ARM: orr r0, r0, r1 +; THUMB: orrs r0, r1 + store i1 %0, i1* %a.addr, align 4 + ret void +} + +define void @or_i8(i8 %a, i8 %b) nounwind ssp { +entry: +; ARM: or_i8 +; THUMB: or_i8 + %a.addr = alloca i8, align 4 + %0 = or i8 %a, %b +; ARM: orr r0, r0, r1 +; THUMB: orrs r0, r1 + store i8 %0, i8* %a.addr, align 4 + ret void +} + +define void @or_i16(i16 %a, i16 %b) nounwind ssp { +entry: +; ARM: or_i16 +; THUMB: or_i16 + %a.addr = alloca i16, align 4 + %0 = or i16 %a, %b +; ARM: orr r0, r0, r1 +; THUMB: orrs r0, r1 + store i16 %0, i16* %a.addr, align 4 + ret void +} + +; Test sub with non-legal types + +define void @sub_i1(i1 %a, i1 %b) nounwind ssp { +entry: +; ARM: sub_i1 +; THUMB: sub_i1 + %a.addr = alloca i1, align 4 + %0 = sub i1 %a, %b +; ARM: sub r0, r0, r1 +; THUMB: subs r0, r0, r1 + store i1 %0, i1* %a.addr, align 4 + ret void +} + +define void @sub_i8(i8 %a, i8 %b) nounwind ssp { +entry: +; ARM: sub_i8 +; THUMB: sub_i8 + %a.addr = alloca i8, align 4 + %0 = sub i8 %a, %b +; ARM: sub r0, r0, r1 +; THUMB: subs r0, r0, r1 + store i8 %0, i8* %a.addr, align 4 + ret void +} + +define void @sub_i16(i16 %a, i16 %b) nounwind ssp { +entry: +; ARM: sub_i16 +; THUMB: sub_i16 + %a.addr = alloca i16, align 4 + %0 = sub i16 %a, %b +; ARM: sub r0, r0, r1 +; THUMB: subs r0, r0, r1 + store i16 %0, i16* %a.addr, 
align 4 + ret void +} diff --git a/test/CodeGen/ARM/fast-isel-br-const.ll b/test/CodeGen/ARM/fast-isel-br-const.ll index b7acfaa..625adc2 100644 --- a/test/CodeGen/ARM/fast-isel-br-const.ll +++ b/test/CodeGen/ARM/fast-isel-br-const.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp { entry: diff --git a/test/CodeGen/ARM/fast-isel-br-phi.ll b/test/CodeGen/ARM/fast-isel-br-phi.ll new file mode 100644 index 0000000..a0aba69 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-br-phi.ll @@ -0,0 +1,44 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios + +; This test ensures HandlePHINodesInSuccessorBlocks() is able to promote basic +; non-legal integer types (i.e., i1, i8, i16). + +declare void @fooi8(i8) +declare void @fooi16(i16) + +define void @foo(i1 %cmp) nounwind ssp { +entry: + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i1 [ 0, %cond.true ], [ 1, %cond.false ] + br i1 %cond, label %cond.true8, label %cond.false8 + +cond.true8: ; preds = %cond.end + br label %cond.end8 + +cond.false8: ; preds = %cond.end + br label %cond.end8 + +cond.end8: ; preds = %cond.false8, %cond.true8 + %cond8 = phi i8 [ 0, %cond.true8 ], [ 1, %cond.false8 ] + call void @fooi8(i8 %cond8) + br i1 0, label %cond.true16, label %cond.false16 + +cond.true16: ; preds = %cond.end8 + br label %cond.end16 + +cond.false16: ; preds = %cond.end8 + br label %cond.end16 + +cond.end16: ; preds = %cond.false16, %cond.true16 + %cond16 = phi i16 [ 0, %cond.true16 ], [ 1, %cond.false16 ] + call void @fooi16(i16 %cond16) + ret void +} diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll index 695dbba..dd460b2 100644 --- a/test/CodeGen/ARM/fast-isel-call.ll +++ b/test/CodeGen/ARM/fast-isel-call.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define i32 @t0(i1 zeroext %a) nounwind { %1 = zext i1 %a to i32 diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll index 33c6008..1693066 100644 --- a/test/CodeGen/ARM/fast-isel-cmp-imm.ll +++ b/test/CodeGen/ARM/fast-isel-cmp-imm.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin 
| FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define void @t1a(float %a) uwtable ssp { entry: diff --git a/test/CodeGen/ARM/fast-isel-conversion.ll b/test/CodeGen/ARM/fast-isel-conversion.ll index 14666a8..686ccad 100644 --- a/test/CodeGen/ARM/fast-isel-conversion.ll +++ b/test/CodeGen/ARM/fast-isel-conversion.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; Test sitofp @@ -94,3 +94,149 @@ entry: store double %conv, double* %b.addr, align 8 ret void } + +; Test uitofp + +define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp { +entry: +; ARM: uitofp_single_i32 +; ARM: vmov s0, r0 +; ARM: vcvt.f32.u32 s0, s0 +; THUMB: uitofp_single_i32 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f32.u32 s0, s0 + %b.addr = alloca float, align 4 + %conv = uitofp i32 %a to float + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp { +entry: +; ARM: uitofp_single_i16 +; ARM: uxth r0, r0 +; ARM: vmov s0, r0 +; ARM: vcvt.f32.u32 s0, s0 +; THUMB: uitofp_single_i16 +; THUMB: uxth r0, r0 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f32.u32 s0, s0 + %b.addr = alloca float, align 4 + %conv = uitofp i16 %a to float + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @uitofp_single_i8(i8 %a) nounwind ssp { +entry: +; ARM: uitofp_single_i8 +; ARM: uxtb r0, r0 +; ARM: vmov s0, r0 +; ARM: vcvt.f32.u32 s0, s0 +; THUMB: uitofp_single_i8 +; THUMB: uxtb r0, r0 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f32.u32 s0, s0 + %b.addr = alloca float, align 4 + %conv = uitofp i8 %a to float + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp { +entry: +; ARM: uitofp_double_i32 +; ARM: vmov s0, r0 +; ARM: vcvt.f64.u32 d16, s0 +; THUMB: uitofp_double_i32 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f64.u32 d16, s0 + %b.addr = alloca double, align 8 + %conv = uitofp i32 %a to double + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp { +entry: +; ARM: uitofp_double_i16 +; ARM: uxth r0, r0 +; ARM: vmov s0, r0 +; ARM: vcvt.f64.u32 d16, s0 +; THUMB: uitofp_double_i16 +; THUMB: uxth r0, r0 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f64.u32 d16, s0 + %b.addr = alloca double, align 8 + %conv = uitofp i16 %a to double + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp { +entry: +; ARM: uitofp_double_i8 +; ARM: uxtb r0, r0 +; ARM: vmov s0, r0 +; ARM: vcvt.f64.u32 d16, s0 +; THUMB: uitofp_double_i8 +; THUMB: uxtb r0, r0 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f64.u32 d16, s0 + %b.addr = alloca double, align 8 + %conv = uitofp i8 %a to double + store double %conv, double* %b.addr, align 8 + ret void +} + +; Test fptosi + 
+define void @fptosi_float(float %a) nounwind ssp { +entry: +; ARM: fptosi_float +; ARM: vcvt.s32.f32 s0, s0 +; THUMB: fptosi_float +; THUMB: vcvt.s32.f32 s0, s0 + %b.addr = alloca i32, align 4 + %conv = fptosi float %a to i32 + store i32 %conv, i32* %b.addr, align 4 + ret void +} + +define void @fptosi_double(double %a) nounwind ssp { +entry: +; ARM: fptosi_double +; ARM: vcvt.s32.f64 s0, d16 +; THUMB: fptosi_double +; THUMB: vcvt.s32.f64 s0, d16 + %b.addr = alloca i32, align 8 + %conv = fptosi double %a to i32 + store i32 %conv, i32* %b.addr, align 8 + ret void +} + +; Test fptoui + +define void @fptoui_float(float %a) nounwind ssp { +entry: +; ARM: fptoui_float +; ARM: vcvt.u32.f32 s0, s0 +; THUMB: fptoui_float +; THUMB: vcvt.u32.f32 s0, s0 + %b.addr = alloca i32, align 4 + %conv = fptoui float %a to i32 + store i32 %conv, i32* %b.addr, align 4 + ret void +} + +define void @fptoui_double(double %a) nounwind ssp { +entry: +; ARM: fptoui_double +; ARM: vcvt.u32.f64 s0, d16 +; THUMB: fptoui_double +; THUMB: vcvt.u32.f64 s0, d16 + %b.addr = alloca i32, align 8 + %conv = fptoui double %a to i32 + store i32 %conv, i32* %b.addr, align 8 + ret void +} diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll index 028d940..7e147c7 100644 --- a/test/CodeGen/ARM/fast-isel-deadcode.ll +++ b/test/CodeGen/ARM/fast-isel-deadcode.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; Target-specific selector can't properly handle the double because it isn't ; being passed via a register, so the materialized arguments become dead code. 
@@ -15,8 +15,7 @@ entry: ; THUMB-NOT: sxtb ; THUMB: movs r0, #0 ; THUMB: movt r0, #0 -; THUMB: add sp, #32 -; THUMb: pop {r7, pc} +; THUMB: pop ret i32 0 } diff --git a/test/CodeGen/ARM/fast-isel-icmp.ll b/test/CodeGen/ARM/fast-isel-icmp.ll index deffe7b..8764bef 100644 --- a/test/CodeGen/ARM/fast-isel-icmp.ll +++ b/test/CodeGen/ARM/fast-isel-icmp.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define i32 @icmp_i16_unsigned(i16 %a, i16 %b) nounwind { entry: diff --git a/test/CodeGen/ARM/fast-isel-indirectbr.ll b/test/CodeGen/ARM/fast-isel-indirectbr.ll new file mode 100644 index 0000000..be8035e --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-indirectbr.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB + +define void @t1(i8* %x) { +entry: +; ARM: t1 +; THUMB: t1 + br label %L0 + +L0: + br label %L1 + +L1: + indirectbr i8* %x, [ label %L0, label %L1 ] +; ARM: bx r0 +; THUMB: mov pc, r0 +} diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll index 3ef8bce..e6bdfa7 100644 --- a/test/CodeGen/ARM/fast-isel-intrinsic.ll +++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -1,19 +1,21 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB @message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1 @temp = common global [60 x i8] zeroinitializer, align 1 define void @t1() nounwind ssp { ; ARM: t1 -; ARM: ldr r0, LCPI0_0 +; ARM: movw r0, :lower16:_message1 +; ARM: movt r0, :upper16:_message1 ; ARM: add r0, r0, #5 ; ARM: movw r1, #64 ; ARM: movw r2, #10 ; ARM: uxtb r1, r1 ; ARM: bl _memset ; THUMB: t1 -; THUMB: ldr.n r0, LCPI0_0 +; THUMB: movw r0, :lower16:_message1 +; THUMB: movt r0, :upper16:_message1 ; THUMB: adds r0, #5 ; THUMB: movs r1, #64 ; THUMB: movt r1, #0 @@ -29,7 +31,8 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind define void @t2() nounwind ssp { ; ARM: t2 -; ARM: ldr r0, LCPI1_0 +; ARM: movw r0, :lower16:L_temp$non_lazy_ptr +; ARM: movt r0, :upper16:L_temp$non_lazy_ptr ; ARM: ldr r0, [r0] ; ARM: add r1, r0, #4 ; ARM: add r0, r0, #16 @@ -39,7 +42,8 @@ define void @t2() nounwind ssp { ; ARM: ldr r1, [sp] @ 4-byte Reload ; ARM: bl _memcpy ; THUMB: t2 -; THUMB: ldr.n r0, LCPI1_0 +; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr 
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr ; THUMB: ldr r0, [r0] ; THUMB: adds r1, r0, #4 ; THUMB: adds r0, #16 @@ -55,7 +59,8 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, define void @t3() nounwind ssp { ; ARM: t3 -; ARM: ldr r0, LCPI2_0 +; ARM: movw r0, :lower16:L_temp$non_lazy_ptr +; ARM: movt r0, :upper16:L_temp$non_lazy_ptr ; ARM: ldr r0, [r0] ; ARM: add r1, r0, #4 ; ARM: add r0, r0, #16 @@ -63,7 +68,8 @@ define void @t3() nounwind ssp { ; ARM: mov r0, r1 ; ARM: bl _memmove ; THUMB: t3 -; THUMB: ldr.n r0, LCPI2_0 +; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr +; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr ; THUMB: ldr r0, [r0] ; THUMB: adds r1, r0, #4 ; THUMB: adds r0, #16 @@ -77,26 +83,24 @@ define void @t3() nounwind ssp { define void @t4() nounwind ssp { ; ARM: t4 -; ARM: ldr r0, LCPI3_0 +; ARM: movw r0, :lower16:L_temp$non_lazy_ptr +; ARM: movt r0, :upper16:L_temp$non_lazy_ptr ; ARM: ldr r0, [r0] -; ARM: ldr r1, LCPI3_1 -; ARM: ldr r1, [r1] -; ARM: ldr r2, [r1, #16] -; ARM: str r2, [r0, #4] -; ARM: ldr r2, [r1, #20] -; ARM: str r2, [r0, #8] -; ARM: ldrh r1, [r1, #24] +; ARM: ldr r1, [r0, #16] +; ARM: str r1, [r0, #4] +; ARM: ldr r1, [r0, #20] +; ARM: str r1, [r0, #8] +; ARM: ldrh r1, [r0, #24] ; ARM: strh r1, [r0, #12] ; ARM: bx lr -; THUMB: ldr.n r0, LCPI3_0 +; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr +; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr ; THUMB: ldr r0, [r0] -; THUMB: ldr.n r1, LCPI3_1 -; THUMB: ldr r1, [r1] -; THUMB: ldr r2, [r1, #16] -; THUMB: str r2, [r0, #4] -; THUMB: ldr r2, [r1, #20] -; THUMB: str r2, [r0, #8] -; THUMB: ldrh r1, [r1, #24] +; THUMB: ldr r1, [r0, #16] +; THUMB: str r1, [r0, #4] +; THUMB: ldr r1, [r0, #20] +; THUMB: str r1, [r0, #8] +; THUMB: ldrh r1, [r0, #24] ; THUMB: strh r1, [r0, #12] ; THUMB: bx lr call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll index 0b8a768..2a88678 100644 --- a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll +++ b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define i32 @t1(i32* nocapture %ptr) nounwind readonly { entry: diff --git a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll index dcfc9d0..e8cc2b2 100644 --- a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll +++ b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM ; rdar://10418009 define zeroext i16 @t1(i16* nocapture %a) nounwind uwtable readonly ssp { diff --git a/test/CodeGen/ARM/fast-isel-mvn.ll b/test/CodeGen/ARM/fast-isel-mvn.ll index daf56e7..b180e43 100644 --- a/test/CodeGen/ARM/fast-isel-mvn.ll +++ b/test/CodeGen/ARM/fast-isel-mvn.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | 
FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; rdar://10412592 ; Note: The Thumb code is being generated by the target-independent selector. @@ -104,4 +104,4 @@ entry: ; THUMB: movt r0, #33023 call void @foo(i32 -2130706433) ret void -}
\ No newline at end of file +} diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll index 4203537..e50c3a4 100644 --- a/test/CodeGen/ARM/fast-isel-redefinition.ll +++ b/test/CodeGen/ARM/fast-isel-redefinition.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -regalloc=basic < %s +; RUN: llc -O0 -optimize-regalloc -regalloc=basic < %s ; This isn't exactly a useful set of command-line options, but check that it ; doesn't crash. (It was crashing because a register was getting redefined.) diff --git a/test/CodeGen/ARM/fast-isel-ret.ll b/test/CodeGen/ARM/fast-isel-ret.ll index f7f4521..689b169 100644 --- a/test/CodeGen/ARM/fast-isel-ret.ll +++ b/test/CodeGen/ARM/fast-isel-ret.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s ; Sign-extend of i1 currently not supported by fast-isel ;define signext i1 @ret0(i1 signext %a) nounwind uwtable ssp { @@ -46,3 +46,12 @@ entry: ; CHECK: bx lr ret i16 %a } + +define i16 @ret6(i16 %a) nounwind uwtable ssp { +entry: +; CHECK: ret6 +; CHECK-NOT: uxth +; CHECK-NOT: sxth +; CHECK: bx lr + ret i16 %a +} diff --git a/test/CodeGen/ARM/fast-isel-select.ll b/test/CodeGen/ARM/fast-isel-select.ll index 9ac63d6..b83a733 100644 --- a/test/CodeGen/ARM/fast-isel-select.ll +++ b/test/CodeGen/ARM/fast-isel-select.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define i32 @t1(i1 %c) nounwind readnone { entry: diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll index 648d711..905543a 100644 --- a/test/CodeGen/ARM/fast-isel.ll +++ b/test/CodeGen/ARM/fast-isel.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; Very basic fast-isel functionality. 
define i32 @add(i32 %a, i32 %b) nounwind { @@ -142,21 +142,19 @@ define void @test4() { store i32 %b, i32* @test4g ret void -; THUMB: ldr.n r0, LCPI4_1 +; THUMB: movw r0, :lower16:L_test4g$non_lazy_ptr +; THUMB: movt r0, :upper16:L_test4g$non_lazy_ptr ; THUMB: ldr r0, [r0] -; THUMB: ldr r0, [r0] -; THUMB: adds r0, #1 -; THUMB: ldr.n r1, LCPI4_0 -; THUMB: ldr r1, [r1] -; THUMB: str r0, [r1] +; THUMB: ldr r1, [r0] +; THUMB: adds r1, #1 +; THUMB: str r1, [r0] -; ARM: ldr r0, LCPI4_1 +; ARM: movw r0, :lower16:L_test4g$non_lazy_ptr +; ARM: movt r0, :upper16:L_test4g$non_lazy_ptr ; ARM: ldr r0, [r0] -; ARM: ldr r0, [r0] -; ARM: add r0, r0, #1 -; ARM: ldr r1, LCPI4_0 -; ARM: ldr r1, [r1] -; ARM: str r0, [r1] +; ARM: ldr r1, [r0] +; ARM: add r1, r1, #1 +; ARM: str r1, [r0] } ; Check unaligned stores diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll index c4dbeb9..87115cc 100644 --- a/test/CodeGen/ARM/fcopysign.ll +++ b/test/CodeGen/ARM/fcopysign.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT -; RUN: llc < %s -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD +; RUN: llc < %s -disable-post-ra -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT +; RUN: llc < %s -disable-post-ra -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD ; rdar://8984306 define float @test1(float %x, float %y) nounwind { @@ -60,8 +60,8 @@ entry: define float @test5() nounwind { entry: ; SOFT: test5: -; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000 ; SOFT: vmov [[REG7:(d[0-9]+)]], r0, r1 +; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000 ; SOFT: vshr.u64 [[REG7]], [[REG7]], #32 ; SOFT: vbsl [[REG6]], [[REG7]], %0 = tail call double (...)* @bar() nounwind diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll index ad03202..80925c7 100644 --- a/test/CodeGen/ARM/fpcmp-opt.ll +++ b/test/CodeGen/ARM/fpcmp-opt.ll @@ -1,24 +1,16 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s -; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s +; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck %s ; rdar://7461510 +; rdar://10964603 +; Disable this optimization unless we know one of them is zero. 
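; Illustrative sketch of the trick being checked (my summary; %x and the
; value names are invented, not taken from the tests below): a comparison
; against +/-0.0 can be answered in integer registers because +0.0 is bit
; pattern 0x00000000 and -0.0 is 0x80000000, so clearing the sign bit and
; testing for zero is enough:
;   %bits = bitcast float %x to i32
;   %mag  = and i32 %bits, 2147483647   ; drop the sign bit (0x7fffffff)
;   %isz  = icmp eq i32 %mag, 0         ; true iff %x is +0.0 or -0.0
; The bfc/tst sequences checked in t2 and t3 below are the ARM spellings of
; that mask-and-compare.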
define arm_apcscc i32 @t1(float* %a, float* %b) nounwind { entry: -; FINITE: t1: -; FINITE-NOT: vldr -; FINITE: ldr -; FINITE: ldr -; FINITE: cmp r0, r1 -; FINITE-NOT: vcmpe.f32 -; FINITE-NOT: vmrs -; FINITE: beq - -; NAN: t1: -; NAN: vldr s0, -; NAN: vldr s1, -; NAN: vcmpe.f32 s1, s0 -; NAN: vmrs apsr_nzcv, fpscr -; NAN: beq +; CHECK: t1: +; CHECK: vldr [[S0:s[0-9]+]], +; CHECK: vldr [[S1:s[0-9]+]], +; CHECK: vcmpe.f32 [[S1]], [[S0]] +; CHECK: vmrs apsr_nzcv, fpscr +; CHECK: beq %0 = load float* %a %1 = load float* %b %2 = fcmp une float %0, %1 @@ -33,17 +25,21 @@ bb2: ret i32 %4 } +; If one side is zero, the other size sign bit is masked off to allow +; +0.0 == -0.0 define arm_apcscc i32 @t2(double* %a, double* %b) nounwind { entry: -; FINITE: t2: -; FINITE-NOT: vldr -; FINITE: ldrd r0, r1, [r0] -; FINITE-NOT: b LBB -; FINITE: cmp r0, #0 -; FINITE: cmpeq r1, #0 -; FINITE-NOT: vcmpe.f32 -; FINITE-NOT: vmrs -; FINITE: bne +; CHECK: t2: +; CHECK-NOT: vldr +; CHECK: ldr [[REG1:(r[0-9]+)]], [r0] +; CHECK: ldr [[REG2:(r[0-9]+)]], [r0, #4] +; CHECK-NOT: b LBB +; CHECK: cmp [[REG1]], #0 +; CHECK: bfc [[REG2]], #31, #1 +; CHECK: cmpeq [[REG2]], #0 +; CHECK-NOT: vcmpe.f32 +; CHECK-NOT: vmrs +; CHECK: bne %0 = load double* %a %1 = fcmp oeq double %0, 0.000000e+00 br i1 %1, label %bb1, label %bb2 @@ -59,13 +55,14 @@ bb2: define arm_apcscc i32 @t3(float* %a, float* %b) nounwind { entry: -; FINITE: t3: -; FINITE-NOT: vldr -; FINITE: ldr r0, [r0] -; FINITE: cmp r0, #0 -; FINITE-NOT: vcmpe.f32 -; FINITE-NOT: vmrs -; FINITE: bne +; CHECK: t3: +; CHECK-NOT: vldr +; CHECK: ldr [[REG3:(r[0-9]+)]], [r0] +; CHECK: mvn [[REG4:(r[0-9]+)]], #-2147483648 +; CHECK: tst [[REG3]], [[REG4]] +; CHECK-NOT: vcmpe.f32 +; CHECK-NOT: vmrs +; CHECK: bne %0 = load float* %a %1 = fcmp oeq float %0, 0.000000e+00 br i1 %1, label %bb1, label %bb2 diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll new file mode 100644 index 0000000..40e8bb2 --- /dev/null +++ b/test/CodeGen/ARM/fusedMAC.ll @@ -0,0 +1,100 @@ +; RUN: llc < %s -march=arm -mattr=+neon,+vfp4 | FileCheck %s +; Check generated fused MAC and MLS. 
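; Background note (my wording, not from the patch): a fused MAC computes
; round(a*b + c) with a single rounding step, while separate vmul/vadd round
; twice, so with +vfp4 the fmul+fadd / fmul+fsub pairs below can be selected
; as vfma/vfms and friends.  For comparison only (hypothetical, not used by
; this file), the fused form can also be requested directly through the fma
; intrinsic, which should map to the same instruction:
;   declare float @llvm.fma.f32(float, float, float)
;   %r = call float @llvm.fma.f32(float %a, float %b, float %c)  ; vfma.f32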
+ +define double @fusedMACTest1(double %d1, double %d2, double %d3) { +;CHECK: fusedMACTest1: +;CHECK: vfma.f64 + %1 = fmul double %d1, %d2 + %2 = fadd double %1, %d3 + ret double %2 +} + +define float @fusedMACTest2(float %f1, float %f2, float %f3) { +;CHECK: fusedMACTest2: +;CHECK: vfma.f32 + %1 = fmul float %f1, %f2 + %2 = fadd float %1, %f3 + ret float %2 +} + +define double @fusedMACTest3(double %d1, double %d2, double %d3) { +;CHECK: fusedMACTest3: +;CHECK: vfms.f64 + %1 = fmul double %d2, %d3 + %2 = fsub double %d1, %1 + ret double %2 +} + +define float @fusedMACTest4(float %f1, float %f2, float %f3) { +;CHECK: fusedMACTest4: +;CHECK: vfms.f32 + %1 = fmul float %f2, %f3 + %2 = fsub float %f1, %1 + ret float %2 +} + +define double @fusedMACTest5(double %d1, double %d2, double %d3) { +;CHECK: fusedMACTest5: +;CHECK: vfnma.f64 + %1 = fmul double %d1, %d2 + %2 = fsub double -0.0, %1 + %3 = fsub double %2, %d3 + ret double %3 +} + +define float @fusedMACTest6(float %f1, float %f2, float %f3) { +;CHECK: fusedMACTest6: +;CHECK: vfnma.f32 + %1 = fmul float %f1, %f2 + %2 = fsub float -0.0, %1 + %3 = fsub float %2, %f3 + ret float %3 +} + +define double @fusedMACTest7(double %d1, double %d2, double %d3) { +;CHECK: fusedMACTest7: +;CHECK: vfnms.f64 + %1 = fmul double %d1, %d2 + %2 = fsub double %1, %d3 + ret double %2 +} + +define float @fusedMACTest8(float %f1, float %f2, float %f3) { +;CHECK: fusedMACTest8: +;CHECK: vfnms.f32 + %1 = fmul float %f1, %f2 + %2 = fsub float %1, %f3 + ret float %2 +} + +define <2 x float> @fusedMACTest9(<2 x float> %a, <2 x float> %b) { +;CHECK: fusedMACTest9: +;CHECK: vfma.f32 + %mul = fmul <2 x float> %a, %b + %add = fadd <2 x float> %mul, %a + ret <2 x float> %add +} + +define <2 x float> @fusedMACTest10(<2 x float> %a, <2 x float> %b) { +;CHECK: fusedMACTest10: +;CHECK: vfms.f32 + %mul = fmul <2 x float> %a, %b + %sub = fsub <2 x float> %a, %mul + ret <2 x float> %sub +} + +define <4 x float> @fusedMACTest11(<4 x float> %a, <4 x float> %b) { +;CHECK: fusedMACTest11: +;CHECK: vfma.f32 + %mul = fmul <4 x float> %a, %b + %add = fadd <4 x float> %mul, %a + ret <4 x float> %add +} + +define <4 x float> @fusedMACTest12(<4 x float> %a, <4 x float> %b) { +;CHECK: fusedMACTest12: +;CHECK: vfms.f32 + %mul = fmul <4 x float> %a, %b + %sub = fsub <4 x float> %a, %mul + ret <4 x float> %sub +} diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll index 9f46ae0..893b426 100644 --- a/test/CodeGen/ARM/hello.ll +++ b/test/CodeGen/ARM/hello.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=armv6-linux-gnueabi | grep mov | count 1 ; RUN: llc < %s -mtriple=armv6-linux-gnu --disable-fp-elim | \ ; RUN: grep mov | count 2 -; RUN: llc < %s -mtriple=armv6-apple-darwin | grep mov | count 2 +; RUN: llc < %s -mtriple=armv6-apple-ios | grep mov | count 2 @str = internal constant [12 x i8] c"Hello World\00" diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll index b073a05..cd870bb 100644 --- a/test/CodeGen/ARM/ifcvt1.ll +++ b/test/CodeGen/ARM/ifcvt1.ll @@ -1,15 +1,17 @@ -; RUN: llc < %s -march=arm -mattr=+v4t -; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s define i32 @t1(i32 %a, i32 %b) { +; CHECK: t1: %tmp2 = icmp eq i32 %a, 0 br i1 %tmp2, label %cond_false, label %cond_true cond_true: +; CHECK: subeq r0, r1, #1 %tmp5 = add i32 %b, 1 ret i32 %tmp5 cond_false: +; CHECK: addne r0, r1, #1 %tmp7 = add i32 %b, -1 ret i32 %tmp7 } diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll 
index 18f87bf..a5082d8 100644 --- a/test/CodeGen/ARM/ifcvt10.ll +++ b/test/CodeGen/ARM/ifcvt10.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a9 | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a9 | FileCheck %s ; rdar://8402126 ; Make sure if-converter is not predicating vldmia and ldmia. These are ; micro-coded and would have long issue latency even if predicated on diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll index 3e2c578..eef4de0 100644 --- a/test/CodeGen/ARM/ifcvt3.ll +++ b/test/CodeGen/ARM/ifcvt3.ll @@ -1,14 +1,19 @@ -; RUN: llc < %s -march=arm -mattr=+v4t +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s ; RUN: llc < %s -march=arm -mattr=+v4t | grep cmpne | count 1 ; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 2 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK: t1: +; CHECK: cmp r2, #1 +; CHECK: cmpne r2, #7 switch i32 %c, label %cond_next [ i32 1, label %cond_true i32 7, label %cond_true ] cond_true: +; CHECK: addne r0 +; CHECK: bxne %tmp12 = add i32 %a, 1 %tmp1518 = add i32 %tmp12, %b ret i32 %tmp1518 diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll index 3615055..95f5c97 100644 --- a/test/CodeGen/ARM/ifcvt5.ll +++ b/test/CodeGen/ARM/ifcvt5.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s @x = external global i32* ; <i32**> [#uses=1] diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll index 2327657..a00deda 100644 --- a/test/CodeGen/ARM/ifcvt6.ll +++ b/test/CodeGen/ARM/ifcvt6.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s define void @foo(i32 %X, i32 %Y) { entry: diff --git a/test/CodeGen/ARM/insn-sched1.ll b/test/CodeGen/ARM/insn-sched1.ll index 1d32322..d188fae 100644 --- a/test/CodeGen/ARM/insn-sched1.ll +++ b/test/CodeGen/ARM/insn-sched1.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+v6 -; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\ +; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6 |\ ; RUN: grep mov | count 3 define i32 @test(i32 %x) { diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll index d72e9bf..a588bc3 100644 --- a/test/CodeGen/ARM/ldrd.ll +++ b/test/CodeGen/ARM/ldrd.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast | FileCheck %s -check-prefix=A8 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast | FileCheck %s -check-prefix=M3 ; rdar://6949835 +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY ; Magic ARM pair hints works best with linearscan / fast. @@ -23,3 +25,47 @@ entry: %2 = mul i64 %1, %a ret i64 %2 } + +; rdar://10435045 mixed LDRi8/LDRi12 +; +; In this case, LSR generate a sequence of LDRi8/LDRi12. We should be +; able to generate an LDRD pair here, but this is highly sensitive to +; regalloc hinting. So, this doubles as a register allocation +; test. RABasic currently does a better job within the inner loop +; because of its *lack* of hinting ability. Whereas RAGreedy keeps +; R0/R1/R2 live as the three arguments, forcing the LDRD's odd +; destination into R3. We then sensibly split LDRD again rather then +; evict another live range or use callee saved regs. 
Sorry if the test +; is sensitive to Regalloc changes, but it is an interesting case. +; +; BASIC: @f +; BASIC: %bb +; BASIC: ldrd +; BASIC: str +; GREEDY: @f +; GREEDY: %bb +; GREEDY: ldr +; GREEDY: ldr +; GREEDY: str +define void @f(i32* nocapture %a, i32* nocapture %b, i32 %n) nounwind { +entry: + %0 = add nsw i32 %n, -1 ; <i32> [#uses=2] + %1 = icmp sgt i32 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %bb, label %return + +bb: ; preds = %bb, %entry + %i.03 = phi i32 [ %tmp, %bb ], [ 0, %entry ] ; <i32> [#uses=3] + %scevgep = getelementptr i32* %a, i32 %i.03 ; <i32*> [#uses=1] + %scevgep4 = getelementptr i32* %b, i32 %i.03 ; <i32*> [#uses=1] + %tmp = add i32 %i.03, 1 ; <i32> [#uses=3] + %scevgep5 = getelementptr i32* %a, i32 %tmp ; <i32*> [#uses=1] + %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1] + %3 = load i32* %scevgep5, align 4 ; <i32> [#uses=1] + %4 = add nsw i32 %3, %2 ; <i32> [#uses=1] + store i32 %4, i32* %scevgep4, align 4 + %exitcond = icmp eq i32 %tmp, %0 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} diff --git a/test/CodeGen/ARM/lit.local.cfg b/test/CodeGen/ARM/lit.local.cfg new file mode 100644 index 0000000..dd6c50d --- /dev/null +++ b/test/CodeGen/ARM/lit.local.cfg @@ -0,0 +1,13 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +def getRoot(config): + if not config.parent: + return config + return getRoot(config.parent) + +root = getRoot(config) + +targets = set(root.targets_to_build.split()) +if not 'ARM' in targets: + config.unsupported = True + diff --git a/test/CodeGen/ARM/load_i1_select.ll b/test/CodeGen/ARM/load_i1_select.ll new file mode 100644 index 0000000..bdd4081 --- /dev/null +++ b/test/CodeGen/ARM/load_i1_select.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "thumbv7-apple-ios0.0.0" + +; Codegen should only compare one bit of the loaded value. +; rdar://10887484 + +; CHECK: foo: +; CHECK: ldrb r[[R0:[0-9]+]], [r0] +; CHECK: tst.w r[[R0]], #1 +define void @foo(i8* %call, double* %p) nounwind { +entry: + %tmp2 = load i8* %call + %tmp3 = trunc i8 %tmp2 to i1 + %cond = select i1 %tmp3, double 2.000000e+00, double 1.000000e+00 + store double %cond, double* %p + ret void +} diff --git a/test/CodeGen/ARM/log2_not_readnone.ll b/test/CodeGen/ARM/log2_not_readnone.ll new file mode 100644 index 0000000..8068abd --- /dev/null +++ b/test/CodeGen/ARM/log2_not_readnone.ll @@ -0,0 +1,15 @@ +; RUN: llc -mtriple=arm-linux-gnueabi %s -o - | FileCheck %s + +; Log2 and exp2 are string-matched to intrinsics. If they are not declared +; readnone, they can't be changed to intrinsics (because they can change errno). 
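; Sketch of the contrast being tested (assumed, not part of this file): if
; the declarations below did carry readnone, e.g.
;   declare double @log2(double) nounwind readnone
; the optimizer would be free to treat such a call like the pure intrinsic
; @llvm.log2.f64 and fold or move it; that must not happen here precisely
; because the plain libm call may set errno.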
+ +declare double @log2(double) +declare double @exp2(double) + +define void @f() { + ; CHECK: bl log2 + %1 = call double @log2(double 0.000000e+00) + ; CHECK: bl exp2 + %2 = call double @exp2(double 0.000000e+00) + ret void +} diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll index bf26a96..5b4cf9d 100644 --- a/test/CodeGen/ARM/lsr-unfolded-offset.ll +++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll @@ -12,7 +12,7 @@ ; CHECK: add target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" -target triple = "thumbv7-apple-macosx10.7.0" +target triple = "thumbv7-apple-ios" %struct.partition_entry = type { i32, i32, i64, i64 } diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll index c77402f..f566974 100644 --- a/test/CodeGen/ARM/machine-cse-cmp.ll +++ b/test/CodeGen/ARM/machine-cse-cmp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s ;rdar://8003725 @G1 = external global i32 @@ -6,6 +6,7 @@ define i32 @f1(i32 %cond1, i32 %x1, i32 %x2, i32 %x3) { entry: +; CHECK: f1: ; CHECK: cmp ; CHECK: moveq ; CHECK-NOT: cmp @@ -16,3 +17,31 @@ entry: %tmp4 = add i32 %tmp2, %tmp3 ret i32 %tmp4 } + +@foo = external global i32 +@bar = external global [250 x i8], align 1 + +; CSE of cmp across BB boundary +; rdar://10660865 +define void @f2() nounwind ssp { +entry: +; CHECK: f2: +; CHECK: cmp +; CHECK: poplt +; CHECK-NOT: cmp +; CHECK: movle + %0 = load i32* @foo, align 4 + %cmp28 = icmp sgt i32 %0, 0 + br i1 %cmp28, label %for.body.lr.ph, label %for.cond1.preheader + +for.body.lr.ph: ; preds = %entry + %1 = icmp sgt i32 %0, 1 + %smax = select i1 %1, i32 %0, i32 1 + call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false) + unreachable + +for.cond1.preheader: ; preds = %entry + ret void +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll index aeda022..fe0056c 100644 --- a/test/CodeGen/ARM/memfunc.ll +++ b/test/CodeGen/ARM/memfunc.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios -o - | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7m-darwin-eabi -o - | FileCheck %s --check-prefix=DARWIN ; RUN: llc < %s -mtriple=arm-none-eabi -o - | FileCheck --check-prefix=EABI %s @from = common global [500 x i32] zeroinitializer, align 4 @@ -18,6 +19,8 @@ entry: ; EABI memset swaps arguments ; CHECK: mov r1, #0 ; CHECK: memset + ; DARWIN: movs r1, #0 + ; DARWIN: memset ; EABI: mov r2, #0 ; EABI: __aeabi_memset call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false) diff --git a/test/CodeGen/ARM/neon_spill.ll b/test/CodeGen/ARM/neon_spill.ll new file mode 100644 index 0000000..677b9c2 --- /dev/null +++ b/test/CodeGen/ARM/neon_spill.ll @@ -0,0 +1,53 @@ +; RUN: llc < %s -verify-machineinstrs +; PR12177 +; +; This test case spills a QQQQ register. 
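; Background note (my reading; the register-class details are an assumption):
; the four <4 x i32> operands of @llvm.arm.neon.vst4.v4i32 must live in
; consecutive Q registers, so the allocator handles them as one 512-bit QQQQ
; super-register, and under enough pressure that whole tuple is spilled and
; reloaded as a unit.  Shape of the call being exercised:
;   call void @llvm.arm.neon.vst4.v4i32(i8* %p,
;        <4 x i32> %q0, <4 x i32> %q1, <4 x i32> %q2, <4 x i32> %q3, i32 16)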
+; +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-gnueabi" + +%0 = type { %1*, i32, i32, i32, i8 } +%1 = type { i32 (...)** } +%2 = type { i8*, i8*, i8*, i32 } +%3 = type { %4 } +%4 = type { i32 (...)**, %2, %4*, i8, i8 } + +declare arm_aapcs_vfpcc void @func1(%0*, float* nocapture, float* nocapture, %2*) nounwind + +declare arm_aapcs_vfpcc %0** @func2() + +declare arm_aapcs_vfpcc %2* @func3(%2*, %2*, i32) + +declare arm_aapcs_vfpcc %2** @func4() + +define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 { + call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind + %2 = call arm_aapcs_vfpcc %0** @func2() nounwind + %3 = load %0** %2, align 4, !tbaa !0 + store float 0.000000e+00, float* undef, align 4 + %4 = call arm_aapcs_vfpcc %2* @func3(%2* undef, %2* undef, i32 2956) nounwind + call arm_aapcs_vfpcc void @func1(%0* %3, float* undef, float* undef, %2* undef) + %5 = call arm_aapcs_vfpcc %0** @func2() nounwind + store float 1.000000e+00, float* undef, align 4 + call arm_aapcs_vfpcc void @func1(%0* undef, float* undef, float* undef, %2* undef) + store float 1.500000e+01, float* undef, align 4 + %6 = call arm_aapcs_vfpcc %2** @func4() nounwind + %7 = call arm_aapcs_vfpcc %2* @func3(%2* undef, %2* undef, i32 2971) nounwind + %8 = fadd float undef, -1.000000e+05 + store float %8, float* undef, align 16, !tbaa !3 + %9 = call arm_aapcs_vfpcc i32 @rand() nounwind + %10 = fmul float undef, 2.000000e+05 + %11 = fadd float %10, -1.000000e+05 + store float %11, float* undef, align 4, !tbaa !3 + call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind + ret void +} + +declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind + +declare arm_aapcs_vfpcc i32 @rand() + +!0 = metadata !{metadata !"any pointer", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} +!3 = metadata !{metadata !"float", metadata !1} diff --git a/test/CodeGen/ARM/odr_comdat.ll b/test/CodeGen/ARM/odr_comdat.ll new file mode 100644 index 0000000..e28b578 --- /dev/null +++ b/test/CodeGen/ARM/odr_comdat.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ARMGNUEABI + +; Checking that a comdat group gets generated correctly for a static member +; of instantiated C++ templates. +; see http://sourcery.mentor.com/public/cxx-abi/abi.html#vague-itemplate +; section 5.2.6 Instantiated templates +; "Any static member data object is emitted in a COMDAT identified by its mangled +; name, in any object file with a reference to its name symbol." 
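; For reference, the mangled names demangle as follows (worked out by hand,
; so treat it as a best-effort note rather than part of the test):
;   _ZN1CIiE1iE == C<int>::i   (zero-initialized  -> the .bss comdat below)
;   _ZN1CIiE1jE == C<int>::j   (initialized to 12 -> the .data comdat below)
; i.e. both globals model static data members of a single instantiated
; class template, which is the case the ABI text above is about.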
+ +; Case 1: variable is not explicitly initialized, and ends up in a .bss section +; ARMGNUEABI: .section .bss._ZN1CIiE1iE,"aGw",%nobits,_ZN1CIiE1iE,comdat +@_ZN1CIiE1iE = weak_odr global i32 0, align 4 + +; Case 2: variable is explicitly initialized, and ends up in a .data section +; ARMGNUEABI: .section .data._ZN1CIiE1jE,"aGw",%progbits,_ZN1CIiE1jE,comdat +@_ZN1CIiE1jE = weak_odr global i32 12, align 4 diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll index ea44c28..6bb6743 100644 --- a/test/CodeGen/ARM/rev.ll +++ b/test/CodeGen/ARM/rev.ll @@ -112,11 +112,11 @@ entry: ret i32 %conv3 } +; rdar://10750814 define zeroext i16 @test9(i16 zeroext %v) nounwind readnone { entry: ; CHECK: test9 -; CHECK: rev r0, r0 -; CHECK: lsr r0, r0, #16 +; CHECK: rev16 r0, r0 %conv = zext i16 %v to i32 %shr4 = lshr i32 %conv, 8 %shl = shl nuw nsw i32 %conv, 8 diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll index 8a3133a..3a66ec5 100644 --- a/test/CodeGen/ARM/select_xform.ll +++ b/test/CodeGen/ARM/select_xform.ll @@ -58,3 +58,49 @@ define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind { %s = or i32 %z, %y ret i32 %s } + +define i32 @t5(i32 %a, i32 %b, i32 %c) nounwind { +entry: +; ARM: t5: +; ARM-NOT: moveq +; ARM: orreq r2, r2, #1 + +; T2: t5: +; T2-NOT: moveq +; T2: orreq.w r2, r2, #1 + %tmp1 = icmp eq i32 %a, %b + %tmp2 = zext i1 %tmp1 to i32 + %tmp3 = or i32 %tmp2, %c + ret i32 %tmp3 +} + +define i32 @t6(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +; ARM: t6: +; ARM-NOT: movge +; ARM: eorlt r3, r3, r2 + +; T2: t6: +; T2-NOT: movge +; T2: eorlt.w r3, r3, r2 + %cond = icmp slt i32 %a, %b + %tmp1 = select i1 %cond, i32 %c, i32 0 + %tmp2 = xor i32 %tmp1, %d + ret i32 %tmp2 +} + +define i32 @t7(i32 %a, i32 %b, i32 %c) nounwind { +entry: +; ARM: t7: +; ARM-NOT: lsleq +; ARM: andeq r2, r2, r2, lsl #1 + +; T2: t7: +; T2-NOT: lsleq.w +; T2: andeq.w r2, r2, r2, lsl #1 + %tmp1 = shl i32 %c, 1 + %cond = icmp eq i32 %a, %b + %tmp2 = select i1 %cond, i32 %tmp1, i32 -1 + %tmp3 = and i32 %c, %tmp2 + ret i32 %tmp3 +} + diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll index 964cef0..521ffa1 100644 --- a/test/CodeGen/ARM/shifter_operand.ll +++ b/test/CodeGen/ARM/shifter_operand.ll @@ -54,12 +54,12 @@ declare i8* @malloc(...) define fastcc void @test4(i16 %addr) nounwind { entry: ; A8: test4: -; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2] -; A8: str [[REG]], [r0, r1, lsl #2] +; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]! +; A8: str [[REG]], [r0] ; A9: test4: -; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2] -; A9: str [[REG]], [r0, r1, lsl #2] +; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]! 
+; A9: str [[REG]], [r0] %0 = tail call i8* (...)* @malloc(i32 undef) nounwind %1 = bitcast i8* %0 to i32* %2 = sext i16 %addr to i32 diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll index bf4e55c..057ea11 100644 --- a/test/CodeGen/ARM/spill-q.ll +++ b/test/CodeGen/ARM/spill-q.ll @@ -11,7 +11,7 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly define void @aaa(%quuz* %this, i8* %block) { ; CHECK: aaa: -; CHECK: bic sp, sp, #15 +; CHECK: bic {{.*}}, #15 ; CHECK: vst1.64 {{.*}}sp, :128 ; CHECK: vld1.64 {{.*}}sp, :128 entry: diff --git a/test/CodeGen/ARM/tail-dup.ll b/test/CodeGen/ARM/tail-dup.ll new file mode 100644 index 0000000..e015bf0 --- /dev/null +++ b/test/CodeGen/ARM/tail-dup.ll @@ -0,0 +1,44 @@ +; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=dynamic-no-pic -mcpu=cortex-a8 -asm-verbose=false | FileCheck %s + +; We should be able to tail-duplicate the basic block containing the indirectbr +; into all of its predecessors. +; CHECK: fn: +; CHECK: mov pc +; CHECK: mov pc +; CHECK: mov pc + +@fn.codetable = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@fn, %RETURN), i8* blockaddress(@fn, %INCREMENT), i8* blockaddress(@fn, %DECREMENT)], align 4 + +define i32 @fn(i32* nocapture %opcodes) nounwind readonly ssp { +entry: + %0 = load i32* %opcodes, align 4, !tbaa !0 + %arrayidx = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %0 + br label %indirectgoto + +INCREMENT: ; preds = %indirectgoto + %inc = add nsw i32 %result.0, 1 + %1 = load i32* %opcodes.addr.0, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %1 + br label %indirectgoto + +DECREMENT: ; preds = %indirectgoto + %dec = add nsw i32 %result.0, -1 + %2 = load i32* %opcodes.addr.0, align 4, !tbaa !0 + %arrayidx4 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %2 + br label %indirectgoto + +indirectgoto: ; preds = %DECREMENT, %INCREMENT, %entry + %result.0 = phi i32 [ 0, %entry ], [ %dec, %DECREMENT ], [ %inc, %INCREMENT ] + %opcodes.pn = phi i32* [ %opcodes, %entry ], [ %opcodes.addr.0, %DECREMENT ], [ %opcodes.addr.0, %INCREMENT ] + %indirect.goto.dest.in = phi i8** [ %arrayidx, %entry ], [ %arrayidx4, %DECREMENT ], [ %arrayidx2, %INCREMENT ] + %opcodes.addr.0 = getelementptr inbounds i32* %opcodes.pn, i32 1 + %indirect.goto.dest = load i8** %indirect.goto.dest.in, align 4 + indirectbr i8* %indirect.goto.dest, [label %RETURN, label %INCREMENT, label %DECREMENT] + +RETURN: ; preds = %indirectgoto + ret i32 %result.0 +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/test-sharedidx.ll b/test/CodeGen/ARM/test-sharedidx.ll new file mode 100644 index 0000000..93340c3 --- /dev/null +++ b/test/CodeGen/ARM/test-sharedidx.ll @@ -0,0 +1,96 @@ +; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a9 -stress-ivchain | FileCheck %s +; REQUIRES: asserts + +; @sharedidx is an unrolled variant of this loop: +; for (unsigned long i = 0; i < len; i += s) { +; c[i] = a[i] + b[i]; +; } +; where 's' cannot be folded into the addressing mode. +; +; This is not quite profitable to chain. But with -stress-ivchain, we +; can form three address chains in place of the shared induction +; variable. 
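; Rough picture of the chains (my sketch; the value names are invented):
; instead of re-deriving a[i], b[i] and c[i] from one shared induction
; variable each iteration, -stress-ivchain gives every array its own pointer
; that advances by %s, which is exactly what the pre-indexed, write-back
; "ldrb ..., [...]!" forms checked below express:
;   %a.next = getelementptr i8* %a.cur, i32 %s
;   %b.next = getelementptr i8* %b.cur, i32 %s
;   %c.next = getelementptr i8* %c.cur, i32 %s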
+ +; rdar://10674430 +define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp { +entry: +; CHECK: sharedidx: + %cmp8 = icmp eq i32 %len, 0 + br i1 %cmp8, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.body.3 +; CHECK: %for.body +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! + %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8* %a, i32 %i.09 + %0 = load i8* %arrayidx, align 1 + %conv6 = zext i8 %0 to i32 + %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09 + %1 = load i8* %arrayidx1, align 1 + %conv27 = zext i8 %1 to i32 + %add = add nsw i32 %conv27, %conv6 + %conv3 = trunc i32 %add to i8 + %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09 + store i8 %conv3, i8* %arrayidx4, align 1 + %add5 = add i32 %i.09, %s + %cmp = icmp ult i32 %add5, %len + br i1 %cmp, label %for.body.1, label %for.end + +for.end: ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry + ret void + +for.body.1: ; preds = %for.body +; CHECK: %for.body.1 +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! + %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5 + %2 = load i8* %arrayidx.1, align 1 + %conv6.1 = zext i8 %2 to i32 + %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5 + %3 = load i8* %arrayidx1.1, align 1 + %conv27.1 = zext i8 %3 to i32 + %add.1 = add nsw i32 %conv27.1, %conv6.1 + %conv3.1 = trunc i32 %add.1 to i8 + %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5 + store i8 %conv3.1, i8* %arrayidx4.1, align 1 + %add5.1 = add i32 %add5, %s + %cmp.1 = icmp ult i32 %add5.1, %len + br i1 %cmp.1, label %for.body.2, label %for.end + +for.body.2: ; preds = %for.body.1 +; CHECK: %for.body.2 +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! + %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1 + %4 = load i8* %arrayidx.2, align 1 + %conv6.2 = zext i8 %4 to i32 + %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1 + %5 = load i8* %arrayidx1.2, align 1 + %conv27.2 = zext i8 %5 to i32 + %add.2 = add nsw i32 %conv27.2, %conv6.2 + %conv3.2 = trunc i32 %add.2 to i8 + %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1 + store i8 %conv3.2, i8* %arrayidx4.2, align 1 + %add5.2 = add i32 %add5.1, %s + %cmp.2 = icmp ult i32 %add5.2, %len + br i1 %cmp.2, label %for.body.3, label %for.end + +for.body.3: ; preds = %for.body.2 +; CHECK: %for.body.3 +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! 
+ %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2 + %6 = load i8* %arrayidx.3, align 1 + %conv6.3 = zext i8 %6 to i32 + %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2 + %7 = load i8* %arrayidx1.3, align 1 + %conv27.3 = zext i8 %7 to i32 + %add.3 = add nsw i32 %conv27.3, %conv6.3 + %conv3.3 = trunc i32 %add.3 to i8 + %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2 + store i8 %conv3.3, i8* %arrayidx4.3, align 1 + %add5.3 = add i32 %add5.2, %s + %cmp.3 = icmp ult i32 %add5.3, %len + br i1 %cmp.3, label %for.body, label %for.end +} diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll index be95657..0c23879 100644 --- a/test/CodeGen/ARM/vmov.ll +++ b/test/CodeGen/ARM/vmov.ll @@ -381,3 +381,20 @@ entry: store <4 x float> %b, <4 x float> *%p ret void } + +; Vector any_extends must be selected as either vmovl.u or vmovl.s. +; rdar://10723651 +define void @any_extend(<4 x i1> %x, <4 x i32> %y) nounwind ssp { +entry: +;CHECK: any_extend +;CHECK: vmovl + %and.i186 = zext <4 x i1> %x to <4 x i32> + %add.i185 = sub <4 x i32> %and.i186, %y + %sub.i = sub <4 x i32> %add.i185, zeroinitializer + %add.i = add <4 x i32> %sub.i, zeroinitializer + %vmovn.i = trunc <4 x i32> %add.i to <4 x i16> + tail call void @llvm.arm.neon.vst1.v4i16(i8* undef, <4 x i16> %vmovn.i, i32 2) + unreachable +} + +declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll index 915a84b..fb05a20 100644 --- a/test/CodeGen/ARM/vst2.ll +++ b/test/CodeGen/ARM/vst2.ll @@ -110,6 +110,24 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind { ret void } +define i8* @vst2update(i8* %out, <4 x i16>* %B) nounwind { +;CHECK: vst2update +;CHECK: vst2.16 {d16, d17}, [r0]! + %tmp1 = load <4 x i16>* %B + tail call void @llvm.arm.neon.vst2.v4i16(i8* %out, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 2) + %t5 = getelementptr inbounds i8* %out, i32 16 + ret i8* %t5 +} + +define i8* @vst2update2(i8 * %out, <4 x float> * %this) nounwind optsize ssp align 2 { +;CHECK: vst2update2 +;CHECK: vst2.32 {d16, d17, d18, d19}, [r0]! + %tmp1 = load <4 x float>* %this + call void @llvm.arm.neon.vst2.v4f32(i8* %out, <4 x float> %tmp1, <4 x float> %tmp1, i32 4) nounwind + %tmp2 = getelementptr inbounds i8* %out, i32 32 + ret i8* %tmp2 +} + declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind |