Update to LLVM 3.5a.

Change-Id: Ifadecab779f128e62e430c2b4f6ddd84953ed617
author: Stephen Hines <srhines@google.com> 2014-04-23 16:57:46 -0700
committer: Stephen Hines <srhines@google.com> 2014-04-24 15:53:16 -0700
commit: 36b56886974eae4f9c5ebc96befd3e7bfe5de338 (patch)
tree: e6cfb69fbbd937f450eeb83bfb83b9da3b01275a /test/CodeGen/ARM
parent: 69a8640022b04415ae9fac62f8ab090601d8f889 (diff)
download: external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.zip
external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.tar.gz
external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.tar.bz2
353 files changed, 5051 insertions, 938 deletions
diff --git a/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll b/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
index a0235f7..f8bd886 100644
--- a/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
+++ b/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6
+; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o /dev/null
 
 %struct.layer_data = type { i32, [2048 x i8], i8*, [16 x i8], i32, i8*, i32, i32, [64 x i32], [64 x i32], [64 x i32], [64 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [12 x [64 x i16]] }
 @ld = external global %struct.layer_data*               ; <%struct.layer_data**> [#uses=1]
diff --git a/test/CodeGen/ARM/2007-04-03-PEIBug.ll b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
index 8d3337c..cf5094f 100644
--- a/test/CodeGen/ARM/2007-04-03-PEIBug.ll
+++ b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | not grep "add.*#0"
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define i32 @foo() {
 entry:
@@ -10,3 +10,6 @@ entry:
 }
 
 declare i32 @bar(...)
+
+; CHECK-NOT: add{{.*}}#0
+
diff --git a/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll b/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll
index b3b0769..99e67d5 100644
--- a/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll
+++ b/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6
+; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o /dev/null
 
 define i32 @test3() {
 	tail call void asm sideeffect "/* number: ${0:c} */", "i"( i32 1 )
diff --git a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
index 670048b..5988c65 100644
--- a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
+++ b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | not grep "str.*\!"
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 	%struct.shape_edge_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32 }
 	%struct.shape_path_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32, i32, i32 }
@@ -32,3 +32,6 @@ bb140:		; preds = %bb140, %cond_false
 bb174:		; preds = %bb140, %cond_false
 	ret %struct.shape_path_t* null
 }
+
+; CHECK-NOT: str{{.*}}!
+
diff --git a/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll b/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
index a604c5c..95aa595 100644
--- a/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
+++ b/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6 | not grep 255
+; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o - | FileCheck %s
 
 define i32 @main(i32 %argc, i8** %argv) {
 entry:
@@ -12,3 +12,6 @@ bb2:		; preds = %bb1
 bb3:		; preds = %bb1
 	ret i32 0
 }
+
+; CHECK-NOT: 255
+
diff --git a/test/CodeGen/ARM/2008-07-17-Fdiv.ll b/test/CodeGen/ARM/2008-07-17-Fdiv.ll
index 4cb768e..9f50d92 100644
--- a/test/CodeGen/ARM/2008-07-17-Fdiv.ll
+++ b/test/CodeGen/ARM/2008-07-17-Fdiv.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 
 define float @f(float %a, float %b) nounwind  {
 	%tmp = fdiv float %a, %b
diff --git a/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll b/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
index 83fde07..e86bc1b 100644
--- a/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
+++ b/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 ; PR2589
 
 define void @main({ i32 }*) {
diff --git a/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll b/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
index 601a516..d16ad8c 100644
--- a/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
+; RUN: llc -mtriple=arm-eabi -mattr=+v6,+vfp2 %s -o /dev/null
 
 define hidden i64 @__muldi3(i64 %u, i64 %v) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll b/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
index a1ce384..7bb1429 100644
--- a/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
+++ b/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 
 	%struct.hit_t = type { %struct.v_t, double }
 	%struct.node_t = type { %struct.hit_t, %struct.hit_t, i32 }
diff --git a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
index 7342f69..e90c5b3 100644
--- a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
+++ b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | grep "swi 107"
+; RUN: llc -mtriple=arm-eabi -no-integrated-as %s -o - | FileCheck %s
 
 define i32 @_swilseek(i32) nounwind {
 entry:
@@ -18,3 +18,6 @@ return:		; preds = %entry
 	%4 = load i32* %retval		; <i32> [#uses=1]
 	ret i32 %4
 }
+
+; CHECK: swi 107
+
diff --git a/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll b/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
index f6b3d2c..ade6a10 100644
--- a/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
+++ b/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 ; PR3795
 
 define fastcc void @_D3foo3fooFAriZv({ i32, { double, double }* } %d_arg, i32 %x_arg) {
diff --git a/test/CodeGen/ARM/2009-04-08-FREM.ll b/test/CodeGen/ARM/2009-04-08-FREM.ll
index 99907fc..606c6b1 100644
--- a/test/CodeGen/ARM/2009-04-08-FREM.ll
+++ b/test/CodeGen/ARM/2009-04-08-FREM.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 
 declare i32 @printf(i8*, ...)
 
diff --git a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
index 05d2f26..9e32e05 100644
--- a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
+++ b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 
 define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>* %CONST) {
 entry:
diff --git a/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll b/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
index deb092b..5b17463 100644
--- a/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
+++ b/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 ; PR3954
 
 define void @foo(...) nounwind {
diff --git a/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll b/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
index 7046fcc..2bc7df0 100644
--- a/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
+++ b/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
+
 	%struct.List = type { %struct.List*, i32 }
 @Node5 = external constant %struct.List		; <%struct.List*> [#uses=1]
 @"\01LC" = external constant [7 x i8]		; <[7 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll b/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
index 1e2707f..5d59fc6 100644
--- a/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
+++ b/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm | FileCheck %s
-; RUN: llc < %s -march=thumb | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s
 ; PR4091
 
 define void @foo(i32 %i, i32* %p) nounwind {
diff --git a/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll b/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll
index e1e94b6..3cef0aa 100644
--- a/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll
+++ b/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6
+; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o /dev/null
 
 define void @test(i8* %x) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll b/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
index 6761687..bc4a95c 100644
--- a/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
+++ b/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 
 	%struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* }
 	%struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* }
diff --git a/test/CodeGen/ARM/2009-08-23-linkerprivate.ll b/test/CodeGen/ARM/2009-08-23-linkerprivate.ll
deleted file mode 100644
index 392c70a..0000000
--- a/test/CodeGen/ARM/2009-08-23-linkerprivate.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | FileCheck %s
-
-; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
-
-@"\01l_objc_msgSend_fixup_alloc" = linker_private_weak hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16
-
-; CHECK: .globl l_objc_msgSend_fixup_alloc
-; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
diff --git a/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll b/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
index ee99c70..b078ec0 100644
--- a/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
+++ b/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 ; pr4843
+
 define <4 x i16> @v2regbug(<4 x i16>* %B) nounwind {
 ;CHECK-LABEL: v2regbug:
 ;CHECK: vzip.16
diff --git a/test/CodeGen/ARM/2009-09-10-postdec.ll b/test/CodeGen/ARM/2009-09-10-postdec.ll
index 10653b5..66ffe6a 100644
--- a/test/CodeGen/ARM/2009-09-10-postdec.ll
+++ b/test/CodeGen/ARM/2009-09-10-postdec.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm < %s | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 ; Radar 7213850
 
 define i32 @test(i8* %d, i32 %x, i32 %y) nounwind {
diff --git a/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll b/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
index 758b59a..dd9a6fd 100644
--- a/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
+++ b/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon -mcpu=cortex-a9
+; RUN: llc -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a9 %s -o /dev/null
 
 define arm_aapcs_vfpcc <4 x float> @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind {
   %1 = ptrtoint i8* %pBuffer to i32
diff --git a/test/CodeGen/ARM/2009-09-24-spill-align.ll b/test/CodeGen/ARM/2009-09-24-spill-align.ll
index eb9c2d0..224bd01 100644
--- a/test/CodeGen/ARM/2009-09-24-spill-align.ll
+++ b/test/CodeGen/ARM/2009-09-24-spill-align.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 ; pr4926
 
 define void @test_vget_lanep16() nounwind {
diff --git a/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll b/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll
index b0b4cb3..5e75d46 100644
--- a/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll
+++ b/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 
 define void @"java.lang.String::getChars"([84 x i8]* %method, i32 %base_pc, [788 x i8]* %thread) {
   %1 = sub i32 undef, 48                          ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/2010-04-09-NeonSelect.ll b/test/CodeGen/ARM/2010-04-09-NeonSelect.ll
index 89d6a68..ceef083 100644
--- a/test/CodeGen/ARM/2010-04-09-NeonSelect.ll
+++ b/test/CodeGen/ARM/2010-04-09-NeonSelect.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=arm -mattr=+neon < %s
-; Radar 7770501: Don't crash on SELECT and SELECT_CC with NEON vector values.
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o /dev/null
+; rdar://7770501 : Don't crash on SELECT and SELECT_CC with NEON vector values.
 
 define void @vDSP_FFT16_copv(float* nocapture %O, float* nocapture %I, i32 %Direction) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/2010-04-14-SplitVector.ll b/test/CodeGen/ARM/2010-04-14-SplitVector.ll
index 5d0c3cf..cb3e042 100644
--- a/test/CodeGen/ARM/2010-04-14-SplitVector.ll
+++ b/test/CodeGen/ARM/2010-04-14-SplitVector.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mcpu=arm1136jf-s
+; RUN: llc -mtriple=arm-eabi -mcpu=arm1136jf-s %s -o /dev/null
 ; Radar 7854640
 
 define void @test() nounwind {
diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
index e0f50c9..cfaffd8 100644
--- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
+++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon -O0 -optimize-regalloc -regalloc=basic
+; RUN: llc -mtriple=arm-eabi -mattr=+neon -O0 -optimize-regalloc -regalloc=basic %s -o /dev/null
 
 ; This test would crash the rewriter when trying to handle a spill after one of
 ; the @llvm.arm.neon.vld3.v8i8 defined three parts of a register.
diff --git a/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
index a400b7b..5bc08b0 100644
--- a/test/CodeGen/ARM/2010-05-21-BuildVector.ll
+++ b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
 ; Radar 7872877
 
 define void @test(float* %fltp, i32 %packedValue, float* %table) nounwind {
diff --git a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
index 6f48796..f7ceb6e 100644
--- a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
+++ b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm -mattr=+neon
-; Radar 8084742
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o /dev/null
+; rdar://8084742
 
 %struct.__int8x8x2_t = type { [2 x <8 x i8>] }
 
diff --git a/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll b/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll
index 984583e..fcabc90 100644
--- a/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll
+++ b/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o /dev/null
 
 @.str271 = external constant [21 x i8], align 4   ; <[21 x i8]*> [#uses=1]
 @llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i32, i8**)* @main to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
diff --git a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
index 2842437..80822c2 100644
--- a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
+++ b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
@@ -1,4 +1,4 @@
-; RUN: llc -enable-correct-eh-support < %s
+; RUN: llc < %s
 ; PR7716
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
 target triple = "thumbv7-apple-darwin10.0.0"
diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
index 7aacd1a..bc4cc98 100644
--- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
@@ -124,6 +124,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !44 = metadata !{i32 786688, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
 !45 = metadata !{i32 27, i32 0, metadata !39, null}
 !46 = metadata !{metadata !0, metadata !9, metadata !16, metadata !17, metadata !20}
-!47 = metadata !{i32 0}
+!47 = metadata !{}
 !48 = metadata !{metadata !"small.cc", metadata !"/Users/manav/R8248330"}
 !49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
index eef6abd..4baee64 100644
--- a/test/CodeGen/ARM/2010-12-07-PEIBug.ll
+++ b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
@@ -5,11 +5,11 @@ define hidden void @foo() nounwind ssp {
 entry:
 ; CHECK-LABEL: foo:
 ; CHECK: mov r7, sp
-; CHECK-NEXT: vpush {d8}
 ; CHECK-NEXT: vpush {d10, d11}
+; CHECK-NEXT: vpush {d8}
   tail call void asm sideeffect "","~{d8},~{d10},~{d11}"() nounwind
-; CHECK: vpop {d10, d11}
-; CHECK-NEXT: vpop {d8}
+; CHECK: vpop {d8}
+; CHECK-NEXT: vpop {d10, d11}
   ret void
 }
 
diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
index f57411b..b1d59aa 100644
--- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
@@ -17,7 +17,7 @@ target triple = "thumbv7-apple-darwin10"
 ; DW_OP_constu
 ; offset
 
-;CHECK: .long Lset6
+;CHECK: .long Lset7
 ;CHECK-NEXT:        @ DW_AT_type
 ;CHECK-NEXT:        @ DW_AT_decl_file
 ;CHECK-NEXT:        @ DW_AT_decl_line
@@ -80,7 +80,7 @@ entry:
 
 !0 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1, null, null, metadata !42, i32 4} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !47, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, metadata !48, metadata !48, metadata !40, metadata !41,  metadata !41, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !47, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, metadata !48, metadata !48, metadata !40, metadata !41,  metadata !48, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !47, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5, metadata !5}
 !5 = metadata !{i32 786468, metadata !47, metadata !1, metadata !"_Bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
@@ -126,5 +126,5 @@ entry:
 !45 = metadata !{metadata !24, metadata !25}
 !46 = metadata !{metadata !27, metadata !28}
 !47 = metadata !{metadata !"foo.c", metadata !"/tmp/"}
-!48 = metadata !{i32 0}
+!48 = metadata !{}
 !49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/ARM/2011-04-12-AlignBug.ll b/test/CodeGen/ARM/2011-04-12-AlignBug.ll
index 317be94..97297f7 100644
--- a/test/CodeGen/ARM/2011-04-12-AlignBug.ll
+++ b/test/CodeGen/ARM/2011-04-12-AlignBug.ll
@@ -3,9 +3,9 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 target triple = "thumbv7-apple-darwin10.0.0"
 
 ; CHECK: align 3
-@.v = linker_private unnamed_addr constant <4 x i32> <i32 1, i32 2, i32 3, i32 4>, align 8
+@.v = private unnamed_addr constant <4 x i32> <i32 1, i32 2, i32 3, i32 4>, align 8
 ; CHECK: align 2
-@.strA = linker_private unnamed_addr constant [4 x i8] c"bar\00"
+@.strA = private unnamed_addr constant [4 x i8] c"bar\00"
 ; CHECK-NOT: align
-@.strB = linker_private unnamed_addr constant [4 x i8] c"foo\00", align 1
-@.strC = linker_private unnamed_addr constant [4 x i8] c"baz\00", section "__TEXT,__cstring,cstring_literals", align 1
+@.strB = private unnamed_addr constant [4 x i8] c"foo\00", align 1
+@.strC = private unnamed_addr constant [4 x i8] c"baz\00", section "__TEXT,__cstring,cstring_literals", align 1
diff --git a/test/CodeGen/ARM/2011-06-09-TailCallByVal.ll b/test/CodeGen/ARM/2011-06-09-TailCallByVal.ll
index 7f0f795..12cdd04 100644
--- a/test/CodeGen/ARM/2011-06-09-TailCallByVal.ll
+++ b/test/CodeGen/ARM/2011-06-09-TailCallByVal.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -relocation-model=pic -mcpu=cortex-a8 -arm-tail-calls=1 | FileCheck %s
+; RUN: llc < %s -relocation-model=pic -mcpu=cortex-a8 | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-darwin10"
diff --git a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
index 101a913..d93cc57 100644
--- a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
+++ b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -arm-tail-calls=1 | FileCheck %s
+; RUN: llc < %s | FileCheck %s
 
 ; tail call inside a function where byval argument is splitted between
 ; registers and stack is currently unsupported.
 ; XFAIL: *
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-ios"
+target triple = "thumbv7-apple-ios5.0"
 
 %struct.A = type <{ i16, i16, i32, i16, i16, i32, i16, [8 x %struct.B], [418 x i8], %struct.C }>
 %struct.B = type <{ i32, i16, i16 }>
diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
index bb78707..ed2840b 100644
--- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
@@ -8,7 +8,7 @@
 ; DW_OP_constu
 ; offset
 
-;CHECK: .long Lset8
+;CHECK: .long Lset9
 ;CHECK-NEXT:        @ DW_AT_type
 ;CHECK-NEXT:        @ DW_AT_decl_file
 ;CHECK-NEXT:        @ DW_AT_decl_line
@@ -75,7 +75,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!49}
 
-!0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !48, metadata !48, metadata !40, metadata !41,  metadata !41, null} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !48, metadata !48, metadata !40, metadata !41,  metadata !48, null} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get1", metadata !"get1", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @get1, null, null, metadata !42, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [get1]
 !2 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
@@ -123,5 +123,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !45 = metadata !{metadata !19, metadata !20}
 !46 = metadata !{metadata !27, metadata !28}
 !47 = metadata !{metadata !"ss3.c", metadata !"/private/tmp"}
-!48 = metadata !{i32 0}
+!48 = metadata !{}
 !49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/ARM/2011-10-26-memset-inline.ll b/test/CodeGen/ARM/2011-10-26-memset-inline.ll
index 03614ed..17bd291 100644
--- a/test/CodeGen/ARM/2011-10-26-memset-inline.ll
+++ b/test/CodeGen/ARM/2011-10-26-memset-inline.ll
@@ -6,10 +6,10 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
 target triple = "thumbv7-apple-ios5.0.0"
 
 ; CHECK-GENERIC:      strb
-; CHECK-GENERIT-NEXT: strb
-; CHECK-GENERIT-NEXT: strb
-; CHECK-GENERIT-NEXT: strb
-; CHECK-GENERIT-NEXT: strb
+; CHECK-GENERIC-NEXT: strb
+; CHECK-GENERIC-NEXT: strb
+; CHECK-GENERIC-NEXT: strb
+; CHECK-GENERIC-NEXT: strb
 ; CHECK-UNALIGNED:    strb
 ; CHECK-UNALIGNED:    str
 define void @foo(i8* nocapture %c) nounwind optsize {
diff --git a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
index 850c511..c8e08c2 100644
--- a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
+++ b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
 
 ; Trigger multiple NEON stores.
 ; CHECK: vst1.64
diff --git a/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll b/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
index 8a65f2e..a707a92 100644
--- a/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
+++ b/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 ; PR11319
 
 @i8_res  = global <2 x i8> <i8 0, i8 0>
diff --git a/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll b/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
index 42eb32d..c1554d8 100644
--- a/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
+++ b/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 ; PR11319
 
 @src1_v2i16 = global <2 x i16> <i16 0, i16 1>
diff --git a/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll b/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll
index 719571b..c50461a 100644
--- a/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll
+++ b/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <2 x i32> @test1(<2 x double>* %A) {
 ; CHECK: test1
diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
index a263c9c..86b58c8 100644
--- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
+++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -float-abi=soft -mcpu=cortex-a9 %s -o - | FileCheck %s
 
 @A = global <4 x float> <float 0., float 1., float 2., float 3.>
 
diff --git a/test/CodeGen/ARM/2012-04-10-DAGCombine.ll b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll
index 089dc91..9b71be2 100644
--- a/test/CodeGen/ARM/2012-04-10-DAGCombine.ll
+++ b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 -enable-unsafe-fp-math
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 -enable-unsafe-fp-math %s -o /dev/null
 ;target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
 ;target triple = "armv7-none-linux-gnueabi"
 
diff --git a/test/CodeGen/ARM/2012-05-04-vmov.ll b/test/CodeGen/ARM/2012-05-04-vmov.ll
index 14dbf7f..c604eed 100644
--- a/test/CodeGen/ARM/2012-05-04-vmov.ll
+++ b/test/CodeGen/ARM/2012-05-04-vmov.ll
@@ -1,5 +1,9 @@
-; RUN: llc -O1 -march=arm -mcpu=cortex-a9 < %s | FileCheck -check-prefix=A9-CHECK %s
-; RUN: llc -O1 -march=arm -mcpu=swift < %s | FileCheck -check-prefix=SWIFT-CHECK %s
+; RUN: llc -O1 -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - \
+; RUN:  | FileCheck -check-prefix=A9-CHECK %s
+
+; RUN: llc -O1 -mtriple=arm-eabi -mcpu=swift %s -o - \
+; RUN:  | FileCheck -check-prefix=SWIFT-CHECK %s
+
 ; Check that swift doesn't use vmov.32. <rdar://problem/10453003>.
 
 define <2 x i32> @testuvec(<2 x i32> %A, <2 x i32> %B) nounwind {
diff --git a/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll b/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll
index dd67843..7f30ae1 100644
--- a/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll
+++ b/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm -mcpu=swift < %s | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=swift %s -o - | FileCheck %s
 ; <rdar://problem/10451892>
 
 define void @f(i32 %x, i32* %p) nounwind ssp {
diff --git a/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll b/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll
index 647ebd6..e8d4fb2 100644
--- a/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll
+++ b/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 ; PR12281
 ; Test generataion of code for vmull instruction when multiplying 128-bit
diff --git a/test/CodeGen/ARM/2012-09-18-ARMv4ISelBug.ll b/test/CodeGen/ARM/2012-09-18-ARMv4ISelBug.ll
index 3bdbb3c..8d77763 100644
--- a/test/CodeGen/ARM/2012-09-18-ARMv4ISelBug.ll
+++ b/test/CodeGen/ARM/2012-09-18-ARMv4ISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mcpu=arm7tdmi | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=arm7tdmi %s -o - | FileCheck %s
 
 ; movw is only legal for V6T2 and later.
 ; rdar://12300648
diff --git a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll
index 38624e0..5235e9c 100644
--- a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll
+++ b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll
@@ -1,4 +1,4 @@
-; RUN: not llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s
+; RUN: not llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - 2>&1 | FileCheck %s
 
 ; Check for error message:
 ; CHECK: non-trivial scalar-to-vector conversion, possible invalid constraint for vector type
diff --git a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll
index 7ba693d..d389b5c 100644
--- a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll
+++ b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll
@@ -1,4 +1,4 @@
-; RUN: not llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s
+; RUN: not llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - 2>&1 | FileCheck %s
 
 ; Check for error message:
 ; CHECK: scalar-to-vector conversion failed, possible invalid constraint for vector type
diff --git a/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
index 127429b..c5eba7d 100644
--- a/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
+++ b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
@@ -4,8 +4,8 @@
 ;CHECK-LABEL: foo:
 ;CHECK: 	sub	sp, sp, #8
 ;CHECK: 	push	{r11, lr}
-;CHECK: 	str	r0, [sp, #8]
-;CHECK: 	add	r0, sp, #8
+;CHECK: 	str	r0, [sp, #12]
+;CHECK: 	add	r0, sp, #12
 ;CHECK: 	bl	fooUseParam
 ;CHECK: 	pop	{r11, lr}
 ;CHECK: 	add	sp, sp, #8
diff --git a/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll b/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll
index 08bf99b..6bd23b1 100644
--- a/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll
+++ b/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll
@@ -72,7 +72,7 @@ define void @foo(double %p0, ; --> D0
 		 double %p8, ; --> Stack
 		 i32 %p9) #0 { ; --> R0, not Stack+8
 entry:
-  tail call void @fooUseI32(i32 %p9)
+  call void @fooUseI32(i32 %p9)
   ret void
 }
 
diff --git a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
index 6db71fe..e79a3ba 100644
--- a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
+++ b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
@@ -23,9 +23,9 @@ define void @foo(double %vfp0,     ; --> D0,     NSAA=SP
 entry:
   ;CHECK: sub sp, #8
   ;CHECK: push.w {r11, lr}
-  ;CHECK: add r0, sp, #16
-  ;CHECK: str r2, [sp, #20]
-  ;CHECK: str r1, [sp, #16]
+  ;CHECK: add r0, sp, #8
+  ;CHECK: str r2, [sp, #12]
+  ;CHECK: str r1, [sp, #8]
   ;CHECK: bl  fooUseStruct
   call void @fooUseStruct(%st_t* %p1)
   ret void
diff --git a/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll
index c4f5f54..480d087 100644
--- a/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll
+++ b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll
@@ -72,6 +72,27 @@ KBBlockZero.exit:                                 ; preds = %bb2.i
   indirectbr i8* undef, [label %KBBlockZero_return_1, label %KBBlockZero_return_0]
 }
 
+@foo = global i32 ()* null
+define i32 @t4(i32 %x, i32 ()* %p_foo) {
+entry:
+;CHECK-LABEL: t4:
+;CHECK-V8-LABEL: t4:
+  %cmp = icmp slt i32 %x, 60
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %tmp.2 = call i32 %p_foo()
+  %sub = add nsw i32 %x, -1
+  br label %return
+
+if.else:                                          ; preds = %entry
+  %sub1 = add nsw i32 %x, -120
+  br label %return
+
+return:                                           ; preds = %if.else, %if.then
+  %retval.0 = phi i32 [ %sub, %if.then ], [ %sub1, %if.else ]
+  ret i32 %retval.0
+}
 
 ; If-converter was checking for the wrong predicate subsumes pattern when doing
 ; nested predicates.
diff --git a/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll b/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll
new file mode 100644
index 0000000..6c0fbd0
--- /dev/null
+++ b/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll
@@ -0,0 +1,56 @@
+; RUN: llc -mtriple=arm-eabi -mattr=+neon -print-before=post-RA-sched %s -o - 2>&1 \
+; RUN:  | FileCheck %s
+
+define void @vst(i8* %m, [4 x i64] %v) {
+entry:
+; CHECK: vst:
+; CHECK: VST1d64Q %R{{[0-9]+}}<kill>, 8, %D{{[0-9]+}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}}<imp-use>
+
+  %v0 = extractvalue [4 x i64] %v, 0
+  %v1 = extractvalue [4 x i64] %v, 1
+  %v2 = extractvalue [4 x i64] %v, 2
+  %v3 = extractvalue [4 x i64] %v, 3
+
+  %t0 = bitcast i64 %v0 to <8 x i8>
+  %t1 = bitcast i64 %v1 to <8 x i8>
+  %t2 = bitcast i64 %v2 to <8 x i8>
+  %t3 = bitcast i64 %v3 to <8 x i8>
+
+  %s0 = bitcast <8 x i8> %t0 to <1 x i64>
+  %s1 = bitcast <8 x i8> %t1 to <1 x i64>
+  %s2 = bitcast <8 x i8> %t2 to <1 x i64>
+  %s3 = bitcast <8 x i8> %t3 to <1 x i64>
+
+  %tmp0 = bitcast <1 x i64> %s2 to i64
+  %tmp1 = bitcast <1 x i64> %s3 to i64
+
+  %n0 = insertelement <2 x i64> undef, i64 %tmp0, i32 0
+  %n1 = insertelement <2 x i64> %n0, i64 %tmp1, i32 1
+
+  call void @llvm.arm.neon.vst4.v1i64(i8* %m, <1 x i64> %s0, <1 x i64> %s1, <1 x i64> %s2, <1 x i64> %s3, i32 8)
+
+  call void @bar(<2 x i64> %n1)
+
+  ret void
+}
+
+%struct.__neon_int8x8x4_t = type { <8 x i8>,  <8 x i8>,  <8 x i8>, <8 x i8> }
+define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind {
+; CHECK: vtbx4:
+; CHECK: VTBX4 {{.*}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}}<imp-use>
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = load %struct.__neon_int8x8x4_t* %B
+        %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+        %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+        %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+        %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+	%tmp7 = load <8 x i8>* %C
+	%tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7)
+  call void @bar2(%struct.__neon_int8x8x4_t %tmp2, <8 x i8> %tmp8)
+	ret <8 x i8> %tmp8
+}
+
+declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
+declare <8 x i8>  @llvm.arm.neon.vtbx4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare void @bar2(%struct.__neon_int8x8x4_t, <8 x i8>)
+declare void @bar(<2 x i64> %arg)
diff --git a/test/CodeGen/ARM/2014-02-05-vfp-regs-after-stack.ll b/test/CodeGen/ARM/2014-02-05-vfp-regs-after-stack.ll
new file mode 100644
index 0000000..4c36a2a
--- /dev/null
+++ b/test/CodeGen/ARM/2014-02-05-vfp-regs-after-stack.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -o - -filetype=asm | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+target triple = "armv8-none--eabi"
+
+; CHECK-LABEL: fn1:
+define arm_aapcs_vfpcc float @fn1(double %a, double %b, double %c, double %d, double %e, double %f, double %g, float %h, double %i, float %j) {
+  ret float %j
+; CHECK: vldr    s0, [sp, #8]
+}
+
+; CHECK-LABEL: fn2:
+define arm_aapcs_vfpcc float @fn2(double %a, double %b, double %c, double %d, double %e, double %f, float %h, <4 x float> %i, float %j) {
+  ret float %j
+; CHECK: vldr    s0, [sp, #16]
+}
+
+; CHECK-LABEL: fn3:
+define arm_aapcs_vfpcc float @fn3(float %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, float %j) #0 {
+  ret float %j
+; CHECK: vldr    s0, [sp, #8]
+}
diff --git a/test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll b/test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll
new file mode 100644
index 0000000..33bfa2f
--- /dev/null
+++ b/test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll
@@ -0,0 +1,114 @@
+; RUN: llc -mtriple=arm-linux-gnueabihf < %s | FileCheck %s
+
+%struct4bytes = type { i32 }
+%struct8bytes8align = type { i64 }
+%struct12bytes = type { i32, i32, i32 }
+
+declare void @useIntPtr(%struct4bytes*)
+declare void @useLong(i64)
+declare void @usePtr(%struct8bytes8align*)
+
+; a -> r0
+; b -> r1..r3
+; c -> sp+0..sp+7
+define void @foo1(i32 %a, %struct12bytes* byval %b, i64 %c) {
+; CHECK-LABEL: foo1
+; CHECK: sub  sp, sp, #16
+; CHECK: push  {r11, lr}
+; CHECK: add  [[SCRATCH:r[0-9]+]], sp, #12
+; CHECK: stm  [[SCRATCH]], {r1, r2, r3}
+; CHECK: ldr  r0, [sp, #24]
+; CHECK: ldr  r1, [sp, #28]
+; CHECK: bl  useLong
+; CHECK: pop  {r11, lr}
+; CHECK: add  sp, sp, #16
+
+  call void @useLong(i64 %c)
+  ret void
+}
+
+; a -> r0
+; b -> r2..r3
+define void @foo2(i32 %a, %struct8bytes8align* byval %b) {
+; CHECK-LABEL: foo2
+; CHECK: sub  sp, sp, #8
+; CHECK: push  {r11, lr}
+; CHECK: add  r0, sp, #8
+; CHECK: str  r3, [sp, #12]
+; CHECK: str  r2, [sp, #8]
+; CHECK: bl   usePtr
+; CHECK: pop  {r11, lr}
+; CHECK: add  sp, sp, #8
+
+  call void @usePtr(%struct8bytes8align* %b)
+  ret void
+}
+
+; a -> r0..r1
+; b -> r2
+define void @foo3(%struct8bytes8align* byval %a, %struct4bytes* byval %b) {
+; CHECK-LABEL: foo3
+; CHECK: sub  sp, sp, #16
+; CHECK: push  {r11, lr}
+; CHECK: add  [[SCRATCH:r[0-9]+]], sp, #8
+; CHECK: stm  [[SCRATCH]], {r0, r1, r2}
+; CHECK: add  r0, sp, #8
+; CHECK: bl   usePtr
+; CHECK: pop  {r11, lr}
+; CHECK: add  sp, sp, #16
+
+  call void @usePtr(%struct8bytes8align* %a)
+  ret void
+}
+
+; a -> r0
+; b -> r2..r3
+define void @foo4(%struct4bytes* byval %a, %struct8bytes8align* byval %b) {
+; CHECK-LABEL: foo4
+; CHECK: sub     sp, sp, #16
+; CHECK: push    {r11, lr}
+; CHECK: str     r0, [sp, #8]
+; CHECK: add     r0, sp, #16
+; CHECK: str     r3, [sp, #20]
+; CHECK: str     r2, [sp, #16]
+; CHECK: bl      usePtr
+; CHECK: pop     {r11, lr}
+; CHECK: add     sp, sp, #16
+; CHECK: mov     pc, lr
+
+  call void @usePtr(%struct8bytes8align* %b)
+  ret void
+}
+
+; a -> r0..r1
+; b -> r2
+; c -> r3
+define void @foo5(%struct8bytes8align* byval %a, %struct4bytes* byval %b, %struct4bytes* byval %c) {
+; CHECK-LABEL: foo5
+; CHECK: sub     sp, sp, #16
+; CHECK: push    {r11, lr}
+; CHECK: add     [[SCRATCH:r[0-9]+]], sp, #8
+; CHECK: stm     [[SCRATCH]], {r0, r1, r2, r3}
+; CHECK: add     r0, sp, #8
+; CHECK: bl      usePtr
+; CHECK: pop     {r11, lr}
+; CHECK: add     sp, sp, #16
+; CHECK: mov     pc, lr
+
+  call void @usePtr(%struct8bytes8align* %a)
+  ret void
+}
+
+; a..c -> r0..r2
+; d -> sp+0..sp+7
+define void @foo6(i32 %a, i32 %b, i32 %c, %struct8bytes8align* byval %d) {
+; CHECK-LABEL: foo6
+; CHECK: push {r11, lr}
+; CHECK: add  r0, sp, #8
+; CHECK: bl   usePtr
+; CHECK: pop  {r11, lr}
+; CHECK: mov  pc, lr
+
+  call void @usePtr(%struct8bytes8align* %d)
+  ret void
+}
diff --git a/test/CodeGen/ARM/DbgValueOtherTargets.test b/test/CodeGen/ARM/DbgValueOtherTargets.test
index bf90891..9ce2459 100644
--- a/test/CodeGen/ARM/DbgValueOtherTargets.test
+++ b/test/CodeGen/ARM/DbgValueOtherTargets.test
@@ -1 +1 @@
-RUN: llc -O0 -march=arm -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
+RUN: llc -O0 -mtriple=arm-eabi -asm-verbose %S/../Inputs/DbgValueOtherTargets.ll -o - | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/ARM/Windows/aapcs.ll b/test/CodeGen/ARM/Windows/aapcs.ll
new file mode 100644
index 0000000..3f9a09f
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/aapcs.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -o - %s | FileCheck %s
+
+; AAPCS mandates an 8-byte stack alignment.  The alloca is implicitly aligned,
+; and no bic is required.
+
+declare void @callee(i8 *%i)
+
+define void @caller() {
+  %i = alloca i8, align 8
+  call void @callee(i8* %i)
+  ret void
+}
+
+; CHECK: sub sp, #8
+; CHECK-NOT: bic
+
diff --git a/test/CodeGen/ARM/Windows/hard-float.ll b/test/CodeGen/ARM/Windows/hard-float.ll
new file mode 100644
index 0000000..f7b7ec2
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/hard-float.ll
@@ -0,0 +1,10 @@
+; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -o - %s | FileCheck %s
+
+define float @function(float %f, float %g) nounwind {
+entry:
+  %h = fadd float %f, %g
+  ret float %h
+}
+
+; CHECK: vadd.f32 s0, s0, s1
+
diff --git a/test/CodeGen/ARM/Windows/mangling.ll b/test/CodeGen/ARM/Windows/mangling.ll
new file mode 100644
index 0000000..ce1fe2e
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/mangling.ll
@@ -0,0 +1,9 @@
+; RUN: llc -mtriple=thumbv7-windows -mcpu=cortex-a9 -o - %s | FileCheck %s
+
+define void @function() nounwind {
+entry:
+  ret void
+}
+
+; CHECK-LABEL: function
+
diff --git a/test/CodeGen/ARM/Windows/no-aeabi.ll b/test/CodeGen/ARM/Windows/no-aeabi.ll
new file mode 100644
index 0000000..4c6676f
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/no-aeabi.ll
@@ -0,0 +1,10 @@
+; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -o - %s | FileCheck %s
+
+define i32 @divide(i32 %i, i32 %j) nounwind {
+entry:
+  %quotient = sdiv i32 %i, %j
+  ret i32 %quotient
+}
+
+; CHECK-NOT: __aeabi_idiv
+
diff --git a/test/CodeGen/ARM/Windows/no-arm-mode.ll b/test/CodeGen/ARM/Windows/no-arm-mode.ll
new file mode 100644
index 0000000..6db031f
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/no-arm-mode.ll
@@ -0,0 +1,5 @@
+; RUN: not llc -mtriple=armv7-windows-itanium -mcpu=cortex-a9 -o /dev/null %s 2>&1 \
+; RUN:  | FileCheck %s
+
+; CHECK: does not support ARM mode execution
+
diff --git a/test/CodeGen/ARM/Windows/no-ehabi.ll b/test/CodeGen/ARM/Windows/no-ehabi.ll
new file mode 100644
index 0000000..4119b6d
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/no-ehabi.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=thumbv7-windows -mcpu=cortex-a9 -o - %s | FileCheck %s
+
+declare void @callee(i32 %i)
+
+define i32 @caller(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o,
+                   i32 %p) {
+entry:
+  %q = add nsw i32 %j, %i
+  %r = add nsw i32 %q, %k
+  %s = add nsw i32 %r, %l
+  call void @callee(i32 %s)
+  %t = add nsw i32 %n, %m
+  %u = add nsw i32 %t, %o
+  %v = add nsw i32 %u, %p
+  call void @callee(i32 %v)
+  %w = add nsw i32 %v, %s
+  ret i32 %w
+}
+
+; CHECK-NOT: .save {{{.*}}}
+
diff --git a/test/CodeGen/ARM/a15-SD-dep.ll b/test/CodeGen/ARM/a15-SD-dep.ll
index 019ff61..5e5ca4b 100644
--- a/test/CodeGen/ARM/a15-SD-dep.ll
+++ b/test/CodeGen/ARM/a15-SD-dep.ll
@@ -56,3 +56,62 @@ define arm_aapcs_vfpcc <4 x float> @t5(<4 x float> %q, float %f) {
   %i2 = fadd <4 x float> %i1, %i1
   ret <4 x float> %i2
 }
+
+; Test that DPair can be successfully passed as QPR.
+; CHECK-ENABLED-LABEL: test_DPair1:
+; CHECK-DISABLED-LABEL: test_DPair1:
+define void @test_DPair1(i32 %vsout, i8* nocapture %out, float %x, float %y) {
+entry:
+  %0 = insertelement <4 x float> undef, float %x, i32 1
+  %1 = insertelement <4 x float> %0, float %y, i32 0
+  ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0]
+  ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[1]
+  ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0]
+  ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[1]
+  ; CHECK-DISABLED-NOT: vdup
+  switch i32 %vsout, label %sw.epilog [
+    i32 1, label %sw.bb
+    i32 0, label %sw.bb6
+  ]
+
+sw.bb:                                            ; preds = %entry
+  %2 = insertelement <4 x float> %1, float 0.000000e+00, i32 0
+  br label %sw.bb6
+
+sw.bb6:                                           ; preds = %sw.bb, %entry
+  %sum.0 = phi <4 x float> [ %1, %entry ], [ %2, %sw.bb ]
+  %3 = extractelement <4 x float> %sum.0, i32 0
+  %conv = fptoui float %3 to i8
+  store i8 %conv, i8* %out, align 1
+  ret void
+
+sw.epilog:                                        ; preds = %entry
+  ret void
+}
+
+; CHECK-ENABLED-LABEL: test_DPair2:
+; CHECK-DISABLED-LABEL: test_DPair2:
+define void @test_DPair2(i32 %vsout, i8* nocapture %out, float %x) {
+entry:
+  %0 = insertelement <4 x float> undef, float %x, i32 0
+  ; CHECK-ENABLED: vdup.32 q{{[0-9]*}}, d{{[0-9]*}}[0]
+  ; CHECK-DISABLED-NOT: vdup
+  switch i32 %vsout, label %sw.epilog [
+    i32 1, label %sw.bb
+    i32 0, label %sw.bb1
+  ]
+
+sw.bb:                                            ; preds = %entry
+  %1 = insertelement <4 x float> %0, float 0.000000e+00, i32 0
+  br label %sw.bb1
+
+sw.bb1:                                           ; preds = %entry, %sw.bb
+  %sum.0 = phi <4 x float> [ %0, %entry ], [ %1, %sw.bb ]
+  %2 = extractelement <4 x float> %sum.0, i32 0
+  %conv = fptoui float %2 to i8
+  store i8 %conv, i8* %out, align 1
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %entry, %sw.bb1
+  ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/ARM/a15-mla.ll b/test/CodeGen/ARM/a15-mla.ll
index b233cc2..9867e27 100644
--- a/test/CodeGen/ARM/a15-mla.ll
+++ b/test/CodeGen/ARM/a15-mla.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s  -march=arm -float-abi=hard -mcpu=cortex-a15 -mattr=+neon,+neonfp | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -float-abi=hard -mcpu=cortex-a15 -mattr=+neon,+neonfp %s -o - \
+; RUN:  | FileCheck %s
 
 ; This test checks that the VMLxForwarting feature is disabled for A15.
 ; CHECK: fun_a:
diff --git a/test/CodeGen/ARM/a15.ll b/test/CodeGen/ARM/a15.ll
index 6f816c1..9f0b280 100644
--- a/test/CodeGen/ARM/a15.ll
+++ b/test/CodeGen/ARM/a15.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s  -mcpu=cortex-a15 | FileCheck %s
+; RUN: llc -mtriple=arm -mcpu=cortex-a15 %s -o - | FileCheck %s
 
 ; CHECK: a
 define i32 @a(i32 %x) {
diff --git a/test/CodeGen/ARM/addrmode.ll b/test/CodeGen/ARM/addrmode.ll
index 748d258..8fd1da7 100644
--- a/test/CodeGen/ARM/addrmode.ll
+++ b/test/CodeGen/ARM/addrmode.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -march=arm -stats 2>&1 | grep asm-printer | grep 4
+; RUN: llc -mtriple=arm-eabi -stats %s -o - 2>&1 | FileCheck %s
 
 define i32 @t1(i32 %a) {
 	%b = mul i32 %a, 9
@@ -14,3 +14,6 @@ define i32 @t2(i32 %a) {
         %d = load i32* %c
 	ret i32 %d
 }
+
+; CHECK: 4 asm-printer
+
diff --git a/test/CodeGen/ARM/addrspacecast.ll b/test/CodeGen/ARM/addrspacecast.ll
index 2e98ba5..7b6237d 100644
--- a/test/CodeGen/ARM/addrspacecast.ll
+++ b/test/CodeGen/ARM/addrspacecast.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 
 ; Check that codegen for an addrspace cast succeeds without error.
 define <4 x i32 addrspace(1)*> @f (<4 x i32*> %x) {
diff --git a/test/CodeGen/ARM/arm-abi-attr.ll b/test/CodeGen/ARM/arm-abi-attr.ll
new file mode 100644
index 0000000..f3923ae
--- /dev/null
+++ b/test/CodeGen/ARM/arm-abi-attr.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=arm-linux < %s | FileCheck %s --check-prefix=APCS
+; RUN: llc -mtriple=arm-linux -mattr=apcs < %s | \
+; RUN: FileCheck %s --check-prefix=APCS
+; RUN: llc -mtriple=arm-linux-gnueabi -mattr=apcs < %s | \
+; RUN: FileCheck %s --check-prefix=APCS
+
+; RUN: llc -mtriple=arm-linux-gnueabi < %s | FileCheck %s --check-prefix=AAPCS
+; RUN: llc -mtriple=arm-linux-gnueabi -mattr=aapcs < %s | \
+; RUN: FileCheck %s --check-prefix=AAPCS
+; RUN: llc -mtriple=arm-linux-gnu -mattr=aapcs < %s | \
+; RUN: FileCheck %s --check-prefix=AAPCS
+
+; The stack is 8 byte aligned on AAPCS and 4 on APCS, so we should get a BIC
+; only on APCS.
+
+define void @g() {
+; APCS: sub	sp, sp, #8
+; APCS: bic	sp, sp, #7
+
+; AAPCS: sub	sp, sp, #8
+; AAPCS-NOT: bic
+
+  %c = alloca i8, align 8
+  call void @f(i8* %c)
+  ret void
+}
+
+declare void @f(i8*)
diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll
index 88d797e..bf827d6 100644
--- a/test/CodeGen/ARM/arm-and-tst-peephole.ll
+++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=arm | FileCheck -check-prefix=ARM %s
-; RUN: llc < %s -march=thumb | FileCheck -check-prefix=THUMB %s
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck -check-prefix=T2 %s
-; RUN: llc < %s -mtriple=thumbv8 | FileCheck -check-prefix=V8 %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck -check-prefix=ARM %s
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck -check-prefix=THUMB %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \
+; RUN:   | FileCheck -check-prefix=T2 %s
+; RUN: llc -mtriple=thumbv8-eabi %s -o - | FileCheck -check-prefix=V8 %s
 
 ; FIXME: The -march=thumb test doesn't change if -disable-peephole is specified.
 
diff --git a/test/CodeGen/ARM/arm-asm.ll b/test/CodeGen/ARM/arm-asm.ll
index 2e35e39..e869abe 100644
--- a/test/CodeGen/ARM/arm-asm.ll
+++ b/test/CodeGen/ARM/arm-asm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 
 define void @frame_dummy() {
 entry:
diff --git a/test/CodeGen/ARM/arm-modifier.ll b/test/CodeGen/ARM/arm-modifier.ll
index 8548642..580f7e7 100644
--- a/test/CodeGen/ARM/arm-modifier.ll
+++ b/test/CodeGen/ARM/arm-modifier.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 -no-integrated-as %s -o - | FileCheck %s
 
 define i32 @foo(float %scale, float %scale2) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/arm-negative-stride.ll b/test/CodeGen/ARM/arm-negative-stride.ll
index fb0f8ff..7decb97 100644
--- a/test/CodeGen/ARM/arm-negative-stride.ll
+++ b/test/CodeGen/ARM/arm-negative-stride.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 ; This loop is rewritten with an indvar which counts down, which
 ; frees up a register from holding the trip count.
diff --git a/test/CodeGen/ARM/arm-ttype-target2.ll b/test/CodeGen/ARM/arm-ttype-target2.ll
index 8b5087f..4d61cb5 100644
--- a/test/CodeGen/ARM/arm-ttype-target2.ll
+++ b/test/CodeGen/ARM/arm-ttype-target2.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=armv7-none-linux-gnueabi -arm-enable-ehabi -arm-enable-ehabi-descriptors < %s | FileCheck %s 
+; RUN: llc -mtriple=armv7-none-linux-gnueabi < %s | FileCheck %s
 
 @_ZTVN10__cxxabiv117__class_type_infoE = external global i8*
 @_ZTS3Foo = linkonce_odr constant [5 x i8] c"3Foo\00"
diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll
index 0477d4f..a881d5f 100644
--- a/test/CodeGen/ARM/atomic-64bit.ll
+++ b/test/CodeGen/ARM/atomic-64bit.ll
@@ -55,8 +55,8 @@ define i64 @test3(i64* %ptr, i64 %val) {
 ; CHECK-LABEL: test3:
 ; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: and [[REG3:(r[0-9]?[02468])]], [[REG1]]
-; CHECK: and [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-DAG: and [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK-DAG: and [[REG4:(r[0-9]?[13579])]], [[REG2]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
@@ -65,8 +65,8 @@ define i64 @test3(i64* %ptr, i64 %val) {
 ; CHECK-THUMB-LABEL: test3:
 ; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: and.w [[REG3:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB: and.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-DAG: and.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB-DAG: and.w [[REG4:[a-z0-9]+]], [[REG2]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
@@ -80,8 +80,8 @@ define i64 @test4(i64* %ptr, i64 %val) {
 ; CHECK-LABEL: test4:
 ; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: orr [[REG3:(r[0-9]?[02468])]], [[REG1]]
-; CHECK: orr [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-DAG: orr [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK-DAG: orr [[REG4:(r[0-9]?[13579])]], [[REG2]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
@@ -90,8 +90,8 @@ define i64 @test4(i64* %ptr, i64 %val) {
 ; CHECK-THUMB-LABEL: test4:
 ; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: orr.w [[REG3:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB: orr.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-DAG: orr.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB-DAG: orr.w [[REG4:[a-z0-9]+]], [[REG2]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
@@ -105,8 +105,8 @@ define i64 @test5(i64* %ptr, i64 %val) {
 ; CHECK-LABEL: test5:
 ; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: eor [[REG3:(r[0-9]?[02468])]], [[REG1]]
-; CHECK: eor [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-DAG: eor [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK-DAG: eor [[REG4:(r[0-9]?[13579])]], [[REG2]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
@@ -115,8 +115,8 @@ define i64 @test5(i64* %ptr, i64 %val) {
 ; CHECK-THUMB-LABEL: test5:
 ; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: eor.w [[REG3:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB: eor.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-DAG: eor.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB-DAG: eor.w [[REG4:[a-z0-9]+]], [[REG2]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
@@ -151,8 +151,9 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
 ; CHECK-LABEL: test7:
 ; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: cmp [[REG1]]
-; CHECK: cmpeq [[REG2]]
+; CHECK-DAG: eor     [[MISMATCH_LO:r[0-9]+]], [[REG1]], r1
+; CHECK-DAG: eor     [[MISMATCH_HI:r[0-9]+]], [[REG2]], r2
+; CHECK: orrs    {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
 ; CHECK: bne
 ; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
 ; CHECK: cmp
@@ -162,16 +163,16 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
 ; CHECK-THUMB-LABEL: test7:
 ; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: cmp [[REG1]]
-; CHECK-THUMB: it eq
-; CHECK-THUMB: cmpeq [[REG2]]
+; CHECK-THUMB-DAG: eor.w     [[MISMATCH_LO:[a-z0-9]+]], [[REG1]], r2
+; CHECK-THUMB-DAG: eor.w     [[MISMATCH_HI:[a-z0-9]+]], [[REG2]], r3
+; CHECK-THUMB: orrs    [[MISMATCH_HI]], [[MISMATCH_LO]]
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
-  %r = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst
+  %r = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst seq_cst
   ret i64 %r
 }
 
@@ -216,9 +217,18 @@ define i64 @test10(i64* %ptr, i64 %val) {
 ; CHECK-LABEL: test10:
 ; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
-; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
-; CHECK: blt
+; CHECK: mov     [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK: mov     [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK: mov     [[OUT_HI:[a-z0-9]+]], r2
+; CHECK: cmp     [[REG1]], r1
+; CHECK: movwls  [[CARRY_LO]], #1
+; CHECK: cmp     [[REG2]], r2
+; CHECK: movwle  [[CARRY_HI]], #1
+; CHECK: moveq   [[CARRY_HI]], [[CARRY_LO]]
+; CHECK: cmp     [[CARRY_HI]], #0
+; CHECK: movne   [[OUT_HI]], [[REG2]]
+; CHECK: mov     [[OUT_LO:[a-z0-9]+]], r1
+; CHECK: movne   [[OUT_LO]], [[REG1]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
@@ -227,9 +237,18 @@ define i64 @test10(i64* %ptr, i64 %val) {
 ; CHECK-THUMB-LABEL: test10:
 ; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
-; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
-; CHECK-THUMB: blt
+; CHECK-THUMB: mov.w     [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK-THUMB: movs     [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK-THUMB: cmp     [[REG1]], r2
+; CHECK-THUMB: movls.w  [[CARRY_LO]], #1
+; CHECK-THUMB: cmp     [[REG2]], r3
+; CHECK-THUMB: movle  [[CARRY_HI]], #1
+; CHECK-THUMB: moveq   [[CARRY_HI]], [[CARRY_LO]]
+; CHECK-THUMB: mov     [[OUT_HI:[a-z0-9]+]], r3
+; CHECK-THUMB: cmp     [[CARRY_HI]], #0
+; CHECK-THUMB: mov     [[OUT_LO:[a-z0-9]+]], r2
+; CHECK-THUMB: movne   [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne   [[OUT_LO]], [[REG1]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
@@ -243,9 +262,18 @@ define i64 @test11(i64* %ptr, i64 %val) {
 ; CHECK-LABEL: test11:
 ; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
-; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
-; CHECK: blo
+; CHECK: mov     [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK: mov     [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK: mov     [[OUT_HI:[a-z0-9]+]], r2
+; CHECK: cmp     [[REG1]], r1
+; CHECK: movwls  [[CARRY_LO]], #1
+; CHECK: cmp     [[REG2]], r2
+; CHECK: movwls  [[CARRY_HI]], #1
+; CHECK: moveq   [[CARRY_HI]], [[CARRY_LO]]
+; CHECK: cmp     [[CARRY_HI]], #0
+; CHECK: movne   [[OUT_HI]], [[REG2]]
+; CHECK: mov     [[OUT_LO:[a-z0-9]+]], r1
+; CHECK: movne   [[OUT_LO]], [[REG1]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
@@ -255,9 +283,18 @@ define i64 @test11(i64* %ptr, i64 %val) {
 ; CHECK-THUMB-LABEL: test11:
 ; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
-; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
-; CHECK-THUMB: blo
+; CHECK-THUMB: mov.w     [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK-THUMB: movs     [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK-THUMB: cmp     [[REG1]], r2
+; CHECK-THUMB: movls.w  [[CARRY_LO]], #1
+; CHECK-THUMB: cmp     [[REG2]], r3
+; CHECK-THUMB: movls  [[CARRY_HI]], #1
+; CHECK-THUMB: moveq   [[CARRY_HI]], [[CARRY_LO]]
+; CHECK-THUMB: mov     [[OUT_HI:[a-z0-9]+]], r3
+; CHECK-THUMB: cmp     [[CARRY_HI]], #0
+; CHECK-THUMB: mov     [[OUT_LO:[a-z0-9]+]], r2
+; CHECK-THUMB: movne   [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne   [[OUT_LO]], [[REG1]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
@@ -271,9 +308,18 @@ define i64 @test12(i64* %ptr, i64 %val) {
 ; CHECK-LABEL: test12:
 ; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
-; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
-; CHECK: bge
+; CHECK: mov     [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK: mov     [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK: mov     [[OUT_HI:[a-z0-9]+]], r2
+; CHECK: cmp     [[REG1]], r1
+; CHECK: movwhi  [[CARRY_LO]], #1
+; CHECK: cmp     [[REG2]], r2
+; CHECK: movwgt  [[CARRY_HI]], #1
+; CHECK: moveq   [[CARRY_HI]], [[CARRY_LO]]
+; CHECK: cmp     [[CARRY_HI]], #0
+; CHECK: movne   [[OUT_HI]], [[REG2]]
+; CHECK: mov     [[OUT_LO:[a-z0-9]+]], r1
+; CHECK: movne   [[OUT_LO]], [[REG1]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
@@ -282,9 +328,18 @@ define i64 @test12(i64* %ptr, i64 %val) {
 ; CHECK-THUMB-LABEL: test12:
 ; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
-; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
-; CHECK-THUMB: bge
+; CHECK-THUMB: mov.w     [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK-THUMB: movs     [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK-THUMB: cmp     [[REG1]], r2
+; CHECK-THUMB: movhi.w  [[CARRY_LO]], #1
+; CHECK-THUMB: cmp     [[REG2]], r3
+; CHECK-THUMB: movgt  [[CARRY_HI]], #1
+; CHECK-THUMB: moveq   [[CARRY_HI]], [[CARRY_LO]]
+; CHECK-THUMB: mov     [[OUT_HI:[a-z0-9]+]], r3
+; CHECK-THUMB: cmp     [[CARRY_HI]], #0
+; CHECK-THUMB: mov     [[OUT_LO:[a-z0-9]+]], r2
+; CHECK-THUMB: movne   [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne   [[OUT_LO]], [[REG1]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
@@ -298,9 +353,18 @@ define i64 @test13(i64* %ptr, i64 %val) {
 ; CHECK-LABEL: test13:
 ; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
-; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
-; CHECK: bhs
+; CHECK: mov     [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK: mov     [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK: mov     [[OUT_HI:[a-z0-9]+]], r2
+; CHECK: cmp     [[REG1]], r1
+; CHECK: movwhi  [[CARRY_LO]], #1
+; CHECK: cmp     [[REG2]], r2
+; CHECK: movwhi  [[CARRY_HI]], #1
+; CHECK: moveq   [[CARRY_HI]], [[CARRY_LO]]
+; CHECK: cmp     [[CARRY_HI]], #0
+; CHECK: movne   [[OUT_HI]], [[REG2]]
+; CHECK: mov     [[OUT_LO:[a-z0-9]+]], r1
+; CHECK: movne   [[OUT_LO]], [[REG1]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
@@ -309,9 +373,18 @@ define i64 @test13(i64* %ptr, i64 %val) {
 ; CHECK-THUMB-LABEL: test13:
 ; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
-; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
-; CHECK-THUMB: bhs
+; CHECK-THUMB: mov.w     [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK-THUMB: movs     [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK-THUMB: cmp     [[REG1]], r2
+; CHECK-THUMB: movhi.w  [[CARRY_LO]], #1
+; CHECK-THUMB: cmp     [[REG2]], r3
+; CHECK-THUMB: movhi  [[CARRY_HI]], #1
+; CHECK-THUMB: moveq   [[CARRY_HI]], [[CARRY_LO]]
+; CHECK-THUMB: mov     [[OUT_HI:[a-z0-9]+]], r3
+; CHECK-THUMB: cmp     [[CARRY_HI]], #0
+; CHECK-THUMB: mov     [[OUT_LO:[a-z0-9]+]], r2
+; CHECK-THUMB: movne   [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne   [[OUT_LO]], [[REG1]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
diff --git a/test/CodeGen/ARM/atomic-cmp.ll b/test/CodeGen/ARM/atomic-cmp.ll
index 51ada69..a473807 100644
--- a/test/CodeGen/ARM/atomic-cmp.ll
+++ b/test/CodeGen/ARM/atomic-cmp.ll
@@ -10,6 +10,6 @@ define i8 @t(i8* %a, i8 %b, i8 %c) nounwind {
 ; T2-LABEL: t:
 ; T2: ldrexb
 ; T2: strexb
-  %tmp0 = cmpxchg i8* %a, i8 %b, i8 %c monotonic
+  %tmp0 = cmpxchg i8* %a, i8 %b, i8 %c monotonic monotonic
   ret i8 %tmp0
 }
diff --git a/test/CodeGen/ARM/atomic-load-store.ll b/test/CodeGen/ARM/atomic-load-store.ll
index 53c7184..45a263d 100644
--- a/test/CodeGen/ARM/atomic-load-store.ll
+++ b/test/CodeGen/ARM/atomic-load-store.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=ARM
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s -check-prefix=THUMBTWO
 ; RUN: llc < %s -mtriple=thumbv6-apple-ios | FileCheck %s -check-prefix=THUMBONE
-; RUN  llc < %s -mtriple=armv4-apple-ios | FileCheck %s -check-prefix=ARMV4
+; RUN: llc < %s -mtriple=armv4-apple-ios | FileCheck %s -check-prefix=ARMV4
 
 define void @test1(i32* %ptr, i32 %val1) {
 ; ARM: test1
diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll
index 9a79c9f..ac8e949 100644
--- a/test/CodeGen/ARM/atomic-op.ll
+++ b/test/CodeGen/ARM/atomic-op.ll
@@ -194,3 +194,40 @@ entry:
   %0 = atomicrmw add i32* %p, i32 1 monotonic
   ret i32 %0
 }
+
+define i32 @test_cmpxchg_fail_order(i32 *%addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: test_cmpxchg_fail_order:
+; Exercises cmpxchg whose success ordering (seq_cst) is stronger than its failure ordering (monotonic).
+  %oldval = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+; CHECK:     dmb ish
+; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]:
+; CHECK:     ldrex   [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
+; CHECK:     cmp     [[OLDVAL]], r1
+; CHECK:     bxne    lr
+; CHECK:     strex   [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
+; CHECK:     cmp     [[SUCCESS]], #0
+; CHECK:     bne     [[LOOP_BB]]
+; CHECK:     dmb     ish
+; CHECK:     bx      lr
+; Expected shape: barrier, ldrex/strex retry loop, barrier; a failed compare returns via bxne lr, matched ahead of the trailing dmb.
+  ret i32 %oldval
+}
+
+define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: test_cmpxchg_fail_order1:
+; Exercises cmpxchg where both the success and the failure ordering are acquire.
+  %oldval = cmpxchg i32* %addr, i32 %desired, i32 %new acquire acquire
+; CHECK-NOT:     dmb ish
+; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]:
+; CHECK:     ldrex   [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
+; CHECK:     cmp     [[OLDVAL]], r1
+; CHECK:     bne     [[END_BB:\.?LBB[0-9]+_[0-9]+]]
+; CHECK:     strex   [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
+; CHECK:     cmp     [[SUCCESS]], #0
+; CHECK:     bne     [[LOOP_BB]]
+; CHECK: [[END_BB]]:
+; CHECK:     dmb     ish
+; CHECK:     bx      lr
+; Expected shape: no dmb ish between the function label and the loop label; a failed compare branches to END_BB, which is matched ahead of the final dmb ish.
+  ret i32 %oldval
+}
diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll
index 3f93929..7922e22 100644
--- a/test/CodeGen/ARM/atomic-ops-v8.ll
+++ b/test/CodeGen/ARM/atomic-ops-v8.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=armv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=thumbv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=armv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM
+; RUN: llc -mtriple=thumbv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-THUMB
 
 @var8 = global i8 0
 @var16 = global i16 0
@@ -15,7 +15,7 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -38,7 +38,7 @@ define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -61,7 +61,7 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -75,7 +75,7 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
    ret i32 %old
 }
 
-define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
+define void @test_atomic_load_add_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64:
    %old = atomicrmw add i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
@@ -84,10 +84,10 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
   ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
-; CHECK-NEXT: adds [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: adds{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0
 ; CHECK-NEXT: adc{{(\.w)?}}  [[NEW2:r[0-9]+]], r[[OLD2]], r1
 ; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
@@ -95,9 +95,9 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-   ret i64 %old
+; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]]
+  store i64 %old, i64* @var64
+   ret void
 }
 
 define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
@@ -109,7 +109,7 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -132,7 +132,7 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -155,7 +155,7 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -169,7 +169,7 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
    ret i32 %old
 }
 
-define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
+define void @test_atomic_load_sub_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64:
    %old = atomicrmw sub i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -178,10 +178,10 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
   ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
-; CHECK-NEXT: subs [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: subs{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0
 ; CHECK-NEXT: sbc{{(\.w)?}}  [[NEW2:r[0-9]+]], r[[OLD2]], r1
 ; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
@@ -189,9 +189,9 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-   ret i64 %old
+; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]]
+   store i64 %old, i64* @var64
+   ret void
 }
 
 define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
@@ -203,7 +203,7 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -226,7 +226,7 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -249,7 +249,7 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -263,7 +263,7 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
    ret i32 %old
 }
 
-define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
+define void @test_atomic_load_and_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64:
    %old = atomicrmw and i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
@@ -272,20 +272,20 @@ define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
   ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
-; CHECK-NEXT: and{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: and{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
-; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-DAG: and{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-DAG: and{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-   ret i64 %old
+; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]]
+   store i64 %old, i64* @var64
+   ret void
 }
 
 define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
@@ -297,7 +297,7 @@ define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -320,7 +320,7 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -343,7 +343,7 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -357,7 +357,7 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
    ret i32 %old
 }
 
-define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
+define void @test_atomic_load_or_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64:
    %old = atomicrmw or i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
@@ -366,20 +366,20 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
   ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
-; CHECK-NEXT: orr{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: orr{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
-; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-DAG: orr{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-DAG: orr{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-   ret i64 %old
+; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]]
+   store i64 %old, i64* @var64
+   ret void
 }
 
 define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
@@ -391,7 +391,7 @@ define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -414,7 +414,7 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -437,7 +437,7 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -451,7 +451,7 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
    ret i32 %old
 }
 
-define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
+define void @test_atomic_load_xor_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64:
    %old = atomicrmw xor i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
@@ -460,20 +460,20 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
   ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
-; CHECK-NEXT: eor{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: eor{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
-; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-DAG: eor{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-DAG: eor{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-   ret i64 %old
+; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]]
+   store i64 %old, i64* @var64
+   ret void
 }
 
 define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
@@ -485,7 +485,7 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r0, [r[[ADDR]]]
@@ -507,7 +507,7 @@ define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r0, [r[[ADDR]]]
@@ -529,7 +529,7 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r0, [r[[ADDR]]]
@@ -542,7 +542,7 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
    ret i32 %old
 }
 
-define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
+define void @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64:
    %old = atomicrmw xchg i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
@@ -551,7 +551,7 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
   ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 ; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
@@ -560,28 +560,28 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-   ret i64 %old
+; CHECK: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
+   store i64 %old, i64* @var64
+   ret void
 }
 
-define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
+define i8 @test_atomic_load_min_i8(i8 signext %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i8:
    %old = atomicrmw min i8* @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
-; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK-DAG: movw [[ADDR:r[0-9]+|lr]], :lower16:var8
+; CHECK-DAG: movt [[ADDR]], :upper16:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
 ; CHECK-NEXT: sxtb r[[OLDX:[0-9]+]], r[[OLD]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp r[[OLDX]], r0
-; Thumb mode: it ge
-; CHECK:      movge r[[OLDX]], r0
-; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
+; Thumb mode: it le
+; CHECK:      movle r[[OLDX]], r[[OLD]]
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[OLDX]], {{.*}}[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -591,23 +591,23 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
    ret i8 %old
 }
 
-define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
+define i16 @test_atomic_load_min_i16(i16 signext %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i16:
    %old = atomicrmw min i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
-; CHECK: movt r[[ADDR]], :upper16:var16
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var16
+; CHECK: movt [[ADDR]], :upper16:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexh r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
 ; CHECK-NEXT: sxth r[[OLDX:[0-9]+]], r[[OLD]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp r[[OLDX]], r0
-; Thumb mode: it ge
-; CHECK:      movge r[[OLDX]], r0
-; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
+; Thumb mode: it le
+; CHECK:      movle r[[OLDX]], r[[OLD]]
+; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r[[OLDX]], {{.*}}[[ADDR]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -626,13 +626,13 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
 ; CHECK-NEXT: cmp r[[OLD]], r0
-; Thumb mode: it lt
-; CHECK:      movlt r[[NEW]], r[[OLD]]
+; Thumb mode: it le
+; CHECK:      movle r[[NEW]], r[[OLD]]
 ; CHECK-NEXT: strex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
@@ -643,7 +643,7 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
    ret i32 %old
 }
 
-define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
+define void @test_atomic_load_min_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64:
    %old = atomicrmw min i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -652,41 +652,50 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
   ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
-; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
-; CHECK-NEXT: blt .LBB{{[0-9]+}}_3
-; CHECK-NEXT: BB#2:
-; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: cmp [[OLD1]], r0
+; CHECK-ARM: movwls [[LOCARRY]], #1
+; CHECK-ARM: cmp [[OLD2]], r1
+; CHECK-ARM: movwle [[HICARRY]], #1
+; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]]
+; CHECK-ARM: cmp [[HICARRY]], #0
+; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
+; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
+; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-   ret i64 %old
+; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
+   store i64 %old, i64* @var64
+   ret void
 }
 
-define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
+define i8 @test_atomic_load_max_i8(i8 signext %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i8:
    %old = atomicrmw max i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
-; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var8
+; CHECK: movt [[ADDR]], :upper16:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
 ; CHECK-NEXT: sxtb r[[OLDX:[0-9]+]], r[[OLD]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp r[[OLDX]], r0
-; Thumb mode: it le
-; CHECK:      movle r[[OLDX]], r0
-; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
+; Thumb mode: it gt
+; CHECK:      movgt r[[OLDX]], r[[OLD]]
+; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[OLDX]], {{.*}}[[ADDR]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -696,7 +705,7 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
    ret i8 %old
 }
 
-define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
+define i16 @test_atomic_load_max_i16(i16 signext %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i16:
    %old = atomicrmw max i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
@@ -705,13 +714,13 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; CHECK-NEXT: sxth r[[OLDX:[0-9]+]], r[[OLD]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp r[[OLDX]], r0
-; Thumb mode: it le
-; CHECK:      movle r[[OLDX]], r0
+; Thumb mode: it gt
+; CHECK:      movgt r[[OLDX]], r[[OLD]]
 ; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
@@ -731,7 +740,7 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
@@ -748,7 +757,7 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
    ret i32 %old
 }
 
-define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
+define void @test_atomic_load_max_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64:
    %old = atomicrmw max i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
@@ -757,41 +766,50 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
   ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
-; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
-; CHECK-NEXT: bge .LBB{{[0-9]+}}_3
-; CHECK-NEXT: BB#2:
-; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: cmp [[OLD1]], r0
+; CHECK-ARM: movwhi [[LOCARRY]], #1
+; CHECK-ARM: cmp [[OLD2]], r1
+; CHECK-ARM: movwgt [[HICARRY]], #1
+; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]]
+; CHECK-ARM: cmp [[HICARRY]], #0
+; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
+; CHECK-ARM: strexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
+; CHECK-THUMB: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-   ret i64 %old
+; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
+   store i64 %old, i64* @var64
+   ret void
 }
 
-define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
+define i8 @test_atomic_load_umin_i8(i8 zeroext %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8:
    %old = atomicrmw umin i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
-; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var8
+; CHECK: movt [[ADDR]], :upper16:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
 ; CHECK-NEXT: cmp r[[OLD]], r0
-; Thumb mode: it lo
-; CHECK:      movlo r[[NEW]], r[[OLD]]
-; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; Thumb mode: it ls
+; CHECK:      movls r[[NEW]], r[[OLD]]
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -801,23 +819,23 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
    ret i8 %old
 }
 
-define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
+define i16 @test_atomic_load_umin_i16(i16 zeroext %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16:
    %old = atomicrmw umin i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
-; CHECK: movt r[[ADDR]], :upper16:var16
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var16
+; CHECK: movt [[ADDR]], :upper16:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexh r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
 ; CHECK-NEXT: cmp r[[OLD]], r0
-; Thumb mode: it lo
-; CHECK:      movlo r[[NEW]], r[[OLD]]
-; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; Thumb mode: it ls
+; CHECK:      movls r[[NEW]], r[[OLD]]
+; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -836,13 +854,13 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
 ; CHECK-NEXT: cmp r[[OLD]], r0
-; Thumb mode: it lo
-; CHECK:      movlo r[[NEW]], r[[OLD]]
+; Thumb mode: it ls
+; CHECK:      movls r[[NEW]], r[[OLD]]
 ; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
@@ -853,50 +871,59 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
    ret i32 %old
 }
 
-define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
+define void @test_atomic_load_umin_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64:
-   %old = atomicrmw umin i64* @var64, i64 %offset acq_rel
+   %old = atomicrmw umin i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
 ; CHECK: movt r[[ADDR]], :upper16:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
   ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
-; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
-; CHECK-NEXT: blo .LBB{{[0-9]+}}_3
-; CHECK-NEXT: BB#2:
-; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: cmp [[OLD1]], r0
+; CHECK-ARM: movwls [[LOCARRY]], #1
+; CHECK-ARM: cmp [[OLD2]], r1
+; CHECK-ARM: movwls [[HICARRY]], #1
+; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]]
+; CHECK-ARM: cmp [[HICARRY]], #0
+; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
+; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
+; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-   ret i64 %old
+; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
+   store i64 %old, i64* @var64
+   ret void
 }
 
-define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
+define i8 @test_atomic_load_umax_i8(i8 zeroext %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8:
    %old = atomicrmw umax i8* @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
-; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var8
+; CHECK: movt [[ADDR]], :upper16:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
 ; CHECK-NEXT: cmp r[[OLD]], r0
 ; Thumb mode: it hi
 ; CHECK:      movhi r[[NEW]], r[[OLD]]
-; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -906,23 +933,23 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
    ret i8 %old
 }
 
-define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
+define i16 @test_atomic_load_umax_i16(i16 zeroext %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16:
    %old = atomicrmw umax i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
-; CHECK: movt r[[ADDR]], :upper16:var16
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var16
+; CHECK: movt [[ADDR]], :upper16:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexh r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
 ; CHECK-NEXT: cmp r[[OLD]], r0
 ; Thumb mode: it hi
 ; CHECK:      movhi r[[NEW]], r[[OLD]]
-; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -941,7 +968,7 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
@@ -958,50 +985,59 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
    ret i32 %old
 }
 
-define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
+define void @test_atomic_load_umax_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64:
-   %old = atomicrmw umax i64* @var64, i64 %offset release
+   %old = atomicrmw umax i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
 ; CHECK: movt r[[ADDR]], :upper16:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
   ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
-; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
-; CHECK-NEXT: bhs .LBB{{[0-9]+}}_3
-; CHECK-NEXT: BB#2:
-; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: cmp [[OLD1]], r0
+; CHECK-ARM: movwhi [[LOCARRY]], #1
+; CHECK-ARM: cmp [[OLD2]], r1
+; CHECK-ARM: movwhi [[HICARRY]], #1
+; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]]
+; CHECK-ARM: cmp [[HICARRY]], #0
+; CHECK-ARM: mov [[MAXHI:r[0-9]+]], r1
+; CHECK-ARM: movne [[MAXHI]], [[OLD2]]
+; CHECK-ARM: mov [[MAXLO:r[0-9]+]], r0
+; CHECK-ARM: movne [[MAXLO]], [[OLD1]]
+; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MAXLO]], [[MAXHI]], [r[[ADDR]]]
+; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-   ret i64 %old
+; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
+   store i64 %old, i64* @var64
+   ret void
 }
 
-define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
+define i8 @test_atomic_cmpxchg_i8(i8 zeroext %wanted, i8 zeroext %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8:
-   %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire
+   %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
 ; CHECK: movt r[[ADDR]], :upper16:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp r[[OLD]], r0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
 ; CHECK-NEXT: BB#2:
   ; As above, r1 is a reasonable guess.
-; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
+; CHECK: strexb [[STATUS:r[0-9]+]], r1, {{.*}}[[ADDR]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -1011,23 +1047,23 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
    ret i8 %old
 }
 
-define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
+define i16 @test_atomic_cmpxchg_i16(i16 zeroext %wanted, i16 zeroext %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16:
-   %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst
+   %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
 ; CHECK: movt r[[ADDR]], :upper16:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp r[[OLD]], r0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
 ; CHECK-NEXT: BB#2:
   ; As above, r1 is a reasonable guess.
-; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
+; CHECK: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -1037,59 +1073,60 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
    ret i16 %old
 }
 
-define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
+define void @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32:
-   %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release
+   %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic
+   store i32 %old, i32* @var32
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
 ; CHECK: movt r[[ADDR]], :upper16:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
   ; r0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp r[[OLD]], r0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
 ; CHECK-NEXT: BB#2:
   ; As above, r1 is a reasonable guess.
-; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
+; CHECK: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 
-; CHECK: mov r0, r[[OLD]]
-   ret i32 %old
+; CHECK: str{{(\.w)?}} r[[OLD]],
+   ret void
 }
 
-define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
+define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i64:
-   %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic
+   %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
 ; CHECK: movt r[[ADDR]], :upper16:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
+; CHECK: ldrexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
   ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
-; CHECK-NEXT: cmp   [[OLD1]], r0
-; Thumb mode: it eq
-; CHECK:      cmpeq [[OLD2]], r1
+; CHECK-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0
+; CHECK-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1
+; CHECK: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
 ; CHECK-NEXT: BB#2:
   ; As above, r2, r3 is a reasonable guess.
-; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]]
+; CHECK: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 
-; CHECK: mov r0, [[OLD1]]
-; CHECK-NEXT: mov r1, [[OLD2]]
-   ret i64 %old
+; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
+   store i64 %old, i64* @var64
+   ret void
 }
 
 define i8 @test_atomic_load_monotonic_i8() nounwind {
@@ -1303,13 +1340,13 @@ define void @test_atomic_store_release_i64(i64 %val) nounwind {
   store atomic i64 %val, i64* @var64 release, align 8
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
-; CHECK: movt r[[ADDR]], :upper16:var64
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var64
+; CHECK: movt [[ADDR]], :upper16:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
   ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
-; CHECK: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK: stlexd [[STATUS:r[0-9]+]], r0, r1, {{.*}}[[ADDR]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -1337,7 +1374,7 @@ atomic_ver:
   ; The key point here is that the second dmb isn't immediately followed by the
   ; simple_ver basic block, which LLVM attempted to do when DMB had been marked
   ; with isBarrier. For now, look for something that looks like "somewhere".
-; CHECK-NEXT: mov
+; CHECK-NEXT: {{mov|bx}}
 somewhere:
   %combined = phi i32 [ %val, %atomic_ver ], [ %newval, %simple_ver]
   ret i32 %combined
diff --git a/test/CodeGen/ARM/atomicrmw_minmax.ll b/test/CodeGen/ARM/atomicrmw_minmax.ll
index 5befc22..68bf714 100644
--- a/test/CodeGen/ARM/atomicrmw_minmax.ll
+++ b/test/CodeGen/ARM/atomicrmw_minmax.ll
@@ -1,4 +1,4 @@
-;  RUN: llc -march=arm -mcpu=cortex-a9 < %s | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s
 
 ;  CHECK-LABEL: max:
 define i32 @max(i8 %ctx, i32* %ptr, i32 %val)
@@ -15,7 +15,7 @@ define i32 @min(i8 %ctx, i32* %ptr, i32 %val)
 {
 ;  CHECK: ldrex
 ;  CHECK: cmp [[old:r[0-9]*]], [[val:r[0-9]*]]
-;  CHECK: movlo {{r[0-9]*}}, [[old]]
+;  CHECK: movls {{r[0-9]*}}, [[old]]
   %old = atomicrmw umin i32* %ptr, i32 %val monotonic
   ret i32 %old
 }
diff --git a/test/CodeGen/ARM/bfc.ll b/test/CodeGen/ARM/bfc.ll
index 3a17d2b..1162aac 100644
--- a/test/CodeGen/ARM/bfc.ll
+++ b/test/CodeGen/ARM/bfc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+v6t2 %s -o - | FileCheck %s
 
 ; 4278190095 = 0xff00000f
 define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll
index 72a4678..bce09da 100644
--- a/test/CodeGen/ARM/bfi.ll
+++ b/test/CodeGen/ARM/bfi.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm -mattr=+v6t2 < %s | FileCheck %s
+; RUN: llc -mtriple=arm -mattr=+v6t2 %s -o - | FileCheck %s
 
 %struct.F = type { [3 x i8], i8 }
 
diff --git a/test/CodeGen/ARM/bfx.ll b/test/CodeGen/ARM/bfx.ll
index 394da9e..46f49e9 100644
--- a/test/CodeGen/ARM/bfx.ll
+++ b/test/CodeGen/ARM/bfx.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v7 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+v7 %s -o - | FileCheck %s
 
 define i32 @sbfx1(i32 %a) {
 ; CHECK: sbfx1
diff --git a/test/CodeGen/ARM/bic.ll b/test/CodeGen/ARM/bic.ll
index 1dfd627..691f8be 100644
--- a/test/CodeGen/ARM/bic.ll
+++ b/test/CodeGen/ARM/bic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
     %tmp = xor i32 %b, 4294967295
diff --git a/test/CodeGen/ARM/bits.ll b/test/CodeGen/ARM/bits.ll
index ce1b2ad..14aa27e 100644
--- a/test/CodeGen/ARM/bits.ll
+++ b/test/CodeGen/ARM/bits.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
 entry:
diff --git a/test/CodeGen/ARM/build-attributes-encoding.s b/test/CodeGen/ARM/build-attributes-encoding.s
index 5ad51b2..34a1ad3 100644
--- a/test/CodeGen/ARM/build-attributes-encoding.s
+++ b/test/CodeGen/ARM/build-attributes-encoding.s
@@ -4,7 +4,7 @@
 // RUN:   | llvm-readobj -s -sd | FileCheck %s
 
 // Tag_CPU_name (=5)
-.cpu Cortex-A8
+.cpu cortex-a8
 
 // Tag_CPU_arch (=6)
 .eabi_attribute 6, 10
@@ -61,7 +61,7 @@
 .eabi_attribute 110, 160
 
 // Check that tags > 128 are encoded properly
-.eabi_attribute 129, 1
+.eabi_attribute 129, "1"
 .eabi_attribute 250, 1
 
 // CHECK:        Section {
@@ -71,15 +71,15 @@
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     Address: 0x0
 // CHECK-NEXT:     Offset: 0x34
-// CHECK-NEXT:     Size: 70
+// CHECK-NEXT:     Size: 71
 // CHECK-NEXT:     Link: 0
 // CHECK-NEXT:     Info: 0
 // CHECK-NEXT:     AddressAlignment: 1
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 41450000 00616561 62690001 3B000000
+// CHECK-NEXT:       0000: 41460000 00616561 62690001 3C000000
 // CHECK-NEXT:       0010: 05434F52 5445582D 41380006 0A074108
 // CHECK-NEXT:       0020: 0109020A 030C0214 01150117 01180119
 // CHECK-NEXT:       0030: 011B001C 0124012A 012C0244 036EA001
-// CHECK-NEXT:       0040: 810101FA 0101
+// CHECK-NEXT:       0040: 81013100 FA0101
 // CHECK-NEXT:     )
diff --git a/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll b/test/CodeGen/ARM/build-attributes.ll
index 3053694..3e825e8 100644
--- a/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll
+++ b/test/CodeGen/ARM/build-attributes.ll
@@ -1,6 +1,7 @@
 ; This tests that MC/asm header conversion is smooth and that the
 ; build attributes are correct
 
+; RUN: llc < %s -mtriple=thumbv5-linux-gnueabi -mcpu=xscale | FileCheck %s --check-prefix=XSCALE
 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi | FileCheck %s --check-prefix=V6
 ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi | FileCheck %s --check-prefix=V6M
 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s | FileCheck %s --check-prefix=ARM1156T2F-S
@@ -12,16 +13,30 @@
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-fp-armv8,-crypto | FileCheck %s --check-prefix=V8-NEON
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-crypto | FileCheck %s --check-prefix=V8-FPARMv8-NEON
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi | FileCheck %s --check-prefix=V8-FPARMv8-NEON-CRYPTO
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 | FileCheck %s --check-prefix=CORTEX-A5-DEFAULT
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-neon,+d16 | FileCheck %s --check-prefix=CORTEX-A5-NONEON
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-A5-NOFPU
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-A9-SOFT
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-A9-HARD
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 | FileCheck %s --check-prefix=CORTEX-A12-DEFAULT
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-A12-NOFPU
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9-mp | FileCheck %s --check-prefix=CORTEX-A9-MP
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 | FileCheck %s --check-prefix=CORTEX-A15
 ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=CORTEX-M0
+; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m3 | FileCheck %s --check-prefix=CORTEX-M3
 ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-M4-SOFT
 ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-M4-HARD
 ; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=CORTEX-R5
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 | FileCheck %s --check-prefix=CORTEX-A53
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57 | FileCheck %s --check-prefix=CORTEX-A57
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 | FileCheck %s  --check-prefix=CORTEX-A7-CHECK
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2,-vfp3,-vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-NOFPU
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,+d16,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4
+
+; XSCALE:      .eabi_attribute 6, 5
+; XSCALE:      .eabi_attribute 8, 1
+; XSCALE:      .eabi_attribute 9, 1
 
 ; V6:   .eabi_attribute 6, 6
 ; V6:   .eabi_attribute 8, 1
@@ -34,7 +49,7 @@
 ; V6-NOT:    .eabi_attribute 68
 
 ; V6M:  .eabi_attribute 6, 12
-; V6M:  .eabi_attribute 7, 77
+; V6M-NOT:  .eabi_attribute 7
 ; V6M:  .eabi_attribute 8, 0
 ; V6M:  .eabi_attribute 9, 1
 ; V6M:  .eabi_attribute 24, 1
@@ -71,7 +86,7 @@
 ; V7M-NOT:  .eabi_attribute 28
 ; V7M-NOT:  .eabi_attribute 36
 ; V7M-NOT:  .eabi_attribute 42
-; V7M:  .eabi_attribute 44, 0
+; V7M-NOT:  .eabi_attribute 44
 ; V7M-NOT:  .eabi_attribute 68
 
 ; V7:      .syntax unified
@@ -112,6 +127,117 @@
 ; V8-FPARMv8-NEON-CRYPTO: .fpu crypto-neon-fp-armv8
 ; V8-FPARMv8-NEON-CRYPTO: .eabi_attribute 12, 3
 
+; Tag_CPU_arch	'ARMv7'
+; CORTEX-A7-CHECK: .eabi_attribute	6, 10
+; CORTEX-A7-NOFPU: .eabi_attribute	6, 10
+; CORTEX-A7-FPUV4: .eabi_attribute	6, 10
+
+; Tag_CPU_arch_profile 'A'
+; CORTEX-A7-CHECK: .eabi_attribute	7, 65
+; CORTEX-A7-NOFPU: .eabi_attribute	7, 65
+; CORTEX-A7-FPUV4: .eabi_attribute	7, 65
+
+; Tag_ARM_ISA_use
+; CORTEX-A7-CHECK: .eabi_attribute	8, 1
+; CORTEX-A7-NOFPU: .eabi_attribute	8, 1
+; CORTEX-A7-FPUV4: .eabi_attribute	8, 1
+
+; Tag_THUMB_ISA_use
+; CORTEX-A7-CHECK: .eabi_attribute	9, 2
+; CORTEX-A7-NOFPU: .eabi_attribute	9, 2
+; CORTEX-A7-FPUV4: .eabi_attribute	9, 2
+
+; CORTEX-A7-CHECK: .fpu	neon-vfpv4
+; CORTEX-A7-NOFPU-NOT: .fpu
+; CORTEX-A7-FPUV4: .fpu	vfpv4
+
+; Tag_ABI_FP_denormal
+; CORTEX-A7-CHECK: .eabi_attribute	20, 1
+; CORTEX-A7-NOFPU: .eabi_attribute	20, 1
+; CORTEX-A7-FPUV4: .eabi_attribute	20, 1
+
+; Tag_ABI_FP_exceptions
+; CORTEX-A7-CHECK: .eabi_attribute	21, 1
+; CORTEX-A7-NOFPU: .eabi_attribute	21, 1
+; CORTEX-A7-FPUV4: .eabi_attribute	21, 1
+
+; Tag_ABI_FP_number_model
+; CORTEX-A7-CHECK: .eabi_attribute	23, 3
+; CORTEX-A7-NOFPU: .eabi_attribute	23, 3
+; CORTEX-A7-FPUV4: .eabi_attribute	23, 3
+
+; Tag_ABI_align_needed
+; CORTEX-A7-CHECK: .eabi_attribute	24, 1
+; CORTEX-A7-NOFPU: .eabi_attribute	24, 1
+; CORTEX-A7-FPUV4: .eabi_attribute	24, 1
+
+; Tag_ABI_align_preserved
+; CORTEX-A7-CHECK: .eabi_attribute	25, 1
+; CORTEX-A7-NOFPU: .eabi_attribute	25, 1
+; CORTEX-A7-FPUV4: .eabi_attribute	25, 1
+
+; Tag_FP_HP_extension
+; CORTEX-A7-CHECK: .eabi_attribute	36, 1
+; CORTEX-A7-NOFPU: .eabi_attribute	36, 1
+; CORTEX-A7-FPUV4: .eabi_attribute	36, 1
+
+; Tag_MPextension_use
+; CORTEX-A7-CHECK: .eabi_attribute	42, 1
+; CORTEX-A7-NOFPU: .eabi_attribute	42, 1
+; CORTEX-A7-FPUV4: .eabi_attribute	42, 1
+
+; Tag_DIV_use
+; CORTEX-A7-CHECK: .eabi_attribute	44, 2
+; CORTEX-A7-NOFPU: .eabi_attribute	44, 2
+; CORTEX-A7-FPUV4: .eabi_attribute	44, 2
+
+; Tag_Virtualization_use
+; CORTEX-A7-CHECK: .eabi_attribute	68, 3
+; CORTEX-A7-NOFPU: .eabi_attribute	68, 3
+; CORTEX-A7-FPUV4: .eabi_attribute	68, 3
+
+; CORTEX-A5-DEFAULT:        .cpu    cortex-a5
+; CORTEX-A5-DEFAULT:        .eabi_attribute 6, 10
+; CORTEX-A5-DEFAULT:        .eabi_attribute 7, 65
+; CORTEX-A5-DEFAULT:        .eabi_attribute 8, 1
+; CORTEX-A5-DEFAULT:        .eabi_attribute 9, 2
+; CORTEX-A5-DEFAULT:        .fpu    neon-vfpv4
+; CORTEX-A5-DEFAULT:        .eabi_attribute 20, 1
+; CORTEX-A5-DEFAULT:        .eabi_attribute 21, 1
+; CORTEX-A5-DEFAULT:        .eabi_attribute 23, 3
+; CORTEX-A5-DEFAULT:        .eabi_attribute 24, 1
+; CORTEX-A5-DEFAULT:        .eabi_attribute 25, 1
+; CORTEX-A5-DEFAULT:        .eabi_attribute 42, 1
+; CORTEX-A5-DEFAULT:        .eabi_attribute 68, 1
+
+; CORTEX-A5-NONEON:        .cpu    cortex-a5
+; CORTEX-A5-NONEON:        .eabi_attribute 6, 10
+; CORTEX-A5-NONEON:        .eabi_attribute 7, 65
+; CORTEX-A5-NONEON:        .eabi_attribute 8, 1
+; CORTEX-A5-NONEON:        .eabi_attribute 9, 2
+; CORTEX-A5-NONEON:        .fpu    vfpv4-d16
+; CORTEX-A5-NONEON:        .eabi_attribute 20, 1
+; CORTEX-A5-NONEON:        .eabi_attribute 21, 1
+; CORTEX-A5-NONEON:        .eabi_attribute 23, 3
+; CORTEX-A5-NONEON:        .eabi_attribute 24, 1
+; CORTEX-A5-NONEON:        .eabi_attribute 25, 1
+; CORTEX-A5-NONEON:        .eabi_attribute 42, 1
+; CORTEX-A5-NONEON:        .eabi_attribute 68, 1
+
+; CORTEX-A5-NOFPU:        .cpu    cortex-a5
+; CORTEX-A5-NOFPU:        .eabi_attribute 6, 10
+; CORTEX-A5-NOFPU:        .eabi_attribute 7, 65
+; CORTEX-A5-NOFPU:        .eabi_attribute 8, 1
+; CORTEX-A5-NOFPU:        .eabi_attribute 9, 2
+; CORTEX-A5-NOFPU-NOT:    .fpu
+; CORTEX-A5-NOFPU:        .eabi_attribute 20, 1
+; CORTEX-A5-NOFPU:        .eabi_attribute 21, 1
+; CORTEX-A5-NOFPU:        .eabi_attribute 23, 3
+; CORTEX-A5-NOFPU:        .eabi_attribute 24, 1
+; CORTEX-A5-NOFPU:        .eabi_attribute 25, 1
+; CORTEX-A5-NOFPU:        .eabi_attribute 42, 1
+; CORTEX-A5-NOFPU:        .eabi_attribute 68, 1
+
 ; CORTEX-A9-SOFT:  .cpu cortex-a9
 ; CORTEX-A9-SOFT:  .eabi_attribute 6, 10
 ; CORTEX-A9-SOFT:  .eabi_attribute 7, 65
@@ -157,12 +283,42 @@
 ; CORTEX-A9-MP:  .eabi_attribute 23, 3
 ; CORTEX-A9-MP:  .eabi_attribute 24, 1
 ; CORTEX-A9-MP:  .eabi_attribute 25, 1
-; CORTEX-A9-NOT:  .eabi_attribute 27
-; CORTEX-A9-NOT:  .eabi_attribute 28
+; CORTEX-A9-MP-NOT:  .eabi_attribute 27
+; CORTEX-A9-MP-NOT:  .eabi_attribute 28
 ; CORTEX-A9-MP:  .eabi_attribute 36, 1
 ; CORTEX-A9-MP:  .eabi_attribute 42, 1
 ; CORTEX-A9-MP:  .eabi_attribute 68, 1
 
+; CORTEX-A12-DEFAULT:  .cpu cortex-a12
+; CORTEX-A12-DEFAULT:  .eabi_attribute 6, 10
+; CORTEX-A12-DEFAULT:  .eabi_attribute 7, 65
+; CORTEX-A12-DEFAULT:  .eabi_attribute 8, 1
+; CORTEX-A12-DEFAULT:  .eabi_attribute 9, 2
+; CORTEX-A12-DEFAULT:  .fpu neon-vfpv4
+; CORTEX-A12-DEFAULT:  .eabi_attribute 20, 1
+; CORTEX-A12-DEFAULT:  .eabi_attribute 21, 1
+; CORTEX-A12-DEFAULT:  .eabi_attribute 23, 3
+; CORTEX-A12-DEFAULT:  .eabi_attribute 24, 1
+; CORTEX-A12-DEFAULT:  .eabi_attribute 25, 1
+; CORTEX-A12-DEFAULT:  .eabi_attribute 42, 1
+; CORTEX-A12-DEFAULT:  .eabi_attribute 44, 2
+; CORTEX-A12-DEFAULT:  .eabi_attribute 68, 3
+
+; CORTEX-A12-NOFPU:  .cpu cortex-a12
+; CORTEX-A12-NOFPU:  .eabi_attribute 6, 10
+; CORTEX-A12-NOFPU:  .eabi_attribute 7, 65
+; CORTEX-A12-NOFPU:  .eabi_attribute 8, 1
+; CORTEX-A12-NOFPU:  .eabi_attribute 9, 2
+; CORTEX-A12-NOFPU-NOT:  .fpu
+; CORTEX-A12-NOFPU:  .eabi_attribute 20, 1
+; CORTEX-A12-NOFPU:  .eabi_attribute 21, 1
+; CORTEX-A12-NOFPU:  .eabi_attribute 23, 3
+; CORTEX-A12-NOFPU:  .eabi_attribute 24, 1
+; CORTEX-A12-NOFPU:  .eabi_attribute 25, 1
+; CORTEX-A12-NOFPU:  .eabi_attribute 42, 1
+; CORTEX-A12-NOFPU:  .eabi_attribute 44, 2
+; CORTEX-A12-NOFPU:  .eabi_attribute 68, 3
+
 ; CORTEX-A15: .cpu cortex-a15
 ; CORTEX-A15: .eabi_attribute 6, 10
 ; CORTEX-A15: .eabi_attribute 7, 65
@@ -183,7 +339,7 @@
 
 ; CORTEX-M0:  .cpu cortex-m0
 ; CORTEX-M0:  .eabi_attribute 6, 12
-; CORTEX-M0:  .eabi_attribute 7, 77
+; CORTEX-M0-NOT:  .eabi_attribute 7
 ; CORTEX-M0:  .eabi_attribute 8, 0
 ; CORTEX-M0:  .eabi_attribute 9, 1
 ; CORTEX-M0:  .eabi_attribute 24, 1
@@ -194,6 +350,23 @@
 ; CORTEX-M0-NOT:  .eabi_attribute 42
 ; CORTEX-M0-NOT:  .eabi_attribute 68
 
+; CORTEX-M3:  .cpu cortex-m3
+; CORTEX-M3:  .eabi_attribute 6, 10
+; CORTEX-M3:  .eabi_attribute 7, 77
+; CORTEX-M3:  .eabi_attribute 8, 0
+; CORTEX-M3:  .eabi_attribute 9, 2
+; CORTEX-M3:  .eabi_attribute 20, 1
+; CORTEX-M3:  .eabi_attribute 21, 1
+; CORTEX-M3:  .eabi_attribute 23, 3
+; CORTEX-M3:  .eabi_attribute 24, 1
+; CORTEX-M3:  .eabi_attribute 25, 1
+; CORTEX-M3-NOT:  .eabi_attribute 27
+; CORTEX-M3-NOT:  .eabi_attribute 28
+; CORTEX-M3-NOT:  .eabi_attribute 36
+; CORTEX-M3-NOT:  .eabi_attribute 42
+; CORTEX-M3-NOT:  .eabi_attribute 44
+; CORTEX-M3-NOT:  .eabi_attribute 68
+
 ; CORTEX-M4-SOFT:  .cpu cortex-m4
 ; CORTEX-M4-SOFT:  .eabi_attribute 6, 13
 ; CORTEX-M4-SOFT:  .eabi_attribute 7, 77
@@ -209,7 +382,7 @@
 ; CORTEX-M4-SOFT-NOT:  .eabi_attribute 28
 ; CORTEX-M4-SOFT:  .eabi_attribute 36, 1
 ; CORTEX-M4-SOFT-NOT:  .eabi_attribute 42
-; CORTEX-M4-SOFT:  .eabi_attribute 44, 0
+; CORTEX-M4-SOFT-NOT:  .eabi_attribute 44
 ; CORTEX-M4-SOFT-NOT:  .eabi_attribute 68
 
 ; CORTEX-M4-HARD:  .cpu cortex-m4
@@ -227,8 +400,8 @@
 ; CORTEX-M4-HARD:  .eabi_attribute 28, 1
 ; CORTEX-M4-HARD:  .eabi_attribute 36, 1
 ; CORTEX-M4-HARD-NOT:  .eabi_attribute 42
-; CORTEX-M4-HARD:  .eabi_attribute 44, 0
-; CORTEX-M4-HRAD-NOT:  .eabi_attribute 68
+; CORTEX-M4-HARD-NOT:  .eabi_attribute 44
+; CORTEX-M4-HARD-NOT:  .eabi_attribute 68
 
 ; CORTEX-R5:  .cpu cortex-r5
 ; CORTEX-R5:  .eabi_attribute 6, 10
@@ -261,7 +434,7 @@
 ; CORTEX-A53-NOT:  .eabi_attribute 28
 ; CORTEX-A53:  .eabi_attribute 36, 1
 ; CORTEX-A53:  .eabi_attribute 42, 1
-; CORTEX-A53:  .eabi_attribute 44, 2
+; CORTEX-A53-NOT:  .eabi_attribute 44
 ; CORTEX-A53:  .eabi_attribute 68, 3
 
 ; CORTEX-A57:  .cpu cortex-a57
@@ -277,7 +450,7 @@
 ; CORTEX-A57-NOT:  .eabi_attribute 28
 ; CORTEX-A57:  .eabi_attribute 36, 1
 ; CORTEX-A57:  .eabi_attribute 42, 1
-; CORTEX-A57:  .eabi_attribute 44, 2
+; CORTEX-A57-NOT:  .eabi_attribute 44
 ; CORTEX-A57:  .eabi_attribute 68, 3
 
 define i32 @f(i64 %z) {
diff --git a/test/CodeGen/ARM/cache-intrinsic.ll b/test/CodeGen/ARM/cache-intrinsic.ll
new file mode 100644
index 0000000..6048917
--- /dev/null
+++ b/test/CodeGen/ARM/cache-intrinsic.ll
@@ -0,0 +1,26 @@
+; RUN: llc %s -o - | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+target triple = "armv7--linux-gnueabihf"
+
+@buffer = global [32 x i8] c"This is a largely unused buffer\00", align 1
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+@.str1 = private unnamed_addr constant [25 x i8] c"Still, largely unused...\00", align 1
+
+define i32 @main() {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0))
+  %call1 = call i8* @strcpy(i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds ([25 x i8]* @.str1, i32 0, i32 0)) #3
+  call void @llvm.clear_cache(i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds (i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0), i32 32)) #3
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0))
+  ret i32 0
+}
+
+; CHECK: __clear_cache
+
+declare i32 @printf(i8*, ...)
+
+declare i8* @strcpy(i8*, i8*)
+
+declare void @llvm.clear_cache(i8*, i8*)
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index d463602..40694bf 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=armv6-apple-ios -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
-; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
+; RUN: llc < %s -mtriple=armv6-apple-ios5.0 -mattr=+vfp2 | FileCheck %s -check-prefix=CHECKV6
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 \
+; RUN:    | FileCheck %s -check-prefix=CHECKELF
 
 ; Enable tailcall optimization for iOS 5.0
 ; rdar://9120031
diff --git a/test/CodeGen/ARM/call.ll b/test/CodeGen/ARM/call.ll
index 107e79a..f6301cf 100644
--- a/test/CodeGen/ARM/call.ll
+++ b/test/CodeGen/ARM/call.ll
@@ -1,7 +1,11 @@
-; RUN: llc < %s -march=arm -mattr=+v4t | FileCheck %s -check-prefix=CHECKV4
-; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5
-; RUN: llc < %s -mtriple=armv6-linux-gnueabi\
-; RUN:   -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF
+; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - \
+; RUN:   | FileCheck %s -check-prefix=CHECKV4
+
+; RUN: llc -mtriple=arm-eabi -mattr=+v5t %s -o - \
+; RUN:   | FileCheck %s -check-prefix=CHECKV5
+
+; RUN: llc -mtriple=armv6-linux-gnueabi -relocation-model=pic %s -o - \
+; RUN:   | FileCheck %s -check-prefix=CHECKELF
 
 @t = weak global i32 ()* null           ; <i32 ()**> [#uses=1]
 
diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll
index f67987f..e344b08 100644
--- a/test/CodeGen/ARM/carry.ll
+++ b/test/CodeGen/ARM/carry.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define i64 @f1(i64 %a, i64 %b) {
 ; CHECK-LABEL: f1:
diff --git a/test/CodeGen/ARM/clz.ll b/test/CodeGen/ARM/clz.ll
index 5b6a584..68e8c7c 100644
--- a/test/CodeGen/ARM/clz.ll
+++ b/test/CodeGen/ARM/clz.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+v5t %s -o - | FileCheck %s
 
 declare i32 @llvm.ctlz.i32(i32, i1)
 
diff --git a/test/CodeGen/ARM/coalesce-dbgvalue.ll b/test/CodeGen/ARM/coalesce-dbgvalue.ll
index 86106a0..606c9bc 100644
--- a/test/CodeGen/ARM/coalesce-dbgvalue.ll
+++ b/test/CodeGen/ARM/coalesce-dbgvalue.ll
@@ -81,7 +81,7 @@ attributes #3 = { nounwind }
 
 !0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 182024) (llvm/trunk 182023)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !15, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/d/b/pr16110.c] [DW_LANG_C99]
 !1 = metadata !{metadata !"pr16110.c", metadata !"/d/b"}
-!2 = metadata !{i32 0}
+!2 = metadata !{}
 !3 = metadata !{metadata !4}
 !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"pr16110", metadata !"pr16110", metadata !"", i32 7, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @pr16110, null, null, metadata !9, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [pr16110]
 !5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/d/b/pr16110.c]
diff --git a/test/CodeGen/ARM/compare-call.ll b/test/CodeGen/ARM/compare-call.ll
index fac2bc5..323eb1f 100644
--- a/test/CodeGen/ARM/compare-call.ll
+++ b/test/CodeGen/ARM/compare-call.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \
-; RUN:   grep vcmpe.f32
+; RUN: llc -mtriple=arm-eabi -mattr=+v6,+vfp2 %s -o - | FileCheck %s
 
 define void @test3(float* %glob, i32 %X) {
 entry:
@@ -18,3 +17,6 @@ UnifiedReturnBlock:             ; preds = %entry
 }
 
 declare i32 @bar(...)
+
+; CHECK: vcmpe.f32
+
diff --git a/test/CodeGen/ARM/constantfp.ll b/test/CodeGen/ARM/constantfp.ll
index 974bdd7..27b6e9b 100644
--- a/test/CodeGen/ARM/constantfp.ll
+++ b/test/CodeGen/ARM/constantfp.ll
@@ -15,7 +15,7 @@ define arm_aapcs_vfpcc float @test_vmov_imm() {
 ; CHECK: vmov.i32 d0, #0
 
 ; CHECK-NONEON-LABEL: test_vmov_imm:
-; CHECK_NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}}
+; CHECK-NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}}
   ret float 0.0
 }
 
@@ -24,7 +24,7 @@ define arm_aapcs_vfpcc float @test_vmvn_imm() {
 ; CHECK: vmvn.i32 d0, #0xb0000000
 
 ; CHECK-NONEON-LABEL: test_vmvn_imm:
-; CHECK_NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}}
+; CHECK-NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}}
   ret float 8589934080.0
 }
 
@@ -33,7 +33,7 @@ define arm_aapcs_vfpcc double @test_vmov_f64() {
 ; CHECK: vmov.f64 d0, #1.0
 
 ; CHECK-NONEON-LABEL: test_vmov_f64:
-; CHECK_NONEON: vmov.f64 d0, #1.0
+; CHECK-NONEON: vmov.f64 d0, #1.0
 
   ret double 1.0
 }
@@ -43,7 +43,7 @@ define arm_aapcs_vfpcc double @test_vmov_double_imm() {
 ; CHECK: vmov.i32 d0, #0
 
 ; CHECK-NONEON-LABEL: test_vmov_double_imm:
-; CHECK_NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
+; CHECK-NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
   ret double 0.0
 }
 
@@ -52,7 +52,7 @@ define arm_aapcs_vfpcc double @test_vmvn_double_imm() {
 ; CHECK: vmvn.i32 d0, #0xb0000000
 
 ; CHECK-NONEON-LABEL: test_vmvn_double_imm:
-; CHECK_NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
+; CHECK-NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
   ret double 0x4fffffff4fffffff
 }
 
@@ -63,6 +63,6 @@ define arm_aapcs_vfpcc double @test_notvmvn_double_imm() {
 ; CHECK: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
 
 ; CHECK-NONEON-LABEL: test_notvmvn_double_imm:
-; CHECK_NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
+; CHECK-NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
   ret double 0x4fffffffffffffff
 }
diff --git a/test/CodeGen/ARM/crash-O0.ll b/test/CodeGen/ARM/crash-O0.ll
index 8bce4e0..8855bb9 100644
--- a/test/CodeGen/ARM/crash-O0.ll
+++ b/test/CodeGen/ARM/crash-O0.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -relocation-model=pic -disable-fp-elim
+; RUN: llc < %s -O0 -relocation-model=pic -disable-fp-elim -no-integrated-as
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-n32"
 target triple = "armv6-apple-darwin10"
 
diff --git a/test/CodeGen/ARM/cse-ldrlit.ll b/test/CodeGen/ARM/cse-ldrlit.ll
new file mode 100644
index 0000000..ea8c0ca
--- /dev/null
+++ b/test/CodeGen/ARM/cse-ldrlit.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mtriple=thumbv6m-apple-none-macho -relocation-model=pic -o -  %s | FileCheck %s --check-prefix=CHECK-THUMB-PIC
+; RUN: llc -mtriple=arm-apple-none-macho -relocation-model=pic -o -  %s | FileCheck %s --check-prefix=CHECK-ARM-PIC
+; RUN: llc -mtriple=thumbv6m-apple-none-macho -relocation-model=dynamic-no-pic -o -  %s | FileCheck %s --check-prefix=CHECK-DYNAMIC
+; RUN: llc -mtriple=arm-apple-none-macho -relocation-model=dynamic-no-pic -o -  %s | FileCheck %s --check-prefix=CHECK-DYNAMIC
+; RUN: llc -mtriple=thumbv6m-apple-none-macho -relocation-model=static -o -  %s | FileCheck %s --check-prefix=CHECK-STATIC
+; RUN: llc -mtriple=arm-apple-none-macho -relocation-model=static -o -  %s | FileCheck %s --check-prefix=CHECK-STATIC
+@var = global [16 x i32] zeroinitializer
+
+declare void @bar(i32*)
+
+define void @foo() {                             ; takes the address of @var on two paths
+  %flag = load i32* getelementptr inbounds([16 x i32]* @var, i32 0, i32 1)  ; read @var[1]
+  %tst = icmp eq i32 %flag, 0
+  br i1 %tst, label %true, label %false
+true:                                            ; @var[1] == 0
+  tail call void @bar(i32* getelementptr inbounds([16 x i32]* @var, i32 0, i32 4))  ; pass &@var[4]
+  ret void
+false:                                           ; @var[1] != 0: nothing to do
+  ret void
+}
+
+; CHECK-THUMB-PIC-LABEL: foo:
+; CHECK-THUMB-PIC: ldr r0, LCPI0_0
+; CHECK-THUMB-PIC: LPC0_0:
+; CHECK-THUMB-PIC-NEXT: add r0, pc
+; CHECK-THUMB-PIC: ldr {{r[1-9][0-9]?}}, [r0, #4]
+
+; CHECK-THUMB-PIC: LCPI0_0:
+; CHECK-THUMB-PIC-NEXT: .long _var-(LPC0_0+4)
+; CHECK-THUMB-PIC-NOT: LCPI0_1
+
+
+; CHECK-ARM-PIC-LABEL: foo:
+; CHECK-ARM-PIC: ldr [[VAR_OFFSET:r[0-9]+]], LCPI0_0
+; CHECK-ARM-PIC: LPC0_0:
+; CHECK-ARM-PIC-NEXT: ldr r0, [pc, [[VAR_OFFSET]]]
+; CHECK-ARM-PIC: ldr {{r[1-9][0-9]?}}, [r0, #4]
+
+; CHECK-ARM-PIC: LCPI0_0:
+; CHECK-ARM-PIC-NEXT: .long _var-(LPC0_0+8)
+; CHECK-ARM-PIC-NOT: LCPI0_1
+
+
+; CHECK-DYNAMIC-LABEL: foo:
+; CHECK-DYNAMIC: ldr r0, LCPI0_0
+; CHECK-DYNAMIC: ldr {{r[1-9][0-9]?}}, [r0, #4]
+
+; CHECK-DYNAMIC: LCPI0_0:
+; CHECK-DYNAMIC-NEXT: .long _var
+; CHECK-DYNAMIC-NOT: LCPI0_1
+
+
+; CHECK-STATIC-LABEL: foo:
+; CHECK-STATIC: ldr r0, LCPI0_0
+; CHECK-STATIC: ldr {{r[1-9][0-9]?}}, [r0, #4]
+
+; CHECK-STATIC: LCPI0_0:
+; CHECK-STATIC-NEXT: .long _var{{$}}
+; CHECK-STATIC-NOT: LCPI0_1
+
+
diff --git a/test/CodeGen/ARM/ctz.ll b/test/CodeGen/ARM/ctz.ll
index 2c7efc7..2d88b03 100644
--- a/test/CodeGen/ARM/ctz.ll
+++ b/test/CodeGen/ARM/ctz.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+v6t2 %s -o - | FileCheck %s
 
 declare i32 @llvm.cttz.i32(i32, i1)
 
diff --git a/test/CodeGen/ARM/debug-frame-large-stack.ll b/test/CodeGen/ARM/debug-frame-large-stack.ll
new file mode 100644
index 0000000..5bafce9
--- /dev/null
+++ b/test/CodeGen/ARM/debug-frame-large-stack.ll
@@ -0,0 +1,99 @@
+; RUN: llc -filetype=asm -o - < %s -mtriple arm-arm-none-eabi -disable-fp-elim| FileCheck %s --check-prefix=CHECK-ARM
+; RUN: llc -filetype=asm -o - < %s -mtriple arm-arm-none-eabi | FileCheck %s --check-prefix=CHECK-ARM-FP-ELIM
+
+define void @test1() {                 ; small frame case
+    %tmp = alloca [ 64 x i32 ] , align 4  ; 64 * 4 = 256 bytes, matching the expected "sub sp, sp, #256"
+    ret void
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/large.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"large.c", metadata !"/tmp"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test1", metadata !"test1", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @test1, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [test1]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/large.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!10 = metadata !{metadata !"clang version 3.5 "}
+!11 = metadata !{i32 2, i32 0, metadata !4, null}
+
+; CHECK-ARM-LABEL: test1:
+; CHECK-ARM: .cfi_startproc
+; CHECK-ARM: sub    sp, sp, #256
+; CHECK-ARM: .cfi_endproc
+
+; CHECK-ARM-FP-ELIM-LABEL: test1:
+; CHECK-ARM-FP-ELIM: .cfi_startproc
+; CHECK-ARM-FP-ELIM: sub    sp, sp, #256
+; CHECK-ARM-FP-ELIM: .cfi_endproc
+
+define void @test2() {                 ; frame that is lowered in two "sub sp" steps
+    %tmp = alloca [ 4168 x i8 ] , align 4  ; 4168 = 4096 + 72, see the two expected sp adjustments
+    ret void
+}
+
+; CHECK-ARM-LABEL: test2:
+; CHECK-ARM: .cfi_startproc
+; CHECK-ARM: push    {r4, r5}
+; CHECK-ARM: .cfi_def_cfa_offset 8
+; CHECK-ARM: .cfi_offset r5, -4
+; CHECK-ARM: .cfi_offset r4, -8
+; CHECK-ARM: sub    sp, sp, #72
+; CHECK-ARM: sub    sp, sp, #4096
+; CHECK-ARM: .cfi_def_cfa_offset 4176
+; CHECK-ARM: .cfi_endproc
+
+; CHECK-ARM-FP-ELIM-LABEL: test2:
+; CHECK-ARM-FP-ELIM: .cfi_startproc
+; CHECK-ARM-FP-ELIM: push    {r4, r5}
+; CHECK-ARM-FP-ELIM: .cfi_def_cfa_offset 8
+; CHECK-ARM-FP-ELIM: .cfi_offset r5, -4
+; CHECK-ARM-FP-ELIM: .cfi_offset r4, -8
+; CHECK-ARM-FP-ELIM: sub    sp, sp, #72
+; CHECK-ARM-FP-ELIM: sub    sp, sp, #4096
+; CHECK-ARM-FP-ELIM: .cfi_def_cfa_offset 4176
+; CHECK-ARM-FP-ELIM: .cfi_endproc
+
+define i32 @test3() {                  ; huge, over-aligned frame; always returns 0
+	%retval = alloca i32, align 4          ; not read or written in this function
+	%tmp = alloca i32, align 4
+	%a = alloca [805306369 x i8], align 16 ; 768 MiB + 1 byte with 16-byte alignment (expected output has "bic sp, sp, #15")
+	store i32 0, i32* %tmp
+	%tmp1 = load i32* %tmp
+        ret i32 %tmp1
+}
+
+; CHECK-ARM-LABEL: test3:
+; CHECK-ARM: .cfi_startproc
+; CHECK-ARM: push    {r4, r5, r11}
+; CHECK-ARM: .cfi_def_cfa_offset 12
+; CHECK-ARM: .cfi_offset r11, -4
+; CHECK-ARM: .cfi_offset r5, -8
+; CHECK-ARM: .cfi_offset r4, -12
+; CHECK-ARM: add    r11, sp, #8
+; CHECK-ARM: .cfi_def_cfa r11, 4
+; CHECK-ARM: sub    sp, sp, #20
+; CHECK-ARM: sub    sp, sp, #805306368
+; CHECK-ARM: bic    sp, sp, #15
+; CHECK-ARM: .cfi_endproc
+
+; CHECK-ARM-FP-ELIM-LABEL: test3:
+; CHECK-ARM-FP-ELIM: .cfi_startproc
+; CHECK-ARM-FP-ELIM: push    {r4, r5, r11}
+; CHECK-ARM-FP-ELIM: .cfi_def_cfa_offset 12
+; CHECK-ARM-FP-ELIM: .cfi_offset r11, -4
+; CHECK-ARM-FP-ELIM: .cfi_offset r5, -8
+; CHECK-ARM-FP-ELIM: .cfi_offset r4, -12
+; CHECK-ARM-FP-ELIM: add    r11, sp, #8
+; CHECK-ARM-FP-ELIM: .cfi_def_cfa r11, 4
+; CHECK-ARM-FP-ELIM: sub    sp, sp, #20
+; CHECK-ARM-FP-ELIM: sub    sp, sp, #805306368
+; CHECK-ARM-FP-ELIM: bic    sp, sp, #15
+; CHECK-ARM-FP-ELIM: .cfi_endproc
+
diff --git a/test/CodeGen/ARM/debug-frame-no-debug.ll b/test/CodeGen/ARM/debug-frame-no-debug.ll
new file mode 100644
index 0000000..81702c6
--- /dev/null
+++ b/test/CodeGen/ARM/debug-frame-no-debug.ll
@@ -0,0 +1,97 @@
+; ARM EHABI integrated test
+
+; This test case checks that the ARM DWARF stack frame directives
+; are not generated if compiling with no debug information.
+  
+; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN:     -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
+
+; RUN: llc -mtriple thumb-unknown-linux-gnueabi \
+; RUN:     -disable-fp-elim -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-THUMB-FP
+
+;-------------------------------------------------------------------------------
+; Test 1
+;-------------------------------------------------------------------------------
+; This is the LLVM assembly generated from following C++ code:
+;
+;   extern void print(int, int, int, int, int);
+;   extern void print(double, double, double, double, double);
+;
+;   void test(int a, int b, int c, int d, int e,
+;             double m, double n, double p, double q, double r) {
+;     try {
+;       print(a, b, c, d, e);
+;     } catch (...) {
+;       print(m, n, p, q, r);
+;     }
+;   }
+
+declare void @_Z5printiiiii(i32, i32, i32, i32, i32)
+
+declare void @_Z5printddddd(double, double, double, double, double)
+
+define void @_Z4testiiiiiddddd(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e,
+                               double %m, double %n, double %p,
+                               double %q, double %r) {
+entry:                                            ; try body: print(a, b, c, d, e)
+  invoke void @_Z5printiiiii(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e)
+          to label %try.cont unwind label %lpad
+
+lpad:                                             ; catch (...) handler
+  %0 = landingpad { i8*, i32 }
+          personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null                          ; null clause: catches everything
+  %1 = extractvalue { i8*, i32 } %0, 0            ; field 0 is handed to __cxa_begin_catch
+  %2 = tail call i8* @__cxa_begin_catch(i8* %1)
+  invoke void @_Z5printddddd(double %m, double %n, double %p,
+                             double %q, double %r)
+          to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2:                                     ; handler body returned normally
+  tail call void @__cxa_end_catch()
+  br label %try.cont
+
+try.cont:                                         ; common exit for try body and handler
+  ret void
+
+lpad1:                                            ; print(m, n, p, q, r) unwound inside the handler
+  %3 = landingpad { i8*, i32 }
+          personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  invoke void @__cxa_end_catch()
+          to label %eh.resume unwind label %terminate.lpad
+
+eh.resume:                                        ; continue unwinding with the value from %lpad1
+  resume { i8*, i32 } %3
+
+terminate.lpad:                                   ; __cxa_end_catch unwound during cleanup
+  %4 = landingpad { i8*, i32 }
+          personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  %5 = extractvalue { i8*, i32 } %4, 0
+  tail call void @__clang_call_terminate(i8* %5)
+  unreachable
+}
+
+declare void @__clang_call_terminate(i8*)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+declare void @_ZSt9terminatev()
+
+; CHECK-FP-ELIM-LABEL: _Z4testiiiiiddddd:
+; CHECK-FP-ELIM-NOT:   .cfi_startproc
+; CHECK-FP-ELIM:   push  {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FP-ELIM-NOT:   .cfi_def_cfa_offset 36
+
+; CHECK-THUMB-FP-LABEL: _Z4testiiiiiddddd:
+; CHECK-THUMB-FP-NOT:   .cfi_startproc
+; CHECK-THUMB-FP:   push   {r4, r5, r6, r7, lr}
+; CHECK-THUMB-FP-NOT:   .cfi_def_cfa_offset 20
+
diff --git a/test/CodeGen/ARM/debug-frame-vararg.ll b/test/CodeGen/ARM/debug-frame-vararg.ll
new file mode 100644
index 0000000..9b39525
--- /dev/null
+++ b/test/CodeGen/ARM/debug-frame-vararg.ll
@@ -0,0 +1,141 @@
+; RUN: llc -mtriple arm-unknown-linux-gnueabi -filetype asm -o - %s | FileCheck %s --check-prefix=CHECK-FP
+; RUN: llc -mtriple arm-unknown-linux-gnueabi -filetype asm -o - %s -disable-fp-elim | FileCheck %s --check-prefix=CHECK-FP-ELIM
+; RUN: llc -mtriple thumb-unknown-linux-gnueabi -filetype asm -o - %s | FileCheck %s --check-prefix=CHECK-THUMB-FP
+; RUN: llc -mtriple thumb-unknown-linux-gnueabi -filetype asm -o - %s -disable-fp-elim | FileCheck %s --check-prefix=CHECK-THUMB-FP-ELIM
+
+; Tests that the initial space allocated to the varargs on the stack is
+; taken into account in the .cfi_ directives.
+
+; Generated from the C program:
+; #include <stdarg.h>
+;
+; extern int foo(int);
+;
+; int sum(int count, ...) {
+;  va_list vl;
+;  va_start(vl, count);
+;  int sum = 0;
+;  for (int i = 0; i < count; i++) {
+;   sum += foo(va_arg(vl, int));
+;  }
+;  va_end(vl);
+; }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/var.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"var.c", metadata !"/tmp"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"sum", metadata !"sum", metadata !"", i32 5, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, ...)* @sum, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [sum]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/var.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8, metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!11 = metadata !{metadata !"clang version 3.5 "}
+!12 = metadata !{i32 786689, metadata !4, metadata !"count", metadata !5, i32 16777221, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [count] [line 5]
+!13 = metadata !{i32 5, i32 0, metadata !4, null}
+!14 = metadata !{i32 786688, metadata !4, metadata !"vl", metadata !5, i32 6, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [vl] [line 6]
+!15 = metadata !{i32 786454, metadata !16, null, metadata !"va_list", i32 30, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_typedef ] [va_list] [line 30, size 0, align 0, offset 0] [from __builtin_va_list]
+!16 = metadata !{metadata !"/linux-x86_64-high/gcc_4.7.2/dbg/llvm/bin/../lib/clang/3.5/include/stdarg.h", metadata !"/tmp"}
+!17 = metadata !{i32 786454, metadata !1, null, metadata !"__builtin_va_list", i32 6, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ] [__builtin_va_list] [line 6, size 0, align 0, offset 0] [from __va_list]
+!18 = metadata !{i32 786451, metadata !1, null, metadata !"__va_list", i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__va_list] [line 6, size 32, align 32, offset 0] [def] [from ]
+!19 = metadata !{metadata !20}
+!20 = metadata !{i32 786445, metadata !1, metadata !18, metadata !"__ap", i32 6, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_member ] [__ap] [line 6, size 32, align 32, offset 0] [from ]
+!21 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from ]
+!22 = metadata !{i32 6, i32 0, metadata !4, null}
+!23 = metadata !{i32 7, i32 0, metadata !4, null}
+!24 = metadata !{i32 786688, metadata !4, metadata !"sum", metadata !5, i32 8, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [sum] [line 8]
+!25 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
+!26 = metadata !{i32 786688, metadata !27, metadata !"i", metadata !5, i32 9, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 9]
+!27 = metadata !{i32 786443, metadata !1, metadata !4, i32 9, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/var.c]
+!28 = metadata !{i32 9, i32 0, metadata !27, null}
+!29 = metadata !{i32 10, i32 0, metadata !30, null}
+!30 = metadata !{i32 786443, metadata !1, metadata !27, i32 9, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/tmp/var.c]
+!31 = metadata !{i32 11, i32 0, metadata !30, null}
+!32 = metadata !{i32 12, i32 0, metadata !4, null}
+!33 = metadata !{i32 13, i32 0, metadata !4, null}
+
+; CHECK-FP-LABEL: sum
+; CHECK-FP: .cfi_startproc
+; CHECK-FP: sub    sp, sp, #16
+; CHECK-FP: .cfi_def_cfa_offset 16
+; CHECK-FP: push   {r4, lr}
+; CHECK-FP: .cfi_def_cfa_offset 24
+; CHECK-FP: .cfi_offset lr, -20
+; CHECK-FP: .cfi_offset r4, -24
+; CHECK-FP: sub    sp, sp, #8
+; CHECK-FP: .cfi_def_cfa_offset 32
+
+; CHECK-FP-ELIM-LABEL: sum
+; CHECK-FP-ELIM: .cfi_startproc
+; CHECK-FP-ELIM: sub    sp, sp, #16
+; CHECK-FP-ELIM: .cfi_def_cfa_offset 16
+; CHECK-FP-ELIM: push   {r4, r11, lr}
+; CHECK-FP-ELIM: .cfi_def_cfa_offset 28
+; CHECK-FP-ELIM: .cfi_offset lr, -20
+; CHECK-FP-ELIM: .cfi_offset r11, -24
+; CHECK-FP-ELIM: .cfi_offset r4, -28
+; CHECK-FP-ELIM: add    r11, sp, #4
+; CHECK-FP-ELIM: .cfi_def_cfa r11, 24
+
+; CHECK-THUMB-FP-LABEL: sum
+; CHECK-THUMB-FP: .cfi_startproc
+; CHECK-THUMB-FP: sub    sp, #16
+; CHECK-THUMB-FP: .cfi_def_cfa_offset 16
+; CHECK-THUMB-FP: push   {r4, r5, r7, lr}
+; CHECK-THUMB-FP: .cfi_def_cfa_offset 32
+; CHECK-THUMB-FP: .cfi_offset lr, -20
+; CHECK-THUMB-FP: .cfi_offset r7, -24
+; CHECK-THUMB-FP: .cfi_offset r5, -28
+; CHECK-THUMB-FP: .cfi_offset r4, -32
+; CHECK-THUMB-FP: sub    sp, #8
+; CHECK-THUMB-FP: .cfi_def_cfa_offset 40
+
+; CHECK-THUMB-FP-ELIM-LABEL: sum
+; CHECK-THUMB-FP-ELIM: .cfi_startproc
+; CHECK-THUMB-FP-ELIM: sub    sp, #16
+; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 16
+; CHECK-THUMB-FP-ELIM: push   {r4, r5, r7, lr}
+; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 32
+; CHECK-THUMB-FP-ELIM: .cfi_offset lr, -20
+; CHECK-THUMB-FP-ELIM: .cfi_offset r7, -24
+; CHECK-THUMB-FP-ELIM: .cfi_offset r5, -28
+; CHECK-THUMB-FP-ELIM: .cfi_offset r4, -32
+; CHECK-THUMB-FP-ELIM: add    r7, sp, #8
+; CHECK-THUMB-FP-ELIM: .cfi_def_cfa r7, 24
+
+define i32 @sum(i32 %count, ...) {               ; calls foo() on each of %count i32 varargs
+entry:
+  %vl = alloca i8*, align 4                       ; the va_list: a single pointer slot
+  %vl1 = bitcast i8** %vl to i8*
+  call void @llvm.va_start(i8* %vl1)
+  %cmp4 = icmp sgt i32 %count, 0                  ; skip the loop when count <= 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %ap.cur = load i8** %vl, align 4                ; address of the current vararg
+  %ap.next = getelementptr i8* %ap.cur, i32 4     ; step the cursor by 4 bytes
+  store i8* %ap.next, i8** %vl, align 4
+  %0 = bitcast i8* %ap.cur to i32*
+  %1 = load i32* %0, align 4                      ; va_arg(vl, int)
+  %call = call i32 @foo(i32 %1) #1                ; result is never used
+  %inc = add nsw i32 %i.05, 1
+  %exitcond = icmp eq i32 %inc, %count
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  call void @llvm.va_end(i8* %vl1)
+  ret i32 undef                                   ; the C source above has no return statement
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare i32 @foo(i32)
+
+declare void @llvm.va_end(i8*) nounwind
diff --git a/test/CodeGen/ARM/debug-frame.ll b/test/CodeGen/ARM/debug-frame.ll
new file mode 100644
index 0000000..cf68767
--- /dev/null
+++ b/test/CodeGen/ARM/debug-frame.ll
@@ -0,0 +1,574 @@
+; ARM EHABI integrated test
+
+; This test case checks whether the ARM DWARF stack frame directives
+; are properly generated or not.
+
+; We have to check several cases:
+; (1) arm with -disable-fp-elim
+; (2) arm without -disable-fp-elim
+; (3) armv7 with -disable-fp-elim
+; (4) armv7 without -disable-fp-elim
+; (5) thumb with -disable-fp-elim
+; (6) thumb without -disable-fp-elim
+; (7) thumbv7 with -disable-fp-elim
+; (8) thumbv7 without -disable-fp-elim
+; (9) thumbv7 with -no-integrated-as
+
+; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN:     -disable-fp-elim -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP
+
+; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN:     -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -disable-fp-elim -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-V7-FP
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-V7-FP-ELIM
+
+; RUN: llc -mtriple thumb-unknown-linux-gnueabi \
+; RUN:     -disable-fp-elim -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-THUMB-FP
+
+; RUN: llc -mtriple thumb-unknown-linux-gnueabi \
+; RUN:     -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-THUMB-FP-ELIM
+
+; RUN: llc -mtriple thumbv7-unknown-linux-gnueabi \
+; RUN:     -disable-fp-elim -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-THUMB-V7-FP
+
+; RUN: llc -mtriple thumbv7-unknown-linux-gnueabi \
+; RUN:     -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-THUMB-V7-FP-ELIM
+
+; RUN: llc -mtriple thumbv7-unknown-linux-gnueabi \
+; RUN:     -disable-fp-elim -no-integrated-as -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-THUMB-V7-FP-NOIAS
+
+;-------------------------------------------------------------------------------
+; Test 1
+;-------------------------------------------------------------------------------
+; This is the LLVM assembly generated from following C++ code:
+;
+;   extern void print(int, int, int, int, int);
+;   extern void print(double, double, double, double, double);
+;
+;   void test(int a, int b, int c, int d, int e,
+;             double m, double n, double p, double q, double r) {
+;     try {
+;       print(a, b, c, d, e);
+;     } catch (...) {
+;       print(m, n, p, q, r);
+;     }
+;   }
+
+declare void @_Z5printiiiii(i32, i32, i32, i32, i32)
+
+declare void @_Z5printddddd(double, double, double, double, double)
+
+define void @_Z4testiiiiiddddd(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e,
+                               double %m, double %n, double %p,
+                               double %q, double %r) {
+entry:                                            ; try body: print(a, b, c, d, e)
+  invoke void @_Z5printiiiii(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e)
+          to label %try.cont unwind label %lpad
+
+lpad:                                             ; catch (...) handler
+  %0 = landingpad { i8*, i32 }
+          personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null                          ; null clause: catches everything
+  %1 = extractvalue { i8*, i32 } %0, 0            ; field 0 is handed to __cxa_begin_catch
+  %2 = tail call i8* @__cxa_begin_catch(i8* %1)
+  invoke void @_Z5printddddd(double %m, double %n, double %p,
+                             double %q, double %r)
+          to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2:                                     ; handler body returned normally
+  tail call void @__cxa_end_catch()
+  br label %try.cont
+
+try.cont:                                         ; common exit for try body and handler
+  ret void
+
+lpad1:                                            ; print(m, n, p, q, r) unwound inside the handler
+  %3 = landingpad { i8*, i32 }
+          personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  invoke void @__cxa_end_catch()
+          to label %eh.resume unwind label %terminate.lpad
+
+eh.resume:                                        ; continue unwinding with the value from %lpad1
+  resume { i8*, i32 } %3
+
+terminate.lpad:                                   ; __cxa_end_catch unwound during cleanup
+  %4 = landingpad { i8*, i32 }
+          personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  %5 = extractvalue { i8*, i32 } %4, 0
+  tail call void @__clang_call_terminate(i8* %5)
+  unreachable
+}
+
+declare void @__clang_call_terminate(i8*)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+declare void @_ZSt9terminatev()
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11}
+!llvm.ident = !{!12}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/exp.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"exp.cpp", metadata !"/tmp"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"_Z4testiiiiiddddd", i32 4, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32, i32, i32, i32, i32, double, double, double, double, double)* @_Z4testiiiiiddddd, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 5] [test]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/exp.cpp]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null, metadata !8, metadata !8, metadata !8, metadata !8, metadata !8, metadata !9, metadata !9, metadata !9, metadata !9, metadata !9}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!10 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!12 = metadata !{metadata !"clang version 3.5 "}
+!13 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !5, i32 16777220, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 4]
+!14 = metadata !{i32 4, i32 0, metadata !4, null}
+!15 = metadata !{i32 786689, metadata !4, metadata !"b", metadata !5, i32 33554436, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 4]
+!16 = metadata !{i32 786689, metadata !4, metadata !"c", metadata !5, i32 50331652, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [c] [line 4]
+!17 = metadata !{i32 786689, metadata !4, metadata !"d", metadata !5, i32 67108868, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [d] [line 4]
+!18 = metadata !{i32 786689, metadata !4, metadata !"e", metadata !5, i32 83886084, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [e] [line 4]
+!19 = metadata !{i32 786689, metadata !4, metadata !"m", metadata !5, i32 100663301, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [m] [line 5]
+!20 = metadata !{i32 5, i32 0, metadata !4, null}
+!21 = metadata !{i32 786689, metadata !4, metadata !"n", metadata !5, i32 117440517, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [n] [line 5]
+!22 = metadata !{i32 786689, metadata !4, metadata !"p", metadata !5, i32 134217733, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p] [line 5]
+!23 = metadata !{i32 786689, metadata !4, metadata !"q", metadata !5, i32 150994949, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [q] [line 5]
+!24 = metadata !{i32 786689, metadata !4, metadata !"r", metadata !5, i32 167772165, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [r] [line 5]
+!25 = metadata !{i32 7, i32 0, metadata !26, null}
+!26 = metadata !{i32 786443, metadata !1, metadata !4, i32 6, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/exp.cpp]
+!27 = metadata !{i32 8, i32 0, metadata !26, null} ; [ DW_TAG_imported_declaration ]
+!28 = metadata !{i32 11, i32 0, metadata !26, null}
+!29 = metadata !{i32 9, i32 0, metadata !30, null}
+!30 = metadata !{i32 786443, metadata !1, metadata !4, i32 8, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/tmp/exp.cpp]
+!31 = metadata !{i32 10, i32 0, metadata !30, null}
+!32 = metadata !{i32 10, i32 0, metadata !4, null}
+!33 = metadata !{i32 11, i32 0, metadata !4, null}
+!34 = metadata !{i32 11, i32 0, metadata !30, null}
+
+; CHECK-FP-LABEL: _Z4testiiiiiddddd:
+; CHECK-FP:   .cfi_startproc
+; CHECK-FP:   push   {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FP:   .cfi_def_cfa_offset 36
+; CHECK-FP:   .cfi_offset lr, -4
+; CHECK-FP:   .cfi_offset r11, -8
+; CHECK-FP:   .cfi_offset r10, -12
+; CHECK-FP:   .cfi_offset r9, -16
+; CHECK-FP:   .cfi_offset r8, -20
+; CHECK-FP:   .cfi_offset r7, -24
+; CHECK-FP:   .cfi_offset r6, -28
+; CHECK-FP:   .cfi_offset r5, -32
+; CHECK-FP:   .cfi_offset r4, -36
+; CHECK-FP:   add    r11, sp, #28
+; CHECK-FP:   .cfi_def_cfa r11, 8
+; CHECK-FP:   sub    sp, sp, #28
+; CHECK-FP:   .cfi_endproc
+
+; CHECK-FP-ELIM-LABEL: _Z4testiiiiiddddd:
+; CHECK-FP-ELIM:   .cfi_startproc
+; CHECK-FP-ELIM:   push  {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FP-ELIM:   .cfi_def_cfa_offset 36
+; CHECK-FP-ELIM:   .cfi_offset lr, -4
+; CHECK-FP-ELIM:   .cfi_offset r11, -8
+; CHECK-FP-ELIM:   .cfi_offset r10, -12
+; CHECK-FP-ELIM:   .cfi_offset r9, -16
+; CHECK-FP-ELIM:   .cfi_offset r8, -20
+; CHECK-FP-ELIM:   .cfi_offset r7, -24
+; CHECK-FP-ELIM:   .cfi_offset r6, -28
+; CHECK-FP-ELIM:   .cfi_offset r5, -32
+; CHECK-FP-ELIM:   .cfi_offset r4, -36
+; CHECK-FP-ELIM:   sub   sp, sp, #28
+; CHECK-FP-ELIM:   .cfi_def_cfa_offset 64
+; CHECK-FP-ELIM:   .cfi_endproc
+
+; CHECK-V7-FP-LABEL: _Z4testiiiiiddddd:
+; CHECK-V7-FP:   .cfi_startproc
+; CHECK-V7-FP:   push   {r4, r11, lr}
+; CHECK-V7-FP:   .cfi_def_cfa_offset 12
+; CHECK-V7-FP:   .cfi_offset lr, -4
+; CHECK-V7-FP:   .cfi_offset r11, -8
+; CHECK-V7-FP:   .cfi_offset r4, -12
+; CHECK-V7-FP:   add    r11, sp, #4
+; CHECK-V7-FP:   .cfi_def_cfa r11, 8
+; CHECK-V7-FP:   vpush  {d8, d9, d10, d11, d12}
+; CHECK-V7-FP:   .cfi_offset d12, -24
+; CHECK-V7-FP:   .cfi_offset d11, -32
+; CHECK-V7-FP:   .cfi_offset d10, -40
+; CHECK-V7-FP:   .cfi_offset d9, -48
+; CHECK-V7-FP:   .cfi_offset d8, -56
+; CHECK-V7-FP:   sub    sp, sp, #28
+; CHECK-V7-FP:   .cfi_endproc
+
+; CHECK-V7-FP-ELIM-LABEL: _Z4testiiiiiddddd:
+; CHECK-V7-FP-ELIM:   .cfi_startproc
+; CHECK-V7-FP-ELIM:   push   {r4, lr}
+; CHECK-V7-FP-ELIM:   .cfi_def_cfa_offset 8
+; CHECK-V7-FP-ELIM:   .cfi_offset lr, -4
+; CHECK-V7-FP-ELIM:   .cfi_offset r4, -8
+; CHECK-V7-FP-ELIM:   vpush  {d8, d9, d10, d11, d12}
+; CHECK-V7-FP-ELIM:   .cfi_def_cfa_offset 48
+; CHECK-V7-FP-ELIM:   .cfi_offset d12, -16
+; CHECK-V7-FP-ELIM:   .cfi_offset d11, -24
+; CHECK-V7-FP-ELIM:   .cfi_offset d10, -32
+; CHECK-V7-FP-ELIM:   .cfi_offset d9, -40
+; CHECK-V7-FP-ELIM:   .cfi_offset d8, -48
+; CHECK-V7-FP-ELIM:   sub    sp, sp, #24
+; CHECK-V7-FP-ELIM:   .cfi_def_cfa_offset 72
+; CHECK-V7-FP-ELIM:   .cfi_endproc
+
+; CHECK-THUMB-FP-LABEL: _Z4testiiiiiddddd:
+; CHECK-THUMB-FP:   .cfi_startproc
+; CHECK-THUMB-FP:   push   {r4, r5, r6, r7, lr}
+; CHECK-THUMB-FP:   .cfi_def_cfa_offset 20
+; CHECK-THUMB-FP:   .cfi_offset lr, -4
+; CHECK-THUMB-FP:   .cfi_offset r7, -8
+; CHECK-THUMB-FP:   .cfi_offset r6, -12
+; CHECK-THUMB-FP:   .cfi_offset r5, -16
+; CHECK-THUMB-FP:   .cfi_offset r4, -20
+; CHECK-THUMB-FP:   add    r7, sp, #12
+; CHECK-THUMB-FP:   .cfi_def_cfa r7, 8
+; CHECK-THUMB-FP:   sub    sp, #60
+; CHECK-THUMB-FP:   .cfi_endproc
+
+; CHECK-THUMB-FP-ELIM-LABEL: _Z4testiiiiiddddd:
+; CHECK-THUMB-FP-ELIM:   .cfi_startproc
+; CHECK-THUMB-FP-ELIM:   push   {r4, r5, r6, r7, lr}
+; CHECK-THUMB-FP-ELIM:   .cfi_def_cfa_offset 20
+; CHECK-THUMB-FP-ELIM:   .cfi_offset lr, -4
+; CHECK-THUMB-FP-ELIM:   .cfi_offset r7, -8
+; CHECK-THUMB-FP-ELIM:   .cfi_offset r6, -12
+; CHECK-THUMB-FP-ELIM:   .cfi_offset r5, -16
+; CHECK-THUMB-FP-ELIM:   .cfi_offset r4, -20
+; CHECK-THUMB-FP-ELIM:   sub    sp, #60
+; CHECK-THUMB-FP-ELIM:   .cfi_def_cfa_offset 80
+; CHECK-THUMB-FP-ELIM:   .cfi_endproc
+
+; CHECK-THUMB-V7-FP-LABEL: _Z4testiiiiiddddd:
+; CHECK-THUMB-V7-FP:   .cfi_startproc
+; CHECK-THUMB-V7-FP:   push.w   {r4, r7, r11, lr}
+; CHECK-THUMB-V7-FP:   .cfi_def_cfa_offset 16
+; CHECK-THUMB-V7-FP:   .cfi_offset lr, -4
+; CHECK-THUMB-V7-FP:   .cfi_offset r11, -8
+; CHECK-THUMB-V7-FP:   .cfi_offset r7, -12
+; CHECK-THUMB-V7-FP:   .cfi_offset r4, -16
+; CHECK-THUMB-V7-FP:   add    r7, sp, #4
+; CHECK-THUMB-V7-FP:   .cfi_def_cfa r7, 12
+; CHECK-THUMB-V7-FP:   vpush  {d8, d9, d10, d11, d12}
+; CHECK-THUMB-V7-FP:   .cfi_offset d12, -24
+; CHECK-THUMB-V7-FP:   .cfi_offset d11, -32
+; CHECK-THUMB-V7-FP:   .cfi_offset d10, -40
+; CHECK-THUMB-V7-FP:   .cfi_offset d9, -48
+; CHECK-THUMB-V7-FP:   .cfi_offset d8, -56
+; CHECK-THUMB-V7-FP:   sub    sp, #24
+; CHECK-THUMB-V7-FP:   .cfi_endproc
+
+; CHECK-THUMB-V7-FP-ELIM-LABEL: _Z4testiiiiiddddd:
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_startproc
+; CHECK-THUMB-V7-FP-ELIM:   push   {r4, lr}
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_def_cfa_offset 8
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_offset lr, -4
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_offset r4, -8
+; CHECK-THUMB-V7-FP-ELIM:   vpush  {d8, d9, d10, d11, d12}
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_def_cfa_offset 48
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_offset d12, -16
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_offset d11, -24
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_offset d10, -32
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_offset d9, -40
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_offset d8, -48
+; CHECK-THUMB-V7-FP-ELIM:   sub    sp, #24
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_def_cfa_offset 72
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_endproc
+
+; CHECK-THUMB-V7-FP-NOIAS-LABEL: _Z4testiiiiiddddd:
+; CHECK-THUMB-V7-FP-NOIAS:   .cfi_startproc
+; CHECK-THUMB-V7-FP-NOIAS:   push.w   {r4, r7, r11, lr}
+; CHECK-THUMB-V7-FP-NOIAS:   .cfi_def_cfa_offset 16
+; CHECK-THUMB-V7-FP-NOIAS:   .cfi_offset 14, -4
+; CHECK-THUMB-V7-FP-NOIAS:   .cfi_offset 11, -8
+; CHECK-THUMB-V7-FP-NOIAS:   .cfi_offset 7, -12
+; CHECK-THUMB-V7-FP-NOIAS:   .cfi_offset 4, -16
+; CHECK-THUMB-V7-FP-NOIAS:   add    r7, sp, #4
+; CHECK-THUMB-V7-FP-NOIAS:   .cfi_def_cfa 7, 12
+; CHECK-THUMB-V7-FP-NOIAS:   vpush  {d8, d9, d10, d11, d12}
+; CHECK-THUMB-V7-FP-NOIAS:   .cfi_offset 268, -24
+; CHECK-THUMB-V7-FP-NOIAS:   .cfi_offset 267, -32
+; CHECK-THUMB-V7-FP-NOIAS:   .cfi_offset 266, -40
+; CHECK-THUMB-V7-FP-NOIAS:   .cfi_offset 265, -48
+; CHECK-THUMB-V7-FP-NOIAS:   .cfi_offset 264, -56
+; CHECK-THUMB-V7-FP-NOIAS:   sub    sp, #24
+; CHECK-THUMB-V7-FP-NOIAS:   .cfi_endproc
+
+;-------------------------------------------------------------------------------
+; Test 2
+;-------------------------------------------------------------------------------
+
+declare void @throw_exception_2()
+
+define void @test2() {
+entry:
+  call void @throw_exception_2() ; plain call (no 'tail' marker); the test2 checks below expect a push/pop pair in this function
+  ret void
+}
+
+; CHECK-FP-LABEL: test2:
+; CHECK-FP:   .cfi_startproc
+; CHECK-FP:   push   {r11, lr}
+; CHECK-FP:   .cfi_def_cfa_offset 8
+; CHECK-FP:   .cfi_offset lr, -4
+; CHECK-FP:   .cfi_offset r11, -8
+; CHECK-FP:   mov    r11, sp
+; CHECK-FP:   .cfi_def_cfa_register r11
+; CHECK-FP:   pop    {r11, lr}
+; CHECK-FP:   mov    pc, lr
+; CHECK-FP:   .cfi_endproc
+
+; CHECK-FP-ELIM-LABEL: test2:
+; CHECK-FP-ELIM:   .cfi_startproc
+; CHECK-FP-ELIM:   push  {r11, lr}
+; CHECK-FP-ELIM:   .cfi_def_cfa_offset 8
+; CHECK-FP-ELIM:   .cfi_offset lr, -4
+; CHECK-FP-ELIM:   .cfi_offset r11, -8
+; CHECK-FP-ELIM:   pop   {r11, lr}
+; CHECK-FP-ELIM:   mov   pc, lr
+; CHECK-FP-ELIM:   .cfi_endproc
+
+; CHECK-V7-FP-LABEL: test2:
+; CHECK-V7-FP:   .cfi_startproc
+; CHECK-V7-FP:   push   {r11, lr}
+; CHECK-V7-FP:   .cfi_def_cfa_offset 8
+; CHECK-V7-FP:   .cfi_offset lr, -4
+; CHECK-V7-FP:   .cfi_offset r11, -8
+; CHECK-V7-FP:   mov    r11, sp
+; CHECK-V7-FP:   .cfi_def_cfa_register r11
+; CHECK-V7-FP:   pop    {r11, pc}
+; CHECK-V7-FP:   .cfi_endproc
+
+; CHECK-V7-FP-ELIM-LABEL: test2:
+; CHECK-V7-FP-ELIM:   .cfi_startproc
+; CHECK-V7-FP-ELIM:   push  {r11, lr}
+; CHECK-V7-FP-ELIM:   .cfi_def_cfa_offset 8
+; CHECK-V7-FP-ELIM:   .cfi_offset lr, -4
+; CHECK-V7-FP-ELIM:   .cfi_offset r11, -8
+; CHECK-V7-FP-ELIM:   pop   {r11, pc}
+; CHECK-V7-FP-ELIM:   .cfi_endproc
+
+; CHECK-THUMB-FP-LABEL: test2:
+; CHECK-THUMB-FP:   .cfi_startproc
+; CHECK-THUMB-FP:   push   {r7, lr}
+; CHECK-THUMB-FP:   .cfi_def_cfa_offset 8
+; CHECK-THUMB-FP:   .cfi_offset lr, -4
+; CHECK-THUMB-FP:   .cfi_offset r7, -8
+; CHECK-THUMB-FP:   add    r7, sp, #0
+; CHECK-THUMB-FP:   .cfi_def_cfa_register r7
+; CHECK-THUMB-FP:   pop    {r7, pc}
+; CHECK-THUMB-FP:   .cfi_endproc
+
+; CHECK-THUMB-FP-ELIM-LABEL: test2:
+; CHECK-THUMB-FP-ELIM:   .cfi_startproc
+; CHECK-THUMB-FP-ELIM:   push  {r7, lr}
+; CHECK-THUMB-FP-ELIM:   .cfi_def_cfa_offset 8
+; CHECK-THUMB-FP-ELIM:   .cfi_offset lr, -4
+; CHECK-THUMB-FP-ELIM:   .cfi_offset r7, -8
+; CHECK-THUMB-FP-ELIM:   pop   {r7, pc}
+; CHECK-THUMB-FP-ELIM:   .cfi_endproc
+
+; CHECK-THUMB-V7-FP-LABEL: test2:
+; CHECK-THUMB-V7-FP:   .cfi_startproc
+; CHECK-THUMB-V7-FP:   push   {r7, lr}
+; CHECK-THUMB-V7-FP:   .cfi_def_cfa_offset 8
+; CHECK-THUMB-V7-FP:   .cfi_offset lr, -4
+; CHECK-THUMB-V7-FP:   .cfi_offset r7, -8
+; CHECK-THUMB-V7-FP:   mov    r7, sp
+; CHECK-THUMB-V7-FP:   .cfi_def_cfa_register r7
+; CHECK-THUMB-V7-FP:   pop    {r7, pc}
+; CHECK-THUMB-V7-FP:   .cfi_endproc
+
+; CHECK-THUMB-V7-FP-ELIM-LABEL: test2:
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_startproc
+; CHECK-THUMB-V7-FP-ELIM:   push.w  {r11, lr}
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_def_cfa_offset 8
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_offset lr, -4
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_offset r11, -8
+; CHECK-THUMB-V7-FP-ELIM:   pop.w   {r11, pc}
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_endproc
+
+
+;-------------------------------------------------------------------------------
+; Test 3
+;-------------------------------------------------------------------------------
+
+declare void @throw_exception_3(i32)
+
+define i32 @test3(i32 %a, i32 %b, i32 %c, i32 %d,
+                  i32 %e, i32 %f, i32 %g, i32 %h) {
+entry:
+  %add = add nsw i32 %b, %a
+  %add1 = add nsw i32 %add, %c
+  %add2 = add nsw i32 %add1, %d ; a + b + c + d
+  tail call void @throw_exception_3(i32 %add2)
+  %add3 = add nsw i32 %f, %e
+  %add4 = add nsw i32 %add3, %g
+  %add5 = add nsw i32 %add4, %h ; e + f + g + h
+  tail call void @throw_exception_3(i32 %add5)
+  %add6 = add nsw i32 %add5, %add2 ; %add2 and %add5 are both read after the calls; presumably why the checks below expect r4 and r5 to be saved
+  ret i32 %add6
+}
+
+; CHECK-FP-LABEL: test3:
+; CHECK-FP:   .cfi_startproc
+; CHECK-FP:   push   {r4, r5, r11, lr}
+; CHECK-FP:   .cfi_def_cfa_offset 16
+; CHECK-FP:   .cfi_offset lr, -4
+; CHECK-FP:   .cfi_offset r11, -8
+; CHECK-FP:   .cfi_offset r5, -12
+; CHECK-FP:   .cfi_offset r4, -16
+; CHECK-FP:   add    r11, sp, #8
+; CHECK-FP:   .cfi_def_cfa r11, 8
+; CHECK-FP:   pop    {r4, r5, r11, lr}
+; CHECK-FP:   mov    pc, lr
+; CHECK-FP:   .cfi_endproc
+
+; CHECK-FP-ELIM-LABEL: test3:
+; CHECK-FP-ELIM:   .cfi_startproc
+; CHECK-FP-ELIM:   push  {r4, r5, r11, lr}
+; CHECK-FP-ELIM:   .cfi_def_cfa_offset 16
+; CHECK-FP-ELIM:   .cfi_offset lr, -4
+; CHECK-FP-ELIM:   .cfi_offset r11, -8
+; CHECK-FP-ELIM:   .cfi_offset r5, -12
+; CHECK-FP-ELIM:   .cfi_offset r4, -16
+; CHECK-FP-ELIM:   pop   {r4, r5, r11, lr}
+; CHECK-FP-ELIM:   mov   pc, lr
+; CHECK-FP-ELIM:   .cfi_endproc
+
+; CHECK-V7-FP-LABEL: test3:
+; CHECK-V7-FP:   .cfi_startproc
+; CHECK-V7-FP:   push   {r4, r5, r11, lr}
+; CHECK-V7-FP:   .cfi_def_cfa_offset 16
+; CHECK-V7-FP:   .cfi_offset lr, -4
+; CHECK-V7-FP:   .cfi_offset r11, -8
+; CHECK-V7-FP:   .cfi_offset r5, -12
+; CHECK-V7-FP:   .cfi_offset r4, -16
+; CHECK-V7-FP:   add    r11, sp, #8
+; CHECK-V7-FP:   .cfi_def_cfa r11, 8
+; CHECK-V7-FP:   pop    {r4, r5, r11, pc}
+; CHECK-V7-FP:   .cfi_endproc
+
+; CHECK-V7-FP-ELIM-LABEL: test3:
+; CHECK-V7-FP-ELIM:   .cfi_startproc
+; CHECK-V7-FP-ELIM:   push  {r4, r5, r11, lr}
+; CHECK-V7-FP-ELIM:   .cfi_def_cfa_offset 16
+; CHECK-V7-FP-ELIM:   .cfi_offset lr, -4
+; CHECK-V7-FP-ELIM:   .cfi_offset r11, -8
+; CHECK-V7-FP-ELIM:   .cfi_offset r5, -12
+; CHECK-V7-FP-ELIM:   .cfi_offset r4, -16
+; CHECK-V7-FP-ELIM:   pop   {r4, r5, r11, pc}
+; CHECK-V7-FP-ELIM:   .cfi_endproc
+
+; CHECK-THUMB-FP-LABEL: test3:
+; CHECK-THUMB-FP:   .cfi_startproc
+; CHECK-THUMB-FP:   push   {r4, r5, r7, lr}
+; CHECK-THUMB-FP:   .cfi_def_cfa_offset 16
+; CHECK-THUMB-FP:   .cfi_offset lr, -4
+; CHECK-THUMB-FP:   .cfi_offset r7, -8
+; CHECK-THUMB-FP:   .cfi_offset r5, -12
+; CHECK-THUMB-FP:   .cfi_offset r4, -16
+; CHECK-THUMB-FP:   add    r7, sp, #8
+; CHECK-THUMB-FP:   .cfi_def_cfa r7, 8
+; CHECK-THUMB-FP:   pop    {r4, r5, r7, pc}
+; CHECK-THUMB-FP:   .cfi_endproc
+
+; CHECK-THUMB-FP-ELIM-LABEL: test3:
+; CHECK-THUMB-FP-ELIM:   .cfi_startproc
+; CHECK-THUMB-FP-ELIM:   push  {r4, r5, r7, lr}
+; CHECK-THUMB-FP-ELIM:   .cfi_def_cfa_offset 16
+; CHECK-THUMB-FP-ELIM:   .cfi_offset lr, -4
+; CHECK-THUMB-FP-ELIM:   .cfi_offset r7, -8
+; CHECK-THUMB-FP-ELIM:   .cfi_offset r5, -12
+; CHECK-THUMB-FP-ELIM:   .cfi_offset r4, -16
+; CHECK-THUMB-FP-ELIM:   pop   {r4, r5, r7, pc}
+; CHECK-THUMB-FP-ELIM:   .cfi_endproc
+
+; CHECK-THUMB-V7-FP-LABEL: test3:
+; CHECK-THUMB-V7-FP:   .cfi_startproc
+; CHECK-THUMB-V7-FP:   push   {r4, r5, r7, lr}
+; CHECK-THUMB-V7-FP:   .cfi_def_cfa_offset 16
+; CHECK-THUMB-V7-FP:   .cfi_offset lr, -4
+; CHECK-THUMB-V7-FP:   .cfi_offset r7, -8
+; CHECK-THUMB-V7-FP:   .cfi_offset r5, -12
+; CHECK-THUMB-V7-FP:   .cfi_offset r4, -16
+; CHECK-THUMB-V7-FP:   add    r7, sp, #8
+; CHECK-THUMB-V7-FP:   .cfi_def_cfa r7, 8
+; CHECK-THUMB-V7-FP:   pop    {r4, r5, r7, pc}
+; CHECK-THUMB-V7-FP:   .cfi_endproc
+
+; CHECK-THUMB-V7-FP-ELIM-LABEL: test3:
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_startproc
+; CHECK-THUMB-V7-FP-ELIM:   push.w  {r4, r5, r11, lr}
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_def_cfa_offset 16
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_offset lr, -4
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_offset r11, -8
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_offset r5, -12
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_offset r4, -16
+; CHECK-THUMB-V7-FP-ELIM:   pop.w   {r4, r5, r11, pc}
+; CHECK-THUMB-V7-FP-ELIM:   .cfi_endproc
+
+
+;-------------------------------------------------------------------------------
+; Test 4
+;-------------------------------------------------------------------------------
+
+define void @test4() nounwind {
+entry:
+  ret void ; nounwind function with an empty body; the test4 checks below expect only the return and no .cfi_def_cfa_offset
+}
+
+; CHECK-FP-LABEL: test4:
+; CHECK-FP:   mov pc, lr
+; CHECK-FP-NOT:   .cfi_def_cfa_offset
+
+; CHECK-FP-ELIM-LABEL: test4:
+; CHECK-FP-ELIM:   mov pc, lr
+; CHECK-FP-ELIM-NOT:   .cfi_def_cfa_offset
+
+; CHECK-V7-FP-LABEL: test4:
+; CHECK-V7-FP:   bx lr
+; CHECK-V7-FP-NOT:   .cfi_def_cfa_offset
+
+; CHECK-V7-FP-ELIM-LABEL: test4:
+; CHECK-V7-FP-ELIM:   bx lr
+; CHECK-V7-FP-ELIM-NOT:   .cfi_def_cfa_offset
+
+; CHECK-THUMB-FP-LABEL: test4:
+; CHECK-THUMB-FP:   bx lr
+; CHECK-THUMB-FP-NOT:   .cfi_def_cfa_offset
+
+; CHECK-THUMB-FP-ELIM-LABEL: test4:
+; CHECK-THUMB-FP-ELIM:   bx lr
+; CHECK-THUMB-FP-ELIM-NOT:   .cfi_def_cfa_offset
+
+; CHECK-THUMB-V7-FP-LABEL: test4:
+; CHECK-THUMB-V7-FP:   bx lr
+; CHECK-THUMB-V7-FP-NOT:   .cfi_def_cfa_offset
+
+; CHECK-THUMB-V7-FP-ELIM-LABEL: test4:
+; CHECK-THUMB-V7-FP-ELIM:   bx lr
+; CHECK-THUMB-V7-FP-ELIM-NOT:   .cfi_def_cfa_offset
+
diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll
index ee515fd5..03ce312 100644
--- a/test/CodeGen/ARM/debug-info-qreg.ll
+++ b/test/CodeGen/ARM/debug-info-qreg.ll
@@ -2,13 +2,15 @@
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-macosx10.6.7"
 
-;CHECK: DW_OP_regx for Q register: D1
+;CHECK: sub-register
+;CHECK-NEXT: DW_OP_regx
 ;CHECK-NEXT: ascii
-;CHECK-NEXT: DW_OP_piece 8
+;CHECK-NEXT: DW_OP_piece
 ;CHECK-NEXT: byte   8
-;CHECK-NEXT: DW_OP_regx for Q register: D2
+;CHECK-NEXT: sub-register
+;CHECK-NEXT: DW_OP_regx
 ;CHECK-NEXT: ascii
-;CHECK-NEXT: DW_OP_piece 8
+;CHECK-NEXT: DW_OP_piece
 ;CHECK-NEXT: byte   8
 
 @.str = external constant [13 x i8]
diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll
index e92d977..ee9faf8 100644
--- a/test/CodeGen/ARM/debug-info-s16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-s16-reg.ll
@@ -1,9 +1,11 @@
 ; RUN: llc < %s - | FileCheck %s
 ; Radar 9309221
 ; Test dwarf reg no for s16
-;CHECK: DW_OP_regx for S register
+;CHECK: super-register
+;CHECK-NEXT: DW_OP_regx
 ;CHECK-NEXT: ascii
-;CHECK-NEXT: DW_OP_bit_piece 32 0
+;CHECK-NEXT: DW_OP_piece
+;CHECK-NEXT: 4
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-macosx10.6.7"
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
index 854fcab..71a696a 100644
--- a/test/CodeGen/ARM/debug-info-sreg2.ll
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -3,13 +3,19 @@
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-macosx10.6.7"
 
-;CHECK: Ldebug_loc0:
-;CHECK-NEXT:        .long   Ltmp0
-;CHECK-NEXT:        .long   Ltmp1
+;CHECK-LABEL: Lfunc_begin0:
+;CHECK: Ltmp[[K:[0-9]+]]:
+;CHECK: Ltmp[[L:[0-9]+]]:
+;CHECK-LABEL: Ldebug_loc0:
+;CHECK-NEXT:        .long   Ltmp[[K]]
+;CHECK-NEXT:        .long   Ltmp[[L]]
 ;CHECK-NEXT: Lset[[N:[0-9]+]] = Ltmp{{[0-9]+}}-Ltmp[[M:[0-9]+]]        @ Loc expr size
 ;CHECK-NEXT:        .short  Lset[[N]]
 ;CHECK-NEXT: Ltmp[[M]]:
-;CHECK-NEXT:        .byte   144                     @ DW_OP_regx for S register
+;CHECK-NEXT:        .byte   144                     @ super-register
+;CHECK-NEXT:                                        @ DW_OP_regx
+;CHECK-NEXT:        .ascii
+;CHECK-NEXT:        .byte   {{[0-9]+}}              @ DW_OP_{{.*}}piece
 
 define void @_Z3foov() optsize ssp {
 entry:
diff --git a/test/CodeGen/ARM/debug-segmented-stacks.ll b/test/CodeGen/ARM/debug-segmented-stacks.ll
new file mode 100644
index 0000000..b0dc467
--- /dev/null
+++ b/test/CodeGen/ARM/debug-segmented-stacks.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs -filetype=asm | FileCheck %s -check-prefix=ARM-linux
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -filetype=obj
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+define void @test_basic() {
+        %mem = alloca i32, i32 10
+        call void @dummy_use (i32* %mem, i32 10)
+	ret void
+; Expected entry sequence when built with -segmented-stacks, including its CFI.
+; ARM-linux:      test_basic:
+
+; ARM-linux:      push    {r4, r5}
+; ARM-linux:      .cfi_def_cfa_offset 8
+; ARM-linux:      .cfi_offset r5, -4
+; ARM-linux:      .cfi_offset r4, -8
+; ARM-linux-NEXT: mrc     p15, #0, r4, c13, c0, #3
+; ARM-linux-NEXT: mov     r5, sp
+; ARM-linux-NEXT: ldr     r4, [r4, #4]
+; ARM-linux-NEXT: cmp     r4, r5
+; ARM-linux-NEXT: blo     .LBB0_2
+; Block that calls __morestack: lr is saved around the call and tracked in CFI.
+; ARM-linux:      mov     r4, #48
+; ARM-linux-NEXT: mov     r5, #0
+; ARM-linux-NEXT: stmdb   sp!, {lr}
+; ARM-linux:      .cfi_def_cfa_offset 12
+; ARM-linux:      .cfi_offset lr, -12
+; ARM-linux-NEXT: bl      __morestack
+; ARM-linux-NEXT: ldm     sp!, {lr}
+; ARM-linux-NEXT: pop     {r4, r5}
+; ARM-linux:      .cfi_def_cfa_offset 0
+; ARM-linux-NEXT: bx      lr
+; Second pop of r4/r5: both registers must then be marked .cfi_same_value.
+; ARM-linux:      pop     {r4, r5}
+; ARM-linux:      .cfi_def_cfa_offset 0
+; ARM-linux:      .cfi_same_value r4
+; ARM-linux:      .cfi_same_value r5
+}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/var.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"var.c", metadata !"/tmp"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_basic",
+  metadata !"test_basic", metadata !"", i32 5, metadata !6, i1 false, i1 true,
+  i32 0, i32 0, null, i32 256, i1 false, void ()* @test_basic, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [sum]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/var.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8, metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!11 = metadata !{metadata !"clang version 3.5 "}
+!12 = metadata !{i32 786689, metadata !4, metadata !"count", metadata !5, i32 16777221, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [count] [line 5]
+!13 = metadata !{i32 5, i32 0, metadata !4, null}
+!14 = metadata !{i32 786688, metadata !4, metadata !"vl", metadata !5, i32 6, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [vl] [line 6]
+!15 = metadata !{i32 786454, metadata !16, null, metadata !"va_list", i32 30, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_typedef ] [va_list] [line 30, size 0, align 0, offset 0] [from __builtin_va_list]
+!16 = metadata !{metadata !"/linux-x86_64-high/gcc_4.7.2/dbg/llvm/bin/../lib/clang/3.5/include/stdarg.h", metadata !"/tmp"}
+!17 = metadata !{i32 786454, metadata !1, null, metadata !"__builtin_va_list", i32 6, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ] [__builtin_va_list] [line 6, size 0, align 0, offset 0] [from __va_list]
+!18 = metadata !{i32 786451, metadata !1, null, metadata !"__va_list", i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__va_list] [line 6, size 32, align 32, offset 0] [def] [from ]
+!19 = metadata !{metadata !20}
+!20 = metadata !{i32 786445, metadata !1, metadata !18, metadata !"__ap", i32 6, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_member ] [__ap] [line 6, size 32, align 32, offset 0] [from ]
+!21 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from ]
+!22 = metadata !{i32 6, i32 0, metadata !4, null}
+!23 = metadata !{i32 7, i32 0, metadata !4, null}
+!24 = metadata !{i32 786688, metadata !4, metadata !"test_basic", metadata !5, i32 8, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [sum] [line 8]
+!25 = metadata !{i32 8, i32 0, metadata !4, null}
+!26 = metadata !{i32 786688, metadata !27, metadata !"i", metadata !5, i32 9, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 9]
+!27 = metadata !{i32 786443, metadata !1, metadata !4, i32 9, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/var.c]
+!28 = metadata !{i32 9, i32 0, metadata !27, null}
+!29 = metadata !{i32 10, i32 0, metadata !30, null}
+!30 = metadata !{i32 786443, metadata !1, metadata !27, i32 9, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/tmp/var.c]
+!31 = metadata !{i32 11, i32 0, metadata !30, null}
+!32 = metadata !{i32 12, i32 0, metadata !4, null}
+!33 = metadata !{i32 13, i32 0, metadata !4, null}
+
+; Just to prevent the alloca from being optimized away
+declare void @dummy_use(i32*, i32)
diff --git a/test/CodeGen/ARM/default-float-abi.ll b/test/CodeGen/ARM/default-float-abi.ll
new file mode 100644
index 0000000..1b26bbd
--- /dev/null
+++ b/test/CodeGen/ARM/default-float-abi.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s --check-prefix=CHECK-HARD
+; RUN: llc -mtriple=armv7-linux-eabihf %s -o - | FileCheck %s --check-prefix=CHECK-HARD
+; RUN: llc -mtriple=armv7-linux-gnueabihf -float-abi=soft %s -o - | FileCheck %s --check-prefix=CHECK-SOFT
+; RUN: llc -mtriple=armv7-linux-gnueabi %s -o - | FileCheck %s --check-prefix=CHECK-SOFT
+; RUN: llc -mtriple=armv7-linux-eabi -float-abi=hard %s -o - | FileCheck %s --check-prefix=CHECK-HARD
+; RUN: llc -mtriple=thumbv7-apple-ios6.0 %s -o - | FileCheck %s --check-prefix=CHECK-SOFT
+
+define float @test_abi(float %lhs, float %rhs) {
+  %sum = fadd float %lhs, %rhs ; one fadd; the two check groups below differ in whether vmov copies between r0/r1 and s-registers surround it
+  ret float %sum
+
+; CHECK-HARD-LABEL: test_abi:
+; CHECK-HARD-NOT: vmov
+; CHECK-HARD: vadd.f32 s0, s0, s1
+; CHECK-HARD-NOT: vmov
+
+; CHECK-SOFT-LABEL: test_abi:
+; CHECK-SOFT-DAG: vmov [[LHS:s[0-9]+]], r0
+; CHECK-SOFT-DAG: vmov [[RHS:s[0-9]+]], r1
+; CHECK-SOFT: vadd.f32 [[DEST:s[0-9]+]], [[LHS]], [[RHS]]
+; CHECK-SOFT: vmov r0, [[DEST]]
+}
diff --git a/test/CodeGen/ARM/divmod-eabi.ll b/test/CodeGen/ARM/divmod-eabi.ll
index 404cae0..7f72048 100644
--- a/test/CodeGen/ARM/divmod-eabi.ll
+++ b/test/CodeGen/ARM/divmod-eabi.ll
@@ -1,6 +1,9 @@
 ; RUN: llc -mtriple armv7-none-eabi %s -o - | FileCheck %s --check-prefix=EABI
+; RUN: llc -mtriple armv7-none-eabihf %s -o - | FileCheck %s --check-prefix=EABI
 ; RUN: llc -mtriple armv7-linux-gnueabi %s -o - | FileCheck %s --check-prefix=GNU
 ; RUN: llc -mtriple armv7-apple-darwin %s -o - | FileCheck %s --check-prefix=DARWIN
+; FIXME: long-term, we will use "-apple-macho" and won't need this exception:
+; RUN: llc -mtriple armv7-apple-darwin-eabi %s -o - | FileCheck %s --check-prefix=DARWIN
 
 define signext i16 @f16(i16 signext %a, i16 signext %b) {
 ; EABI-LABEL: f16:
@@ -186,7 +189,7 @@ entry:
   %div = sdiv i32 %a, %b
 ; EABI: __aeabi_idivmod
 ; EABI: mov [[div:r[0-9]+]], r0
-; GNU __aeabi_idiv
+; GNU: __aeabi_idiv
 ; GNU: mov [[sum:r[0-9]+]], r0
 ; DARWIN: ___divsi3
 ; DARWIN: mov [[sum:r[0-9]+]], r0
diff --git a/test/CodeGen/ARM/dyn-stackalloc.ll b/test/CodeGen/ARM/dyn-stackalloc.ll
index de2820e..4ac5b8a 100644
--- a/test/CodeGen/ARM/dyn-stackalloc.ll
+++ b/test/CodeGen/ARM/dyn-stackalloc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 
 %struct.comment = type { i8**, i32*, i32, i8* }
 %struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
diff --git a/test/CodeGen/ARM/ehabi-filters.ll b/test/CodeGen/ARM/ehabi-filters.ll
index cb5291b..f86b66c 100644
--- a/test/CodeGen/ARM/ehabi-filters.ll
+++ b/test/CodeGen/ARM/ehabi-filters.ll
@@ -1,4 +1,4 @@
-; RUN: llc -arm-enable-ehabi -arm-enable-ehabi-descriptors < %s | FileCheck %s
+; RUN: llc < %s | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
 target triple = "armv7-none-linux-gnueabi"
 
diff --git a/test/CodeGen/ARM/ehabi-no-landingpad.ll b/test/CodeGen/ARM/ehabi-no-landingpad.ll
index ac0dff4..d5c74c5 100644
--- a/test/CodeGen/ARM/ehabi-no-landingpad.ll
+++ b/test/CodeGen/ARM/ehabi-no-landingpad.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-unknown-linux-gnueabi \
-; RUN:   -arm-enable-ehabi -arm-enable-ehabi-descriptors | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-unknown-linux-gnueabi | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
 target triple = "armv7-unknown-linux-gnueabi"
diff --git a/test/CodeGen/ARM/ehabi-unwind.ll b/test/CodeGen/ARM/ehabi-unwind.ll
index fd7d0e6..a86f340 100644
--- a/test/CodeGen/ARM/ehabi-unwind.ll
+++ b/test/CodeGen/ARM/ehabi-unwind.ll
@@ -1,8 +1,7 @@
 ; Test that the EHABI unwind instruction generator does not encounter any
 ; unfamiliar instructions.
-; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -disable-fp-elim
-; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi
-; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -arm-enable-ehabi-descriptors
+; RUN: llc < %s -mtriple=thumbv7 -disable-fp-elim
+; RUN: llc < %s -mtriple=thumbv7
 
 define void @_Z1fv() nounwind {
 entry:
diff --git a/test/CodeGen/ARM/ehabi.ll b/test/CodeGen/ARM/ehabi.ll
index 6644652..720cc3c 100644
--- a/test/CodeGen/ARM/ehabi.ll
+++ b/test/CodeGen/ARM/ehabi.ll
@@ -19,22 +19,34 @@
 ; (4) armv7 without -disable-fp-elim
 
 ; RUN: llc -mtriple arm-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
 ; RUN:     -disable-fp-elim -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-FP
 
 ; RUN: llc -mtriple arm-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
 ; RUN:     -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
 
 ; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
 ; RUN:     -disable-fp-elim -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-V7-FP
 
 ; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-V7-FP-ELIM
+
+; RUN: llc -mtriple arm-unknown-linux-androideabi \
+; RUN:     -disable-fp-elim -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP
+
+; RUN: llc -mtriple arm-unknown-linux-androideabi \
+; RUN:     -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
+
+; RUN: llc -mtriple armv7-unknown-linux-androideabi \
+; RUN:     -disable-fp-elim -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-V7-FP
+
+; RUN: llc -mtriple armv7-unknown-linux-androideabi \
 ; RUN:     -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-V7-FP-ELIM
 
@@ -169,7 +181,7 @@ declare void @throw_exception_2()
 
 define void @test2() {
 entry:
-  tail call void @throw_exception_2()
+  call void @throw_exception_2()
   ret void
 }
 
diff --git a/test/CodeGen/ARM/extload-knownzero.ll b/test/CodeGen/ARM/extload-knownzero.ll
index 8ccf58c..f55b951 100644
--- a/test/CodeGen/ARM/extload-knownzero.ll
+++ b/test/CodeGen/ARM/extload-knownzero.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
 ; rdar://12771555
 
 define void @foo(i16* %ptr, i32 %a) nounwind {
diff --git a/test/CodeGen/ARM/extloadi1.ll b/test/CodeGen/ARM/extloadi1.ll
index dc45ce7..2504c6c 100644
--- a/test/CodeGen/ARM/extloadi1.ll
+++ b/test/CodeGen/ARM/extloadi1.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
+
 @handler_installed.6144.b = external global i1          ; <i1*> [#uses=1]
 
 define void @__mf_sigusr1_respond() {
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
index 21219ce..b5d3bda 100644
--- a/test/CodeGen/ARM/fadds.ll
+++ b/test/CodeGen/ARM/fadds.ll
@@ -1,9 +1,20 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
-; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
-; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
-; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8U
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=VFP2
+
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \
+; RUN:  | FileCheck %s -check-prefix=NFP0
+
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=CORTEXA8
+
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \
+; RUN:  | FileCheck %s -check-prefix=CORTEXA8U
+
+; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=CORTEXA8U
+
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=CORTEXA9
 
 define float @test(float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll
index 917a15d..2d7378e 100644
--- a/test/CodeGen/ARM/fast-isel-call.ll
+++ b/test/CodeGen/ARM/fast-isel-call.ll
@@ -8,8 +8,6 @@
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP
 
-; XFAIL: vg_leak
-
 ; Note that some of these tests assume that relocations are either
 ; movw/movt or constant pool loads. Different platforms will select
 ; different approaches.
diff --git a/test/CodeGen/ARM/fast-isel-crash2.ll b/test/CodeGen/ARM/fast-isel-crash2.ll
index d606877..cccd9eb 100644
--- a/test/CodeGen/ARM/fast-isel-crash2.ll
+++ b/test/CodeGen/ARM/fast-isel-crash2.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-darwin
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-linux-gnueabi
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=thumbv7-apple-darwin
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=armv7-linux-gnueabi
 ; rdar://9515076
 ; (Make sure this doesn't crash.)
 
diff --git a/test/CodeGen/ARM/fast-isel-frameaddr.ll b/test/CodeGen/ARM/fast-isel-frameaddr.ll
index 8542bb5..93cdbbb 100644
--- a/test/CodeGen/ARM/fast-isel-frameaddr.ll
+++ b/test/CodeGen/ARM/fast-isel-frameaddr.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=DARWIN-ARM
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-THUMB2
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=DARWIN-THUMB2
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-THUMB2
 
 define i8* @frameaddr_index0() nounwind {
@@ -34,14 +34,12 @@ entry:
 ; DARWIN-ARM-LABEL: frameaddr_index1:
 ; DARWIN-ARM: push {r7}
 ; DARWIN-ARM: mov r7, sp
-; DARWIN-ARM: mov r0, r7
-; DARWIN-ARM: ldr r0, [r0]
+; DARWIN-ARM: ldr r0, [r7]
 
 ; DARWIN-THUMB2-LABEL: frameaddr_index1:
 ; DARWIN-THUMB2: str r7, [sp, #-4]!
 ; DARWIN-THUMB2: mov r7, sp
-; DARWIN-THUMB2: mov r0, r7
-; DARWIN-THUMB2: ldr r0, [r0]
+; DARWIN-THUMB2: ldr r0, [r7]
 
 ; LINUX-ARM-LABEL: frameaddr_index1:
 ; LINUX-ARM: push {r11}
@@ -63,16 +61,14 @@ entry:
 ; DARWIN-ARM-LABEL: frameaddr_index3:
 ; DARWIN-ARM: push {r7}
 ; DARWIN-ARM: mov r7, sp
-; DARWIN-ARM: mov r0, r7
-; DARWIN-ARM: ldr r0, [r0]
+; DARWIN-ARM: ldr r0, [r7]
 ; DARWIN-ARM: ldr r0, [r0]
 ; DARWIN-ARM: ldr r0, [r0]
 
 ; DARWIN-THUMB2-LABEL: frameaddr_index3:
 ; DARWIN-THUMB2: str r7, [sp, #-4]!
 ; DARWIN-THUMB2: mov r7, sp
-; DARWIN-THUMB2: mov r0, r7
-; DARWIN-THUMB2: ldr r0, [r0]
+; DARWIN-THUMB2: ldr r0, [r7]
 ; DARWIN-THUMB2: ldr r0, [r0]
 ; DARWIN-THUMB2: ldr r0, [r0]
 
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll
index b08b72b..089209e 100644
--- a/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -5,8 +5,6 @@
 ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -arm-long-calls -verify-machineinstrs | FileCheck %s --check-prefix=ARM-LONG
 ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-LONG
 
-; XFAIL: vg_leak
-
 ; Note that some of these tests assume that relocations are either
 ; movw/movt or constant pool loads. Different platforms will select
 ; different approaches.
@@ -15,7 +13,7 @@
 @temp = common global [60 x i8] zeroinitializer, align 1
 
 define void @t1() nounwind ssp {
-; ARM: t1
+; ARM-LABEL: t1:
 ; ARM: {{(movw r0, :lower16:_?message1)|(ldr r0, .LCPI)}}
 ; ARM: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}}
 ; ARM: add r0, r0, #5
@@ -23,12 +21,12 @@ define void @t1() nounwind ssp {
 ; ARM: movw r2, #10
 ; ARM: and r1, r1, #255
 ; ARM: bl {{_?}}memset
-; ARM-LONG: t1
+; ARM-LONG-LABEL: t1:
 ; ARM-LONG: {{(movw r3, :lower16:L_memset\$non_lazy_ptr)|(ldr r3, .LCPI)}}
 ; ARM-LONG: {{(movt r3, :upper16:L_memset\$non_lazy_ptr)?}}
 ; ARM-LONG: ldr r3, [r3]
 ; ARM-LONG: blx r3
-; THUMB: t1
+; THUMB-LABEL: t1:
 ; THUMB: {{(movw r0, :lower16:_?message1)|(ldr.n r0, .LCPI)}}
 ; THUMB: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}}
 ; THUMB: adds r0, #5
@@ -38,7 +36,7 @@ define void @t1() nounwind ssp {
 ; THUMB: movt r2, #0
 ; THUMB: and r1, r1, #255
 ; THUMB: bl {{_?}}memset
-; THUMB-LONG: t1
+; THUMB-LONG-LABEL: t1:
 ; THUMB-LONG: movw r3, :lower16:L_memset$non_lazy_ptr
 ; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr
 ; THUMB-LONG: ldr r3, [r3]
@@ -50,7 +48,7 @@ define void @t1() nounwind ssp {
 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
 define void @t2() nounwind ssp {
-; ARM: t2
+; ARM-LABEL: t2:
 ; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
 ; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; ARM: ldr r0, [r0]
@@ -61,12 +59,12 @@ define void @t2() nounwind ssp {
 ; ARM: mov r0, r1
 ; ARM: ldr r1, [sp[[SLOT]]] @ 4-byte Reload
 ; ARM: bl {{_?}}memcpy
-; ARM-LONG: t2
+; ARM-LONG-LABEL: t2:
 ; ARM-LONG: {{(movw r3, :lower16:L_memcpy\$non_lazy_ptr)|(ldr r3, .LCPI)}}
 ; ARM-LONG: {{(movt r3, :upper16:L_memcpy\$non_lazy_ptr)?}}
 ; ARM-LONG: ldr r3, [r3]
 ; ARM-LONG: blx r3
-; THUMB: t2
+; THUMB-LABEL: t2:
 ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
 ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; THUMB: ldr r0, [r0]
@@ -78,7 +76,7 @@ define void @t2() nounwind ssp {
 ; THUMB: mov r0, r1
 ; THUMB: ldr r1,  [sp[[SLOT]]] @ 4-byte Reload
 ; THUMB: bl {{_?}}memcpy
-; THUMB-LONG: t2
+; THUMB-LONG-LABEL: t2:
 ; THUMB-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr
 ; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr
 ; THUMB-LONG: ldr r3, [r3]
@@ -90,7 +88,7 @@ define void @t2() nounwind ssp {
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
 define void @t3() nounwind ssp {
-; ARM: t3
+; ARM-LABEL: t3:
 ; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
 ; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; ARM: ldr r0, [r0]
@@ -99,12 +97,12 @@ define void @t3() nounwind ssp {
 ; ARM: movw r2, #10
 ; ARM: mov r0, r1
 ; ARM: bl {{_?}}memmove
-; ARM-LONG: t3
+; ARM-LONG-LABEL: t3:
 ; ARM-LONG: {{(movw r3, :lower16:L_memmove\$non_lazy_ptr)|(ldr r3, .LCPI)}}
 ; ARM-LONG: {{(movt r3, :upper16:L_memmove\$non_lazy_ptr)?}}
 ; ARM-LONG: ldr r3, [r3]
 ; ARM-LONG: blx r3
-; THUMB: t3
+; THUMB-LABEL: t3:
 ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
 ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; THUMB: ldr r0, [r0]
@@ -116,7 +114,7 @@ define void @t3() nounwind ssp {
 ; THUMB: mov r0, r1
 ; THUMB: ldr r1,  [sp[[SLOT]]] @ 4-byte Reload
 ; THUMB: bl {{_?}}memmove
-; THUMB-LONG: t3
+; THUMB-LONG-LABEL: t3:
 ; THUMB-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr
 ; THUMB-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr
 ; THUMB-LONG: ldr r3, [r3]
@@ -126,7 +124,7 @@ define void @t3() nounwind ssp {
 }
 
 define void @t4() nounwind ssp {
-; ARM: t4
+; ARM-LABEL: t4:
 ; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
 ; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; ARM: ldr r0, [r0]
@@ -137,7 +135,7 @@ define void @t4() nounwind ssp {
 ; ARM: ldrh r1, [r0, #24]
 ; ARM: strh r1, [r0, #12]
 ; ARM: bx lr
-; THUMB: t4
+; THUMB-LABEL: t4:
 ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
 ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; THUMB: ldr r0, [r0]
@@ -155,7 +153,7 @@ define void @t4() nounwind ssp {
 declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
 define void @t5() nounwind ssp {
-; ARM: t5
+; ARM-LABEL: t5:
 ; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
 ; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; ARM: ldr r0, [r0]
@@ -170,7 +168,7 @@ define void @t5() nounwind ssp {
 ; ARM: ldrh r1, [r0, #24]
 ; ARM: strh r1, [r0, #12]
 ; ARM: bx lr
-; THUMB: t5
+; THUMB-LABEL: t5:
 ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
 ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; THUMB: ldr r0, [r0]
@@ -190,7 +188,7 @@ define void @t5() nounwind ssp {
 }
 
 define void @t6() nounwind ssp {
-; ARM: t6
+; ARM-LABEL: t6:
 ; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
 ; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; ARM: ldr r0, [r0]
@@ -215,7 +213,7 @@ define void @t6() nounwind ssp {
 ; ARM: ldrb r1, [r0, #25]
 ; ARM: strb r1, [r0, #13]
 ; ARM: bx lr
-; THUMB: t6
+; THUMB-LABEL: t6:
 ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
 ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; THUMB: ldr r0, [r0]
@@ -253,9 +251,9 @@ define void @t7() nounwind ssp {
 
 define i32 @t8(i32 %x) nounwind {
 entry:
-; ARM: t8
+; ARM-LABEL: t8:
 ; ARM-NOT: FastISel missed call:   %expval = call i32 @llvm.expect.i32(i32 %x, i32 1)
-; THUMB: t8
+; THUMB-LABEL: t8:
 ; THUMB-NOT: FastISel missed call:   %expval = call i32 @llvm.expect.i32(i32 %x, i32 1)
   %expval = call i32 @llvm.expect.i32(i32 %x, i32 1)
   ret i32 %expval
diff --git a/test/CodeGen/ARM/fast-isel-static.ll b/test/CodeGen/ARM/fast-isel-static.ll
index 93c14a0..9bd0a51 100644
--- a/test/CodeGen/ARM/fast-isel-static.ll
+++ b/test/CodeGen/ARM/fast-isel-static.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s
-; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s
-; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s
 
 define void @myadd(float* %sum, float* %addend) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/fast-tail-call.ll b/test/CodeGen/ARM/fast-tail-call.ll
index 9fbdc9d..6472016 100644
--- a/test/CodeGen/ARM/fast-tail-call.ll
+++ b/test/CodeGen/ARM/fast-tail-call.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=thumbv7-linux-gnueabi -O0 -arm-tail-calls < %s | FileCheck %s
+; RUN: llc -mtriple=thumbv7-linux-gnueabi -O0 < %s | FileCheck %s
 
 ; Primarily a non-crash test: Thumbv7 Linux does not have FastISel support,
 ; which led (via a convoluted route) to DAG nodes after a TC_RETURN that
diff --git a/test/CodeGen/ARM/fastcc-vfp.ll b/test/CodeGen/ARM/fastcc-vfp.ll
new file mode 100644
index 0000000..4c98150
--- /dev/null
+++ b/test/CodeGen/ARM/fastcc-vfp.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios -mattr=+vfp2 | FileCheck %s
+
+define fastcc double @t1(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, float %a, float %b) {
+entry:
+; CHECK-LABEL: t1:
+; CHECK-NOT: vmov
+; CHECK: vldr
+  %add = fadd float %a, %b
+  %conv = fpext float %add to double
+  ret double %conv
+}
+
+define fastcc double @t2(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %a, float %b, double %c) {
+entry:
+; CHECK-LABEL: t2:
+; CHECK-NOT: vmov
+; CHECK: vldr
+  %add = fadd double %a, %c
+  ret double %add
+}
+
+define fastcc float @t3(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, float %a, double %b, float %c) {
+entry:
+; CHECK-LABEL: t3:
+; CHECK: vldr
+  %add = fadd float %a, %c
+  ret float %add
+}
+
+define fastcc double @t4(double %a, double %b) #0 {
+entry:
+; CHECK-LABEL: t4:
+; CHECK: vstr
+  %add = fadd double %a, %b
+  %sub = fsub double %a, %b
+  %call = tail call fastcc double @x(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double %add, float 0.000000e+00, double %sub) #2
+  ret double %call
+}
+
+declare fastcc double @x(double, double, double, double, double, double, double, float, double)
diff --git a/test/CodeGen/ARM/fastisel-thumb-litpool.ll b/test/CodeGen/ARM/fastisel-thumb-litpool.ll
new file mode 100644
index 0000000..aa9e726
--- /dev/null
+++ b/test/CodeGen/ARM/fastisel-thumb-litpool.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mtriple=thumbv7-apple-ios -O0 -o - %s | FileCheck %s
+
+; We used to accidentally create both an ARM and a Thumb ldr here. It led to an
+; assertion failure at the time, but could go all the way through to emission,
+; hence the CHECK-NOT.
+
+define i32 @test_thumb_ldrlit() minsize {
+; CHECK: ldr r0, LCPI0_0
+; CHECK-NOT: ldr
+  ret i32 12345678
+}
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll
index a4fecfe..7cab766 100644
--- a/test/CodeGen/ARM/fdivs.ll
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - | FileCheck %s -check-prefix=VFP2
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s -check-prefix=NFP0
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s -check-prefix=CORTEXA9
 
 define float @test(float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fixunsdfdi.ll b/test/CodeGen/ARM/fixunsdfdi.ll
index 6db2385..f3406cc 100644
--- a/test/CodeGen/ARM/fixunsdfdi.ll
+++ b/test/CodeGen/ARM/fixunsdfdi.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2
-; RUN: llc < %s -march=arm -mattr=vfp2 | not grep vstr.64
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null
+; RUN: llc -mtriple=arm-eabi -mattr=vfp2 %s -o - | FileCheck %s
 
 define hidden i64 @__fixunsdfdi(double %x) nounwind readnone {
 entry:
@@ -27,3 +27,6 @@ bb7:		; preds = %bb3
 bb10:		; preds = %entry
 	ret i64 0
 }
+
+; CHECK-NOT: vstr.64
+
diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll
index f2486c6..6f8c0fe 100644
--- a/test/CodeGen/ARM/fmacs.ll
+++ b/test/CodeGen/ARM/fmacs.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
-; RUN: llc < %s -mtriple=arm-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard | FileCheck %s -check-prefix=HARD
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - | FileCheck %s -check-prefix=VFP2
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s -check-prefix=NEON
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s -check-prefix=A8
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s -check-prefix=A9
+; RUN: llc -mtriple=arm-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard %s -o - | FileCheck %s -check-prefix=HARD
 
 define float @t1(float %acc, float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fmdrr-fmrrd.ll b/test/CodeGen/ARM/fmdrr-fmrrd.ll
index eb72faf..a3669b4 100644
--- a/test/CodeGen/ARM/fmdrr-fmrrd.ll
+++ b/test/CodeGen/ARM/fmdrr-fmrrd.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fmdrr
-; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fmrrd
+; RUN: llc -mtriple=arm-eabi -mattr=vfp2 %s -o - | FileCheck %s
 
 ; naive codegen for this is:
 ; _i:
@@ -11,3 +10,8 @@ define i64 @test(double %X) {
         %Y = bitcast double %X to i64
         ret i64 %Y
 }
+
+; CHECK-LABEL: test:
+; CHECK-NOT: fmdrr
+; CHECK-NOT: fmrrd
+
diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll
index f16ec17..5aff74c 100644
--- a/test/CodeGen/ARM/fmscs.ll
+++ b/test/CodeGen/ARM/fmscs.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - | FileCheck %s -check-prefix=VFP2
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s -check-prefix=NEON
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s -check-prefix=A8
 
 define float @t1(float %acc, float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
index d11f6bd..b24d867 100644
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -1,9 +1,20 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
-; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
-; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
-; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8U
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=VFP2
+
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \
+; RUN:  | FileCheck %s -check-prefix=NFP0
+
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=CORTEXA8
+
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \
+; RUN:  | FileCheck %s -check-prefix=CORTEXA8U
+
+; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=CORTEXA8U
+
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=CORTEXA9
 
 define float @test(float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll
index dc4c2e3..36af835 100644
--- a/test/CodeGen/ARM/fnegs.ll
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -1,9 +1,20 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
-; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
-; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
-; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8U
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=VFP2
+
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \
+; RUN:  | FileCheck %s -check-prefix=NFP0
+
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=CORTEXA8
+
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \
+; RUN:  | FileCheck %s -check-prefix=CORTEXA8U
+
+; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=CORTEXA8U
+
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=CORTEXA9
 
 define float @test1(float* %a) {
 entry:
diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll
index 825feaa..ab35a97 100644
--- a/test/CodeGen/ARM/fnmacs.ll
+++ b/test/CodeGen/ARM/fnmacs.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - | FileCheck %s -check-prefix=VFP2
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s -check-prefix=NEON
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s -check-prefix=A8
 
 define float @t1(float %acc, float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index 78ccb60..5fa6b21 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -1,9 +1,20 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
-; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
-; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8
-; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=A8U
-; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8U
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=VFP2
+
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \
+; RUN:  | FileCheck %s -check-prefix=NEON
+
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=A8
+
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 -regalloc=basic %s -o - \
+; RUN:  | FileCheck %s -check-prefix=A8
+
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \
+; RUN:  | FileCheck %s -check-prefix=A8U
+
+; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=A8U
 
 define float @t1(float %acc, float %a, float %b) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/fnmul.ll b/test/CodeGen/ARM/fnmul.ll
index 6d7bc05..e14e5ba 100644
--- a/test/CodeGen/ARM/fnmul.ll
+++ b/test/CodeGen/ARM/fnmul.ll
@@ -1,5 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep vnmul.f64
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep vmul.f64
+; RUN: llc -mtriple=arm-eabi -mattr=+v6,+vfp2 %s -o - | FileCheck %s
+
+; RUN: llc -mtriple=arm-eabi -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math %s -o - \
+; RUN:  | FileCheck %s -check-prefix=CHECK-ROUNDING
+
 
 
 define double @t1(double %a, double %b) {
@@ -9,3 +12,6 @@ entry:
         ret double %tmp4
 }
 
+; CHECK: vnmul.f64
+; CHECK-ROUNDING: vmul.f64
+
diff --git a/test/CodeGen/ARM/fnmuls.ll b/test/CodeGen/ARM/fnmuls.ll
index 3223885..de3b053 100644
--- a/test/CodeGen/ARM/fnmuls.ll
+++ b/test/CodeGen/ARM/fnmuls.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s
 
 define arm_aapcs_vfpcc float @test1(float %a, float %b) nounwind {
 ; CHECK: vnmul.f32 s0, s0, s1 
diff --git a/test/CodeGen/ARM/fold-const.ll b/test/CodeGen/ARM/fold-const.ll
index 1ba561d..dc5419f 100644
--- a/test/CodeGen/ARM/fold-const.ll
+++ b/test/CodeGen/ARM/fold-const.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v7 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+v7 %s -o - | FileCheck %s
 
 define i32 @f(i32 %a) nounwind readnone optsize ssp {
 entry:
diff --git a/test/CodeGen/ARM/fold-stack-adjust.ll b/test/CodeGen/ARM/fold-stack-adjust.ll
index 67fd129..695a20b 100644
--- a/test/CodeGen/ARM/fold-stack-adjust.ll
+++ b/test/CodeGen/ARM/fold-stack-adjust.ll
@@ -1,6 +1,7 @@
-; RUN: llc -mtriple=thumbv7-apple-darwin-eabi < %s | FileCheck %s
-; RUN: llc -mtriple=thumbv6m-apple-darwin-eabi -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-T1
+; RUN: llc -mtriple=thumbv7-apple-none-macho < %s | FileCheck %s
+; RUN: llc -mtriple=thumbv6m-apple-none-macho -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-T1
 ; RUN: llc -mtriple=thumbv7-apple-darwin-ios -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-IOS
+; RUN: llc -mtriple=thumbv7--linux-gnueabi -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-LINUX
 
 
 declare void @bar(i8*)
@@ -11,11 +12,11 @@ declare void @bar(i8*)
 
 define void @check_simple() minsize {
 ; CHECK-LABEL: check_simple:
-; CHECK: push.w {r7, r8, r9, r10, r11, lr}
+; CHECK: push {r3, r4, r5, r6, r7, lr}
 ; CHECK-NOT: sub sp, sp,
 ; ...
 ; CHECK-NOT: add sp, sp,
-; CHECK: pop.w {r0, r1, r2, r3, r11, pc}
+; CHECK: pop {r0, r1, r2, r3, r7, pc}
 
 ; CHECK-T1-LABEL: check_simple:
 ; CHECK-T1: push {r3, r4, r5, r6, r7, lr}
@@ -43,11 +44,11 @@ define void @check_simple() minsize {
 
 define void @check_simple_too_big() minsize {
 ; CHECK-LABEL: check_simple_too_big:
-; CHECK: push.w {r11, lr}
+; CHECK: push {r7, lr}
 ; CHECK: sub sp,
 ; ...
 ; CHECK: add sp,
-; CHECK: pop.w {r11, pc}
+; CHECK: pop {r7, pc}
   %var = alloca i8, i32 64
   call void @bar(i8* %var)
   ret void
@@ -92,16 +93,16 @@ define void @check_vfp_fold() minsize {
 ; folded in except that doing so would clobber the value being returned.
 define i64 @check_no_return_clobber() minsize {
 ; CHECK-LABEL: check_no_return_clobber:
-; CHECK: push.w {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK: push {r1, r2, r3, r4, r5, r6, r7, lr}
 ; CHECK-NOT: sub sp,
 ; ...
-; CHECK: add sp, #40
-; CHECK: pop.w {r11, pc}
+; CHECK: add sp, #24
+; CHECK: pop {r7, pc}
 
   ; Just to keep iOS FileCheck within previous function:
 ; CHECK-IOS-LABEL: check_no_return_clobber:
 
-  %var = alloca i8, i32 40
+  %var = alloca i8, i32 20
   call void @bar(i8* %var)
   ret i64 0
 }
@@ -161,4 +162,57 @@ end:
   ; We want the epilogue to be the only thing in a basic block so that we hit
   ; the correct edge-case (first inst in block is correct one to adjust).
   ret void
-}
-\ No newline at end of file
+}
+
+define void @test_varsize(...) minsize {
+; CHECK-T1-LABEL: test_varsize:
+; CHECK-T1: sub	sp, #16
+; CHECK-T1: push	{r2, r3, r4, r5, r7, lr}
+; ...
+; CHECK-T1: pop	{r2, r3, r4, r5, r7}
+; CHECK-T1: pop	{r3}
+; CHECK-T1: add	sp, #16
+; CHECK-T1: bx	r3
+
+; CHECK-LABEL: test_varsize:
+; CHECK: sub	sp, #16
+; CHECK: push	{r5, r6, r7, lr}
+; ...
+; CHECK: pop.w	{r2, r3, r7, lr}
+; CHECK: add	sp, #16
+; CHECK: bx	lr
+
+  %var = alloca i8, i32 8
+  call void @bar(i8* %var)
+  ret void
+}
+
+%"MyClass" = type { i8*, i32, i32, float, float, float, [2 x i8], i32, i32* }
+
+declare float @foo()
+
+declare void @bar3()
+
+declare %"MyClass"* @bar2(%"MyClass"* returned, i16*, i32, float, float, i32, i32, i1 zeroext, i1 zeroext, i32)
+
+define fastcc float @check_vfp_no_return_clobber2(i16* %r, i16* %chars, i32 %length, i1 zeroext %flag) minsize {
+entry:
+; CHECK-LINUX-LABEL: check_vfp_no_return_clobber2:
+; CHECK-LINUX: vpush	{d0, d1, d2, d3, d4, d5, d6, d7, d8}
+; CHECK-LINUX-NOT: sub sp,
+; ...
+; CHECK-LINUX: add sp
+; CHECK-LINUX: vpop {d8}
+  %run = alloca %"MyClass", align 4
+  %call = call %"MyClass"* @bar2(%"MyClass"* %run, i16* %chars, i32 %length, float 0.000000e+00, float 0.000000e+00, i32 1, i32 1, i1 zeroext false, i1 zeroext true, i32 3)
+  %call1 = call float @foo()
+  %cmp = icmp eq %"MyClass"* %run, null
+  br i1 %cmp, label %exit, label %if.then
+
+if.then:                                          ; preds = %entry
+  call void @bar3()
+  br label %exit
+
+exit:                                             ; preds = %if.then, %entry
+  ret float %call1
+}
diff --git a/test/CodeGen/ARM/formal.ll b/test/CodeGen/ARM/formal.ll
index 4ac10ba..05a6be1 100644
--- a/test/CodeGen/ARM/formal.ll
+++ b/test/CodeGen/ARM/formal.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null
 
 declare void @bar(i64 %x, i64 %y)
 
diff --git a/test/CodeGen/ARM/fp-arg-shuffle.ll b/test/CodeGen/ARM/fp-arg-shuffle.ll
index ae02b79..4996cc8 100644
--- a/test/CodeGen/ARM/fp-arg-shuffle.ll
+++ b/test/CodeGen/ARM/fp-arg-shuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon -float-abi=soft | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon -float-abi=soft %s -o - | FileCheck %s
 
 ; CHECK: function1
 ; CHECK-NOT: vmov
diff --git a/test/CodeGen/ARM/fp-fast.ll b/test/CodeGen/ARM/fp-fast.ll
index ec57187..7d95a5e 100644
--- a/test/CodeGen/ARM/fp-fast.ll
+++ b/test/CodeGen/ARM/fp-fast.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=arm -mcpu=cortex-a9 -mattr=+vfp4 -enable-unsafe-fp-math < %s | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 -mattr=+vfp4 -enable-unsafe-fp-math %s -o - \
+; RUN:  | FileCheck %s
 
 ; CHECK: test1
 define float @test1(float %x) {
diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll
index fbf3a4a..7e1f000 100644
--- a/test/CodeGen/ARM/fp.ll
+++ b/test/CodeGen/ARM/fp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+vfp2 %s -o - | FileCheck %s
 
 define float @f(i32 %a) {
 ;CHECK-LABEL: f:
diff --git a/test/CodeGen/ARM/fp16.ll b/test/CodeGen/ARM/fp16.ll
index a5c1aed..fba7946 100644
--- a/test/CodeGen/ARM/fp16.ll
+++ b/test/CodeGen/ARM/fp16.ll
@@ -9,7 +9,7 @@ target triple = "armv7-eabi"
 
 define arm_aapcs_vfpcc void @foo() nounwind {
 ; CHECK-LABEL: foo:
-; CHECK-FP6-LABEL: foo:
+; CHECK-FP16-LABEL: foo:
 entry:
   %0 = load i16* @x, align 2
   %1 = load i16* @y, align 2
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
index f0d9100..6f47075 100644
--- a/test/CodeGen/ARM/fp_convert.ll
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -1,9 +1,20 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NEON
-; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=VFP2
+
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \
+; RUN:  | FileCheck %s -check-prefix=VFP2
+
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
+; RUN: | FileCheck %s -check-prefix=VFP2
+
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \
+; RUN:  | FileCheck %s -check-prefix=NEON
+
+; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=NEON
+
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=VFP2
 
 define i32 @test1(float %a, float %b) {
 ; VFP2-LABEL: test1:
diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll
index 3a0af16..eab5988 100644
--- a/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math %s -o - \
+; RUN:  | FileCheck %s
+
 ; rdar://7461510
 ; rdar://10964603
 
diff --git a/test/CodeGen/ARM/fpcmp.ll b/test/CodeGen/ARM/fpcmp.ll
index 916a1ae..e3ffd45 100644
--- a/test/CodeGen/ARM/fpcmp.ll
+++ b/test/CodeGen/ARM/fpcmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - | FileCheck %s
 
 define i32 @f1(float %a) {
 ;CHECK-LABEL: f1:
diff --git a/test/CodeGen/ARM/fpconsts.ll b/test/CodeGen/ARM/fpconsts.ll
index 0679a47..5a45a9b 100644
--- a/test/CodeGen/ARM/fpconsts.ll
+++ b/test/CodeGen/ARM/fpconsts.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp3 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp3 %s -o - | FileCheck %s
 
 define float @t1(float %x) nounwind readnone optsize {
 entry:
diff --git a/test/CodeGen/ARM/fpconv.ll b/test/CodeGen/ARM/fpconv.ll
index 326e062..eadf9af 100644
--- a/test/CodeGen/ARM/fpconv.ll
+++ b/test/CodeGen/ARM/fpconv.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP
-; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - | FileCheck %s --check-prefix=CHECK-VFP
+; RUN: llc -mtriple=arm-apple-darwin %s -o - | FileCheck %s
 
 define float @f1(double %x) {
 ;CHECK-VFP-LABEL: f1:
diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll
index 8fbd1d8..3a454ed 100644
--- a/test/CodeGen/ARM/fpmem.ll
+++ b/test/CodeGen/ARM/fpmem.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+; RUN: llc -mtriple=arm -float-abi=soft -mattr=+vfp2 %s -o - | FileCheck %s
 
 define float @f1(float %a) {
 ; CHECK-LABEL: f1:
diff --git a/test/CodeGen/ARM/fpow.ll b/test/CodeGen/ARM/fpow.ll
index 6d48792..3e37724 100644
--- a/test/CodeGen/ARM/fpow.ll
+++ b/test/CodeGen/ARM/fpow.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 
 define double @t(double %x, double %y) nounwind optsize {
 entry:
diff --git a/test/CodeGen/ARM/fptoint.ll b/test/CodeGen/ARM/fptoint.ll
index 7408687..c721756 100644
--- a/test/CodeGen/ARM/fptoint.ll
+++ b/test/CodeGen/ARM/fptoint.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+v6,+vfp2 %s -o - | FileCheck %s
 
 @i = weak global i32 0		; <i32*> [#uses=2]
 @u = weak global i32 0		; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll
index 617b018..baff34a 100644
--- a/test/CodeGen/ARM/fsubs.ll
+++ b/test/CodeGen/ARM/fsubs.ll
@@ -1,8 +1,17 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1
-; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NFP1U
-; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1U
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=VFP2
+
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=NFP1
+
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \
+; RUN:  | FileCheck %s -check-prefix=NFP1U
+
+; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=NFP1U
+
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \
+; RUN:  | FileCheck %s -check-prefix=NFP0
 
 define float @test(float %a, float %b) {
 entry:
diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll
index 893b426..d268585 100644
--- a/test/CodeGen/ARM/hello.ll
+++ b/test/CodeGen/ARM/hello.ll
@@ -1,8 +1,11 @@
-; RUN: llc < %s -march=arm
-; RUN: llc < %s -mtriple=armv6-linux-gnueabi | grep mov | count 1
-; RUN: llc < %s -mtriple=armv6-linux-gnu --disable-fp-elim | \
-; RUN:   grep mov | count 2
-; RUN: llc < %s -mtriple=armv6-apple-ios | grep mov | count 2
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
+; RUN: llc -mtriple=armv6-linux-gnueabi %s -o - | FileCheck %s
+
+; RUN: llc -mtriple=armv6-linux-gnu --disable-fp-elim %s -o - \
+; RUN:  | FileCheck %s -check-prefix CHECK-FP-ELIM
+
+; RUN: llc -mtriple=armv6-apple-ios %s -o - \
+; RUN:  | FileCheck %s -check-prefix CHECK-FP-ELIM
 
 @str = internal constant [12 x i8] c"Hello World\00"
 
@@ -12,3 +15,11 @@ define i32 @main() {
 }
 
 declare i32 @puts(i8*)
+
+; CHECK: mov
+; CHECK-NOT: mov
+
+; CHECK-FP-ELIM: mov
+; CHECK-FP-ELIM: mov
+; CHECK-FP-ELIM-NOT: mov
+
diff --git a/test/CodeGen/ARM/iabs.ll b/test/CodeGen/ARM/iabs.ll
index 600a8c2..c52caf6 100644
--- a/test/CodeGen/ARM/iabs.ll
+++ b/test/CodeGen/ARM/iabs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v4t | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s
 
 ;; Integer absolute value, should produce something as good as: ARM:
 ;;   movs r0, r0
diff --git a/test/CodeGen/ARM/ifconv-kills.ll b/test/CodeGen/ARM/ifconv-kills.ll
index bf54ba2..de80c92 100644
--- a/test/CodeGen/ARM/ifconv-kills.ll
+++ b/test/CodeGen/ARM/ifconv-kills.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march arm -mcpu swift -verify-machineinstrs
+; RUN: llc -mtriple arm-eabi -mcpu swift -verify-machineinstrs %s -o /dev/null
 
 declare i32 @f(i32 %p0, i32 %p1)
 
diff --git a/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll b/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll
new file mode 100644
index 0000000..86ed5b2
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -mtriple=armv4t--linux-androideabi -print-machineinstrs=if-converter -o /dev/null 2>&1 | FileCheck %s
+; Fix a bug triggered in IfConverterTriangle when CvtBB has multiple
+; predecessors.
+; PR18752
+
+%classK = type { i8, %classF }
+%classF = type { i8 }
+%classL = type { %classG, i32, i32 }
+%classG = type { %classL* }
+%classM2 = type { %classL }
+
+define zeroext i1 @test(%classK* %this, %classM2* nocapture readnone %p1, %classM2* nocapture readnone %p2) align 2 {
+entry:
+  br i1 undef, label %for.end, label %for.body
+
+; Before if conversion, we have
+; for.body -> lor.lhs.false.i (62)
+;          -> for.cond.backedge (62)
+; lor.lhs.false.i -> for.cond.backedge (1048575)
+;                 -> cond.false.i (1)
+; After if conversion, we have
+; for.body -> for.cond.backedge (130023362)
+;          -> cond.false.i (62)
+; CHECK: BB#1: derived from LLVM BB %for.body
+; CHECK: Successors according to CFG: BB#2(130023362) BB#4(62)
+for.body:
+  br i1 undef, label %for.cond.backedge, label %lor.lhs.false.i
+
+for.cond.backedge:
+  %tobool = icmp eq %classL* undef, null
+  br i1 %tobool, label %for.end, label %for.body
+
+lor.lhs.false.i:
+  %tobool.i.i7 = icmp eq i32 undef, 0
+  br i1 %tobool.i.i7, label %for.cond.backedge, label %cond.false.i
+
+cond.false.i:
+  call void @_Z3fn1v()
+  unreachable
+
+for.end:
+  br i1 undef, label %if.else.i.i, label %if.then.i.i
+
+if.then.i.i:
+  store %classL* null, %classL** undef, align 4
+  br label %_ZN1M6spliceEv.exit
+
+if.else.i.i:
+  store %classL* null, %classL** null, align 4
+  br label %_ZN1M6spliceEv.exit
+
+_ZN1M6spliceEv.exit:
+  %LIS = getelementptr inbounds %classK* %this, i32 0, i32 1
+  call void @_ZN1F10handleMoveEb(%classF* %LIS, i1 zeroext false)
+  unreachable
+}
+
+declare %classL* @_ZN1M1JI1LS1_EcvPS1_Ev(%classM2*)
+declare void @_ZN1F10handleMoveEb(%classF*, i1 zeroext)
+declare void @_Z3fn1v()
+
+!0 = metadata !{metadata !"clang version 3.5"}
diff --git a/test/CodeGen/ARM/ifcvt-branch-weight.ll b/test/CodeGen/ARM/ifcvt-branch-weight.ll
new file mode 100644
index 0000000..cd8a561
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt-branch-weight.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=thumbv8 -print-machineinstrs=if-converter -o /dev/null 2>&1 | FileCheck %s
+
+%struct.S = type { i8* (i8*)*, [1 x i8] }
+define internal zeroext i8 @bar(%struct.S* %x, %struct.S* nocapture %y) nounwind readonly {
+entry:
+  %0 = getelementptr inbounds %struct.S* %x, i32 0, i32 1, i32 0
+  %1 = load i8* %0, align 1
+  %2 = zext i8 %1 to i32
+  %3 = and i32 %2, 112
+  %4 = icmp eq i32 %3, 0
+  br i1 %4, label %return, label %bb
+
+bb:
+  %5 = getelementptr inbounds %struct.S* %y, i32 0, i32 1, i32 0
+  %6 = load i8* %5, align 1
+  %7 = zext i8 %6 to i32
+  %8 = and i32 %7, 112
+  %9 = icmp eq i32 %8, 0
+  br i1 %9, label %return, label %bb2
+
+; CHECK: BB#2: derived from LLVM BB %bb2
+; CHECK: Successors according to CFG: BB#3(192) BB#4(192)
+
+bb2:
+  %v10 = icmp eq i32 %3, 16
+  br i1 %v10, label %bb4, label %bb3, !prof !0
+
+bb3:
+  %v11 = icmp eq i32 %8, 16
+  br i1 %v11, label %bb4, label %return, !prof !1
+
+bb4:
+  %v12 = ptrtoint %struct.S* %x to i32
+  %phitmp = trunc i32 %v12 to i8
+  ret i8 %phitmp
+
+return:
+  ret i8 1
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 4, i32 12}
+!1 = metadata !{metadata !"branch_weights", i32 8, i32 16}
diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll
index 5a55653..cae2399 100644
--- a/test/CodeGen/ARM/ifcvt1.ll
+++ b/test/CodeGen/ARM/ifcvt1.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
-; RUN: llc < %s -march=arm -mcpu=swift     | FileCheck %s -check-prefix=SWIFT
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s -check-prefix=A8
+; RUN: llc -mtriple=arm-eabi -mcpu=swift %s -o - | FileCheck %s -check-prefix=SWIFT
 
 define i32 @t1(i32 %a, i32 %b) {
 ; A8-LABEL: t1:
diff --git a/test/CodeGen/ARM/ifcvt2.ll b/test/CodeGen/ARM/ifcvt2.ll
index e34edec..e445416 100644
--- a/test/CodeGen/ARM/ifcvt2.ll
+++ b/test/CodeGen/ARM/ifcvt2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v4t | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s
 
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-LABEL: t1:
diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll
index fa7d618..5da63dc 100644
--- a/test/CodeGen/ARM/ifcvt3.ll
+++ b/test/CodeGen/ARM/ifcvt3.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
-; RUN: llc < %s -march=arm -mattr=+v4t | grep cmpne | count 1
-; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 2
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s -check-prefix CHECK-V4-CMP
+; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s -check-prefix CHECK-V4-BX
 
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-LABEL: t1:
@@ -22,3 +22,11 @@ cond_next:
 	%tmp15 = add i32 %b, %a
 	ret i32 %tmp15
 }
+
+; CHECK-V4-CMP: cmpne
+; CHECK-V4-CMP-NOT: cmpne
+
+; CHECK-V4-BX: bx
+; CHECK-V4-BX: bx
+; CHECK-V4-BX-NOT: bx
+
diff --git a/test/CodeGen/ARM/ifcvt4.ll b/test/CodeGen/ARM/ifcvt4.ll
index 53c789d..8c6825a 100644
--- a/test/CodeGen/ARM/ifcvt4.ll
+++ b/test/CodeGen/ARM/ifcvt4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 ; Do not if-convert when branches go to the different loops.
 ; CHECK-LABEL: t:
diff --git a/test/CodeGen/ARM/ifcvt9.ll b/test/CodeGen/ARM/ifcvt9.ll
index 05bdc45..1191716 100644
--- a/test/CodeGen/ARM/ifcvt9.ll
+++ b/test/CodeGen/ARM/ifcvt9.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 
 define fastcc void @t() nounwind {
 entry:
diff --git a/test/CodeGen/ARM/illegal-vector-bitcast.ll b/test/CodeGen/ARM/illegal-vector-bitcast.ll
index febe6f5..7208fff 100644
--- a/test/CodeGen/ARM/illegal-vector-bitcast.ll
+++ b/test/CodeGen/ARM/illegal-vector-bitcast.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm
-; RUN: llc < %s -mtriple=arm-linux
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
+; RUN: llc -mtriple=arm-linux %s -o /dev/null
 
 define void @foo(<8 x float>* %f, <8 x float>* %g, <4 x i64>* %y)
 {
diff --git a/test/CodeGen/ARM/imm.ll b/test/CodeGen/ARM/imm.ll
index 6f25f9d..e7bc0af 100644
--- a/test/CodeGen/ARM/imm.ll
+++ b/test/CodeGen/ARM/imm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | not grep CPI
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define i32 @test1(i32 %A) {
         %B = add i32 %A, -268435441             ; <i32> [#uses=1]
@@ -14,3 +14,6 @@ define i32 @test3(i32 %A) {
         ret i32 %B
 }
 
+; CHECK-NOT: CPI
+
+
diff --git a/test/CodeGen/ARM/indirect-reg-input.ll b/test/CodeGen/ARM/indirect-reg-input.ll
index b936455..17f6a9c 100644
--- a/test/CodeGen/ARM/indirect-reg-input.ll
+++ b/test/CodeGen/ARM/indirect-reg-input.ll
@@ -1,4 +1,4 @@
-; RUN: not llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s
+; RUN: not llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - 2>&1 | FileCheck %s
 
 ; Check for error message:
 ; CHECK: error: inline asm not supported yet: don't know how to handle tied indirect register inputs
diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll
index 1aeeb91..7c49cb3 100644
--- a/test/CodeGen/ARM/indirectbr.ll
+++ b/test/CodeGen/ARM/indirectbr.ll
@@ -11,6 +11,11 @@ define internal i32 @foo(i32 %i) nounwind {
 ; THUMB-LABEL: foo:
 ; THUMB2-LABEL: foo:
 entry:
+  ; _nextaddr gets CSEed for use later on.
+; THUMB: ldr r[[NEXTADDR_REG:[0-9]+]], [[NEXTADDR_CPI:LCPI0_[0-9]+]]
+; THUMB: [[NEXTADDR_PCBASE:LPC0_[0-9]]]:
+; THUMB: add r[[NEXTADDR_REG]], pc
+
   %0 = load i8** @nextaddr, align 4               ; <i8*> [#uses=2]
   %1 = icmp eq i8* %0, null                       ; <i1> [#uses=1]
 ; indirect branch gets duplicated here
@@ -53,12 +58,11 @@ L1:                                               ; preds = %L2, %bb2
 ; ARM: ldr [[R1:r[0-9]+]], LCPI
 ; ARM: add [[R1b:r[0-9]+]], pc, [[R1]]
 ; ARM: str [[R1b]]
+
 ; THUMB-LABEL: %L1
-; THUMB: ldr
-; THUMB: add
 ; THUMB: ldr [[R2:r[0-9]+]], LCPI
 ; THUMB: add [[R2]], pc
-; THUMB: str [[R2]]
+; THUMB: str [[R2]], [r[[NEXTADDR_REG]]]
 ; THUMB2-LABEL: %L1
 ; THUMB2: ldr [[R2:r[0-9]+]], LCPI
 ; THUMB2-NEXT: str{{(.w)?}} [[R2]]
@@ -67,4 +71,5 @@ L1:                                               ; preds = %L2, %bb2
 }
 ; ARM: .long Ltmp0-(LPC{{.*}}+8)
 ; THUMB: .long Ltmp0-(LPC{{.*}}+4)
+; THUMB: .long _nextaddr-([[NEXTADDR_PCBASE]]+4)
 ; THUMB2: .long Ltmp0
diff --git a/test/CodeGen/ARM/inline-diagnostics.ll b/test/CodeGen/ARM/inline-diagnostics.ll
new file mode 100644
index 0000000..7b77da2
--- /dev/null
+++ b/test/CodeGen/ARM/inline-diagnostics.ll
@@ -0,0 +1,16 @@
+; RUN: not llc < %s -verify-machineinstrs -mtriple=armv7-none-linux-gnu -mattr=+neon 2>&1 | FileCheck %s
+
+%struct.float4 = type { float, float, float, float }
+
+; CHECK: error: Don't know how to handle indirect register inputs yet for constraint 'w'
+define float @inline_func(float %f1, float %f2) #0 {
+  %c1 = alloca %struct.float4, align 4
+  %c2 = alloca %struct.float4, align 4
+  %c3 = alloca %struct.float4, align 4
+  call void asm sideeffect "vmul.f32 ${2:q}, ${0:q}, ${1:q}", "=*r,=*r,*w"(%struct.float4* %c1, %struct.float4* %c2, %struct.float4* %c3) #1, !srcloc !1
+  %x = getelementptr inbounds %struct.float4* %c3, i32 0, i32 0
+  %1 = load float* %x, align 4
+  ret float %1
+}
+
+!1 = metadata !{i32 271, i32 305}
diff --git a/test/CodeGen/ARM/inlineasm-64bit.ll b/test/CodeGen/ARM/inlineasm-64bit.ll
index 683a0c4..d098a43 100644
--- a/test/CodeGen/ARM/inlineasm-64bit.ll
+++ b/test/CodeGen/ARM/inlineasm-64bit.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O3  -mtriple=arm-linux-gnueabi | FileCheck %s
-; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc < %s -O3  -mtriple=arm-linux-gnueabi -no-integrated-as | FileCheck %s
+; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -verify-machineinstrs -no-integrated-as < %s | FileCheck %s
 ; check if regs are passing correctly
 define void @i64_write(i64* %p, i64 %val) nounwind {
 ; CHECK-LABEL: i64_write:
diff --git a/test/CodeGen/ARM/inlineasm-imm-arm.ll b/test/CodeGen/ARM/inlineasm-imm-arm.ll
index 45dfcf0..603e52d 100644
--- a/test/CodeGen/ARM/inlineasm-imm-arm.ll
+++ b/test/CodeGen/ARM/inlineasm-imm-arm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi -no-integrated-as %s -o /dev/null
 
 ; Test ARM-mode "I" constraint, for any Data Processing immediate.
 define i32 @testI(i32 %x) {
diff --git a/test/CodeGen/ARM/inlineasm-ldr-pseudo.ll b/test/CodeGen/ARM/inlineasm-ldr-pseudo.ll
new file mode 100644
index 0000000..f63e4b0
--- /dev/null
+++ b/test/CodeGen/ARM/inlineasm-ldr-pseudo.ll
@@ -0,0 +1,17 @@
+; PR18354
+; We actually need to use -filetype=obj in this test because if we output
+; assembly, the current code path will bypass the parser and just write the
+; raw text out to the Streamer. We need to actually parse the inlineasm to
+; demonstrate the bug. Going the asm->obj route does not show the issue.
+; RUN: llc -mtriple=arm-none-linux   < %s -filetype=obj | llvm-objdump -d - | FileCheck %s
+; RUN: llc -mtriple=arm-apple-darwin < %s -filetype=obj | llvm-objdump -d - | FileCheck %s
+; CHECK-LABEL: foo:
+; CHECK: 0:       00 00 9f e5                                     ldr     r0, [pc]
+; CHECK: 4:       0e f0 a0 e1                                     mov     pc, lr
+; Make sure the constant pool entry comes after the return
+; CHECK: 8:       01 00 00 00
+define i32 @foo() nounwind {
+entry:
+  %0 = tail call i32 asm sideeffect "ldr $0,=1", "=r"() nounwind
+  ret i32 %0
+}
diff --git a/test/CodeGen/ARM/inlineasm-switch-mode-oneway-from-arm.ll b/test/CodeGen/ARM/inlineasm-switch-mode-oneway-from-arm.ll
new file mode 100644
index 0000000..3be378d
--- /dev/null
+++ b/test/CodeGen/ARM/inlineasm-switch-mode-oneway-from-arm.ll
@@ -0,0 +1,18 @@
+;RUN:  llc -mtriple=armv7-linux-gnueabi < %s | llvm-mc -triple=armv7-linux-gnueabi -filetype=obj | llvm-objdump -triple=armv7 -d - | FileCheck %s
+;RUN:  llc -mtriple=armv7-linux-gnueabi < %s | FileCheck %s -check-prefix=ASM
+;RUN:  llc -mtriple=armv7-apple-darwin < %s | FileCheck %s -check-prefix=ASM
+
+define hidden i32 @bah(i8* %start) #0 align 2 {
+  %1 = ptrtoint i8* %start to i32
+  %2 = tail call i32 asm sideeffect "@ Enter THUMB Mode\0A\09adr r3, 2f+1 \0A\09bx  r3 \0A\09.code 16 \0A2: push {r7} \0A\09mov r7, $4 \0A\09svc 0x0 \0A\09pop {r7} \0A\09", "={r0},{r0},{r1},{r2},r,~{r3}"(i32 %1, i32 %1, i32 0, i32 983042) #3
+  %3 = add i32 %1, 1
+  ret i32 %3
+}
+; CHECK: $t
+; CHECK: $a
+; CHECK: 01 00 81 e2     add     r0, r1, #1
+
+; .code 32 is implicit
+; ASM-LABEL: bah:
+; ASM: .code 16
+; ASM: .code 32
diff --git a/test/CodeGen/ARM/inlineasm-switch-mode-oneway-from-thumb.ll b/test/CodeGen/ARM/inlineasm-switch-mode-oneway-from-thumb.ll
new file mode 100644
index 0000000..b9bd4c2
--- /dev/null
+++ b/test/CodeGen/ARM/inlineasm-switch-mode-oneway-from-thumb.ll
@@ -0,0 +1,18 @@
+;RUN:  llc -mtriple=thumbv7-linux-gnueabi < %s | llvm-mc -triple=thumbv7-linux-gnueabi -filetype=obj | llvm-objdump -triple=thumbv7 -d - | FileCheck %s
+;RUN:  llc -mtriple=thumbv7-linux-gnueabi < %s | FileCheck %s -check-prefix=ASM
+;RUN:  llc -mtriple=thumbv7-apple-darwin < %s | FileCheck %s -check-prefix=ASM
+
+define hidden i32 @bah(i8* %start) #0 align 2 {
+  %1 = ptrtoint i8* %start to i32
+  %2 = tail call i32 asm sideeffect "@ Enter ARM Mode  \0A\09adr r3, 1f \0A\09bx  r3 \0A\09.align 2 \0A\09.code 32 \0A1:  push {r7} \0A\09mov r7, $4 \0A\09svc 0x0 \0A\09pop {r7} \0A\09", "={r0},{r0},{r1},{r2},r,~{r3}"(i32 %1, i32 %1, i32 0, i32 983042) #3
+  %3 = add i32 %1, 1
+  ret i32 %3
+}
+; CHECK: $a
+; CHECK: $t
+; CHECK: 48 1c   adds    r0, r1, #1
+
+; ASM: .code 16
+; ASM-LABEL: bah:
+; ASM: .code 32
+; ASM: .code 16
diff --git a/test/CodeGen/ARM/inlineasm-switch-mode.ll b/test/CodeGen/ARM/inlineasm-switch-mode.ll
new file mode 100644
index 0000000..65fea11
--- /dev/null
+++ b/test/CodeGen/ARM/inlineasm-switch-mode.ll
@@ -0,0 +1,22 @@
+;RUN: llc -mtriple=thumbv7-linux-gnueabi < %s | llvm-mc -triple=thumbv7-linux-gnueabi -filetype=obj > %t
+; Two-pass decoding is needed because llvm-objdump does not respect mapping symbols
+;RUN: llvm-objdump -triple=armv7   -d %t | FileCheck %s --check-prefix=ARM
+;RUN: llvm-objdump -triple=thumbv7 -d %t | FileCheck %s --check-prefix=THUMB
+
+define hidden i32 @bah(i8* %start) #0 align 2 {
+  %1 = ptrtoint i8* %start to i32
+  %2 = tail call i32 asm sideeffect "@ Enter ARM Mode  \0A\09adr r3, 1f \0A\09bx  r3 \0A\09.align 2 \0A\09.code 32 \0A1:  push {r7} \0A\09mov r7, $4 \0A\09svc 0x0 \0A\09pop {r7} \0A\09@ Enter THUMB Mode\0A\09adr r3, 2f+1 \0A\09bx  r3 \0A\09.code 16 \0A2: \0A\09", "={r0},{r0},{r1},{r2},r,~{r3}"(i32 %1, i32 %1, i32 0, i32 983042) #3
+  %3 = add i32 %1, 1
+  ret i32 %3
+}
+
+; ARM: $a
+; ARM-NEXT: 04 70 2d e5     str     r7, [sp, #-4]!
+; ARM: $t
+; ARM-NEXT: 48 1c
+
+; THUMB: $a
+; THUMB-NEXT: 04 70
+; THUMB-NEXT: 2d e5
+; THUMB: $t
+; THUMB-NEXT: 48 1c   adds    r0, r1, #1
diff --git a/test/CodeGen/ARM/inlineasm.ll b/test/CodeGen/ARM/inlineasm.ll
index cca3c69..39962e0 100644
--- a/test/CodeGen/ARM/inlineasm.ll
+++ b/test/CodeGen/ARM/inlineasm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6
+; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o /dev/null
 
 define i32 @test1(i32 %tmp54) {
 	%tmp56 = tail call i32 asm "uxtb16 $0,$1", "=r,r"( i32 %tmp54 )		; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/inlineasm2.ll b/test/CodeGen/ARM/inlineasm2.ll
index a99bccf..5918738 100644
--- a/test/CodeGen/ARM/inlineasm2.ll
+++ b/test/CodeGen/ARM/inlineasm2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null
 
 define double @__ieee754_sqrt(double %x) {
 	%tmp2 = tail call double asm "fsqrtd ${0:P}, ${1:P}", "=w,w"( double %x )
diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll
index 390a44e..eb7ba59 100644
--- a/test/CodeGen/ARM/inlineasm3.ll
+++ b/test/CodeGen/ARM/inlineasm3.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=arm -mattr=+neon,+v6t2 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon,+v6t2 -no-integrated-as %s -o - \
+; RUN:  | FileCheck %s
 
 ; Radar 7449043
 %struct.int32x4_t = type { <4 x i32> }
diff --git a/test/CodeGen/ARM/inlineasm4.ll b/test/CodeGen/ARM/inlineasm4.ll
index 4a1bcca..a117cd2 100644
--- a/test/CodeGen/ARM/inlineasm4.ll
+++ b/test/CodeGen/ARM/inlineasm4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define double @f(double %x) {
 entry:
diff --git a/test/CodeGen/ARM/insn-sched1.ll b/test/CodeGen/ARM/insn-sched1.ll
index d188fae..2749a8e 100644
--- a/test/CodeGen/ARM/insn-sched1.ll
+++ b/test/CodeGen/ARM/insn-sched1.ll
@@ -1,6 +1,5 @@
-; RUN: llc < %s -march=arm -mattr=+v6
-; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6 |\
-; RUN:   grep mov | count 3
+; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o /dev/null
+; RUN: llc -mtriple=arm-apple-ios -mattr=+v6 %s -o - | FileCheck %s
 
 define i32 @test(i32 %x) {
         %tmp = trunc i32 %x to i16              ; <i16> [#uses=1]
@@ -9,3 +8,9 @@ define i32 @test(i32 %x) {
 }
 
 declare i32 @f(i32, i16)
+
+; CHECK: mov
+; CHECK: mov
+; CHECK: mov
+; CHECK-NOT: mov
+
diff --git a/test/CodeGen/ARM/integer_insertelement.ll b/test/CodeGen/ARM/integer_insertelement.ll
index 1d72afe..bf403b9 100644
--- a/test/CodeGen/ARM/integer_insertelement.ll
+++ b/test/CodeGen/ARM/integer_insertelement.ll
@@ -1,4 +1,4 @@
-; RUN: llc %s -o - -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 ; This test checks that when inserting one (integer) element into a vector,
 ; the vector is not spuriously copied. "vorr dX, dY, dY" is the way of moving
diff --git a/test/CodeGen/ARM/interrupt-attr.ll b/test/CodeGen/ARM/interrupt-attr.ll
index 217fd69..9b7b41b 100644
--- a/test/CodeGen/ARM/interrupt-attr.ll
+++ b/test/CodeGen/ARM/interrupt-attr.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -mtriple=arm-none-none-eabi -mcpu=cortex-a15 -o - %s | FileCheck --check-prefix=CHECK-A %s
 ; RUN: llc -mtriple=thumb-none-none-eabi -mcpu=cortex-a15 -o - %s | FileCheck --check-prefix=CHECK-A-THUMB %s
-; RUN: llc -mtriple=thumb-apple-darwin -mcpu=cortex-m3 -o - %s | FileCheck --check-prefix=CHECK-M %s
+; RUN: llc -mtriple=thumb-apple-none-macho -mcpu=cortex-m3 -o - %s | FileCheck --check-prefix=CHECK-M %s
 
 declare arm_aapcscc void @bar()
 
@@ -12,32 +12,33 @@ define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" {
 
   ; Also need special function return setting pc and CPSR simultaneously.
 ; CHECK-A-LABEL: irq_fn:
-; CHECK-A: push {r0, r1, r2, r3, r11, lr}
+; CHECK-A: push {r0, r1, r2, r3, r11, r12, lr}
 ; CHECK-A: add r11, sp, #16
 ; CHECK-A: sub sp, sp, #{{[0-9]+}}
 ; CHECK-A: bic sp, sp, #7
 ; CHECK-A: bl bar
 ; CHECK-A: sub sp, r11, #16
-; CHECK-A: pop {r0, r1, r2, r3, r11, lr}
+; CHECK-A: pop {r0, r1, r2, r3, r11, r12, lr}
 ; CHECK-A: subs pc, lr, #4
 
 ; CHECK-A-THUMB-LABEL: irq_fn:
-; CHECK-A-THUMB: push {r0, r1, r2, r3, r4, r7, lr}
-; CHECK-A-THUMB: mov r4, sp
+; CHECK-A-THUMB: push.w {r0, r1, r2, r3, r4, r7, r12, lr}
 ; CHECK-A-THUMB: add r7, sp, #20
+; CHECK-A-THUMB: mov r4, sp
 ; CHECK-A-THUMB: bic r4, r4, #7
 ; CHECK-A-THUMB: bl bar
 ; CHECK-A-THUMB: sub.w r4, r7,  #20
 ; CHECK-A-THUMB: mov sp, r4
-; CHECK-A-THUMB: pop.w {r0, r1, r2, r3, r4, r7, lr}
+; CHECK-A-THUMB: pop.w {r0, r1, r2, r3, r4, r7, r12, lr}
 ; CHECK-A-THUMB: subs pc, lr, #4
 
   ; Normal AAPCS function (r0-r3 pushed onto stack by hardware, lr set to
   ; appropriate sentinel so no special return needed).
+; CHECK-M-LABEL: irq_fn:
 ; CHECK-M: push {r4, r7, lr}
 ; CHECK-M: add r7, sp, #4
-; CHECK-M: sub sp, #4
 ; CHECK-M: mov r4, sp
+; CHECK-M: bic r4, r4, #7
 ; CHECK-M: mov sp, r4
 ; CHECK-M: blx _bar
 ; CHECK-M: subs r4, r7, #4
@@ -48,6 +49,7 @@ define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" {
   ret void
 }
 
+; We don't push/pop r12, as it is banked for FIQ
 define arm_aapcscc void @fiq_fn() alignstack(8) "interrupt"="FIQ" {
 ; CHECK-A-LABEL: fiq_fn:
 ; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr}
@@ -61,6 +63,8 @@ define arm_aapcscc void @fiq_fn() alignstack(8) "interrupt"="FIQ" {
 ; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr}
 ; CHECK-A: subs pc, lr, #4
 
+; CHECK-A-THUMB-LABEL: fiq_fn:
+; CHECK-M-LABEL: fiq_fn:
   %val = load volatile [16 x i32]* @bigvar
   store volatile [16 x i32] %val, [16 x i32]* @bigvar
   ret void
@@ -68,13 +72,13 @@ define arm_aapcscc void @fiq_fn() alignstack(8) "interrupt"="FIQ" {
 
 define arm_aapcscc void @swi_fn() alignstack(8) "interrupt"="SWI" {
 ; CHECK-A-LABEL: swi_fn:
-; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
 ; CHECK-A: add r11, sp, #44
 ; CHECK-A: sub sp, sp, #{{[0-9]+}}
 ; CHECK-A: bic sp, sp, #7
 ; [...]
 ; CHECK-A: sub sp, r11, #44
-; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
 ; CHECK-A: subs pc, lr, #0
 
   %val = load volatile [16 x i32]* @bigvar
@@ -84,13 +88,13 @@ define arm_aapcscc void @swi_fn() alignstack(8) "interrupt"="SWI" {
 
 define arm_aapcscc void @undef_fn() alignstack(8) "interrupt"="UNDEF" {
 ; CHECK-A-LABEL: undef_fn:
-; CHECK-A: push {r0, r1, r2, r3, r11, lr}
+; CHECK-A: push {r0, r1, r2, r3, r11, r12, lr}
 ; CHECK-A: add r11, sp, #16
 ; CHECK-A: sub sp, sp, #{{[0-9]+}}
 ; CHECK-A: bic sp, sp, #7
 ; [...]
 ; CHECK-A: sub sp, r11, #16
-; CHECK-A: pop {r0, r1, r2, r3, r11, lr}
+; CHECK-A: pop {r0, r1, r2, r3, r11, r12, lr}
 ; CHECK-A: subs pc, lr, #0
 
   call void @bar()
@@ -99,13 +103,13 @@ define arm_aapcscc void @undef_fn() alignstack(8) "interrupt"="UNDEF" {
 
 define arm_aapcscc void @abort_fn() alignstack(8) "interrupt"="ABORT" {
 ; CHECK-A-LABEL: abort_fn:
-; CHECK-A: push {r0, r1, r2, r3, r11, lr}
+; CHECK-A: push {r0, r1, r2, r3, r11, r12, lr}
 ; CHECK-A: add r11, sp, #16
 ; CHECK-A: sub sp, sp, #{{[0-9]+}}
 ; CHECK-A: bic sp, sp, #7
 ; [...]
 ; CHECK-A: sub sp, r11, #16
-; CHECK-A: pop {r0, r1, r2, r3, r11, lr}
+; CHECK-A: pop {r0, r1, r2, r3, r11, r12, lr}
 ; CHECK-A: subs pc, lr, #4
 
   call void @bar()
diff --git a/test/CodeGen/ARM/intrinsics-crypto.ll b/test/CodeGen/ARM/intrinsics-crypto.ll
index c038fe6..96413d3 100644
--- a/test/CodeGen/ARM/intrinsics-crypto.ll
+++ b/test/CodeGen/ARM/intrinsics-crypto.ll
@@ -3,13 +3,13 @@
 define arm_aapcs_vfpcc <16 x i8> @test_aesde(<16 x i8>* %a, <16 x i8> *%b) {
   %tmp = load <16 x i8>* %a
   %tmp2 = load <16 x i8>* %b
-  %tmp3 = call <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8> %tmp, <16 x i8> %tmp2)
+  %tmp3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %tmp, <16 x i8> %tmp2)
   ; CHECK: aesd.8 q{{[0-9]+}}, q{{[0-9]+}}
-  %tmp4 = call <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8> %tmp3, <16 x i8> %tmp2)
+  %tmp4 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %tmp3, <16 x i8> %tmp2)
   ; CHECK: aese.8 q{{[0-9]+}}, q{{[0-9]+}}
-  %tmp5 = call <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8> %tmp4)
+  %tmp5 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %tmp4)
   ; CHECK: aesimc.8 q{{[0-9]+}}, q{{[0-9]+}}
-  %tmp6 = call <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8> %tmp5)
+  %tmp6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %tmp5)
   ; CHECK: aesmc.8 q{{[0-9]+}}, q{{[0-9]+}}
   ret <16 x i8> %tmp6
 }
@@ -18,40 +18,42 @@ define arm_aapcs_vfpcc <4 x i32> @test_sha(<4 x i32> *%a, <4 x i32> *%b, <4 x i3
   %tmp = load <4 x i32>* %a
   %tmp2 = load <4 x i32>* %b
   %tmp3 = load <4 x i32>* %c
-  %res1 = call <4 x i32> @llvm.arm.neon.sha1h.v4i32(<4 x i32> %tmp)
+  %scalar = extractelement <4 x i32> %tmp, i32 0
+  %resscalar = call i32 @llvm.arm.neon.sha1h(i32 %scalar)
+  %res1 = insertelement <4 x i32> undef, i32 %resscalar, i32 0
   ; CHECK: sha1h.32 q{{[0-9]+}}, q{{[0-9]+}}
-  %res2 = call <4 x i32> @llvm.arm.neon.sha1c.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3, <4 x i32> %res1)
+  %res2 = call <4 x i32> @llvm.arm.neon.sha1c(<4 x i32> %tmp2, i32 %scalar, <4 x i32> %res1)
   ; CHECK: sha1c.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-  %res3 = call <4 x i32> @llvm.arm.neon.sha1m.v4i32(<4 x i32> %res2, <4 x i32> %tmp3, <4 x i32> %res1)
+  %res3 = call <4 x i32> @llvm.arm.neon.sha1m(<4 x i32> %res2, i32 %scalar, <4 x i32> %res1)
   ; CHECK: sha1m.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-  %res4 = call <4 x i32> @llvm.arm.neon.sha1p.v4i32(<4 x i32> %res3, <4 x i32> %tmp3, <4 x i32> %res1)
+  %res4 = call <4 x i32> @llvm.arm.neon.sha1p(<4 x i32> %res3, i32 %scalar, <4 x i32> %res1)
   ; CHECK: sha1p.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-  %res5 = call <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32> %res4, <4 x i32> %tmp3, <4 x i32> %res1)
+  %res5 = call <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32> %res4, <4 x i32> %tmp3, <4 x i32> %res1)
   ; CHECK: sha1su0.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-  %res6 = call <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32> %res5, <4 x i32> %res1)
+  %res6 = call <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32> %res5, <4 x i32> %res1)
   ; CHECK: sha1su1.32 q{{[0-9]+}}, q{{[0-9]+}}
-  %res7 = call <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32> %res6, <4 x i32> %tmp3, <4 x i32> %res1)
+  %res7 = call <4 x i32> @llvm.arm.neon.sha256h(<4 x i32> %res6, <4 x i32> %tmp3, <4 x i32> %res1)
   ; CHECK: sha256h.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-  %res8 = call <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32> %res7, <4 x i32> %tmp3, <4 x i32> %res1)
+  %res8 = call <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32> %res7, <4 x i32> %tmp3, <4 x i32> %res1)
   ; CHECK: sha256h2.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-  %res9 = call <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32> %res8, <4 x i32> %tmp3, <4 x i32> %res1)
+  %res9 = call <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32> %res8, <4 x i32> %tmp3, <4 x i32> %res1)
   ; CHECK: sha256su1.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-  %res10 = call <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32> %res9, <4 x i32> %tmp3)
+  %res10 = call <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32> %res9, <4 x i32> %tmp3)
   ; CHECK: sha256su0.32 q{{[0-9]+}}, q{{[0-9]+}}
   ret <4 x i32> %res10
 }
 
-declare <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8>)
-declare <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8>)
-declare <4 x i32> @llvm.arm.neon.sha1h.v4i32(<4 x i32>)
-declare <4 x i32> @llvm.arm.neon.sha1c.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.sha1m.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.sha1p.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8>)
+declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>)
+declare i32 @llvm.arm.neon.sha1h(i32)
+declare <4 x i32> @llvm.arm.neon.sha1c(<4 x i32>, i32, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha1m(<4 x i32>, i32, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha1p(<4 x i32>, i32, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha256h(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32>, <4 x i32>)
diff --git a/test/CodeGen/ARM/ispositive.ll b/test/CodeGen/ARM/ispositive.ll
index 2f1a2cf..3086d79 100644
--- a/test/CodeGen/ARM/ispositive.ll
+++ b/test/CodeGen/ARM/ispositive.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define i32 @test1(i32 %X) {
 ; CHECK: lsr{{.*}}#31
diff --git a/test/CodeGen/ARM/large-stack.ll b/test/CodeGen/ARM/large-stack.ll
index ddf0f0e..1a9a1fa 100644
--- a/test/CodeGen/ARM/large-stack.ll
+++ b/test/CodeGen/ARM/large-stack.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 
 define void @test1() {
     %tmp = alloca [ 64 x i32 ] , align 4
diff --git a/test/CodeGen/ARM/ldaex-stlex.ll b/test/CodeGen/ARM/ldaex-stlex.ll
new file mode 100644
index 0000000..bfdfea3
--- /dev/null
+++ b/test/CodeGen/ARM/ldaex-stlex.ll
@@ -0,0 +1,92 @@
+; RUN: llc < %s -mtriple=armv8-apple-darwin   | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8-apple-darwin | FileCheck %s
+
+%0 = type { i32, i32 }
+
+; CHECK-LABEL: f0:
+; CHECK: ldaexd
+define i64 @f0(i8* %p) nounwind readonly {
+entry:
+  %ldaexd = tail call %0 @llvm.arm.ldaexd(i8* %p)
+  %0 = extractvalue %0 %ldaexd, 1
+  %1 = extractvalue %0 %ldaexd, 0
+  %2 = zext i32 %0 to i64
+  %3 = zext i32 %1 to i64
+  %shl = shl nuw i64 %2, 32
+  %4 = or i64 %shl, %3
+  ret i64 %4
+}
+
+; CHECK-LABEL: f1:
+; CHECK: stlexd
+define i32 @f1(i8* %ptr, i64 %val) nounwind {
+entry:
+  %tmp4 = trunc i64 %val to i32
+  %tmp6 = lshr i64 %val, 32
+  %tmp7 = trunc i64 %tmp6 to i32
+  %stlexd = tail call i32 @llvm.arm.stlexd(i32 %tmp4, i32 %tmp7, i8* %ptr)
+  ret i32 %stlexd
+}
+
+declare %0 @llvm.arm.ldaexd(i8*) nounwind readonly
+declare i32 @llvm.arm.stlexd(i32, i32, i8*) nounwind
+
+; CHECK-LABEL: test_load_i8:
+; CHECK: ldaexb r0, [r0]
+; CHECK-NOT: uxtb
+; CHECK-NOT: and
+define zeroext i8 @test_load_i8(i8* %addr) {
+  %val = call i32 @llvm.arm.ldaex.p0i8(i8* %addr)
+  %val8 = trunc i32 %val to i8
+  ret i8 %val8
+}
+
+; CHECK-LABEL: test_load_i16:
+; CHECK: ldaexh r0, [r0]
+; CHECK-NOT: uxth
+; CHECK-NOT: and
+define zeroext i16 @test_load_i16(i16* %addr) {
+  %val = call i32 @llvm.arm.ldaex.p0i16(i16* %addr)
+  %val16 = trunc i32 %val to i16
+  ret i16 %val16
+}
+
+; CHECK-LABEL: test_load_i32:
+; CHECK: ldaex r0, [r0]
+define i32 @test_load_i32(i32* %addr) {
+  %val = call i32 @llvm.arm.ldaex.p0i32(i32* %addr)
+  ret i32 %val
+}
+
+declare i32 @llvm.arm.ldaex.p0i8(i8*) nounwind readonly
+declare i32 @llvm.arm.ldaex.p0i16(i16*) nounwind readonly
+declare i32 @llvm.arm.ldaex.p0i32(i32*) nounwind readonly
+
+; CHECK-LABEL: test_store_i8:
+; CHECK-NOT: uxtb
+; CHECK: stlexb r0, r1, [r2]
+define i32 @test_store_i8(i32, i8 %val, i8* %addr) {
+  %extval = zext i8 %val to i32
+  %res = call i32 @llvm.arm.stlex.p0i8(i32 %extval, i8* %addr)
+  ret i32 %res
+}
+
+; CHECK-LABEL: test_store_i16:
+; CHECK-NOT: uxth
+; CHECK: stlexh r0, r1, [r2]
+define i32 @test_store_i16(i32, i16 %val, i16* %addr) {
+  %extval = zext i16 %val to i32
+  %res = call i32 @llvm.arm.stlex.p0i16(i32 %extval, i16* %addr)
+  ret i32 %res
+}
+
+; CHECK-LABEL: test_store_i32:
+; CHECK: stlex r0, r1, [r2]
+define i32 @test_store_i32(i32, i32 %val, i32* %addr) {
+  %res = call i32 @llvm.arm.stlex.p0i32(i32 %val, i32* %addr)
+  ret i32 %res
+}
+
+declare i32 @llvm.arm.stlex.p0i8(i32, i8*) nounwind
+declare i32 @llvm.arm.stlex.p0i16(i32, i16*) nounwind
+declare i32 @llvm.arm.stlex.p0i32(i32, i32*) nounwind
diff --git a/test/CodeGen/ARM/ldm.ll b/test/CodeGen/ARM/ldm.ll
index d5b805c..3977da6 100644
--- a/test/CodeGen/ARM/ldm.ll
+++ b/test/CodeGen/ARM/ldm.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=armv4t-apple-darwin | FileCheck %s -check-prefix=V4T
+; RUN: llc < %s -mtriple=armv7-apple-ios3.0 | FileCheck %s
+; RUN: llc < %s -mtriple=armv4t-apple-ios3.0 | FileCheck %s -check-prefix=V4T
 
 @X = external global [0 x i32]          ; <[0 x i32]*> [#uses=5]
 
diff --git a/test/CodeGen/ARM/ldr.ll b/test/CodeGen/ARM/ldr.ll
index e4c695b..57e9977 100644
--- a/test/CodeGen/ARM/ldr.ll
+++ b/test/CodeGen/ARM/ldr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define i32 @f1(i32* %v) {
 ; CHECK-LABEL: f1:
diff --git a/test/CodeGen/ARM/ldr_ext.ll b/test/CodeGen/ARM/ldr_ext.ll
index d29eb02..31aaba5 100644
--- a/test/CodeGen/ARM/ldr_ext.ll
+++ b/test/CodeGen/ARM/ldr_ext.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define i32 @test1(i8* %t1) nounwind {
 ; CHECK: ldrb
diff --git a/test/CodeGen/ARM/ldr_frame.ll b/test/CodeGen/ARM/ldr_frame.ll
index f071b89..ed964ec 100644
--- a/test/CodeGen/ARM/ldr_frame.ll
+++ b/test/CodeGen/ARM/ldr_frame.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v4t | not grep mov
+; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s
 
 define i32 @f1() {
 	%buf = alloca [32 x i32], align 4
@@ -29,3 +29,6 @@ define i32 @f4() {
         %tmp2 = zext i8 %tmp1 to i32
 	ret i32 %tmp2
 }
+
+; CHECK-NOT: mov
+
diff --git a/test/CodeGen/ARM/ldr_post.ll b/test/CodeGen/ARM/ldr_post.ll
index f5ff7dd..2558b16 100644
--- a/test/CodeGen/ARM/ldr_post.ll
+++ b/test/CodeGen/ARM/ldr_post.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm | FileCheck %s
-; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=swift %s -o - | FileCheck %s
 
 ; CHECK-LABEL: test1:
 ; CHECK: ldr {{.*, \[.*]}}, -r2
diff --git a/test/CodeGen/ARM/ldr_pre.ll b/test/CodeGen/ARM/ldr_pre.ll
index 8281827..a97927a 100644
--- a/test/CodeGen/ARM/ldr_pre.ll
+++ b/test/CodeGen/ARM/ldr_pre.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm | FileCheck %s
-; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=swift %s -o - | FileCheck %s
 
 ; CHECK-LABEL: test1:
 ; CHECK: ldr {{.*!}}
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index 864d18a..caef2e7 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=A8
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=M3
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=A8 -check-prefix=CHECK
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=M3 -check-prefix=CHECK
 ; rdar://6949835
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC -check-prefix=CHECK
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY -check-prefix=CHECK
 
 ; Magic ARM pair hints works best with linearscan / fast.
 
diff --git a/test/CodeGen/ARM/ldstrex.ll b/test/CodeGen/ARM/ldstrex.ll
index 5eaae53..a40e255 100644
--- a/test/CodeGen/ARM/ldstrex.ll
+++ b/test/CodeGen/ARM/ldstrex.ll
@@ -36,17 +36,21 @@ declare i32 @llvm.arm.strexd(i32, i32, i8*) nounwind
 ; CHECK-LABEL: test_load_i8:
 ; CHECK: ldrexb r0, [r0]
 ; CHECK-NOT: uxtb
-define i32 @test_load_i8(i8* %addr) {
+; CHECK-NOT: and
+define zeroext i8 @test_load_i8(i8* %addr) {
   %val = call i32 @llvm.arm.ldrex.p0i8(i8* %addr)
-  ret i32 %val
+  %val8 = trunc i32 %val to i8
+  ret i8 %val8
 }
 
 ; CHECK-LABEL: test_load_i16:
 ; CHECK: ldrexh r0, [r0]
 ; CHECK-NOT: uxth
-define i32 @test_load_i16(i16* %addr) {
+; CHECK-NOT: and
+define zeroext i16 @test_load_i16(i16* %addr) {
   %val = call i32 @llvm.arm.ldrex.p0i16(i16* %addr)
-  ret i32 %val
+  %val16 = trunc i32 %val to i16
+  ret i16 %val16
 }
 
 ; CHECK-LABEL: test_load_i32:
@@ -137,3 +141,19 @@ define void @excl_addrmode() {
 
   ret void
 }
+
+; LLVM should know, even across basic blocks, that ldrex is setting the high
+; bits of its i32 to 0. There should be no zero-extend operation.
+define zeroext i8 @test_cross_block_zext_i8(i1 %tst, i8* %addr) {
+; CHECK-LABEL: test_cross_block_zext_i8:
+; CHECK-NOT: uxtb
+; CHECK-NOT: and
+; CHECK: bx lr
+  %val = call i32 @llvm.arm.ldrex.p0i8(i8* %addr)
+  br i1 %tst, label %end, label %mid
+mid:
+  ret i8 42
+end:
+  %val8 = trunc i32 %val to i8
+  ret i8 %val8
+}
diff --git a/test/CodeGen/ARM/load.ll b/test/CodeGen/ARM/load.ll
index 253b0e1..ca16adc 100644
--- a/test/CodeGen/ARM/load.ll
+++ b/test/CodeGen/ARM/load.ll
@@ -1,9 +1,4 @@
-; RUN: llc < %s -march=arm > %t
-; RUN: grep ldrsb %t
-; RUN: grep ldrb %t
-; RUN: grep ldrsh %t
-; RUN: grep ldrh %t
-
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define i32 @f1(i8* %p) {
 entry:
@@ -32,3 +27,9 @@ entry:
         %tmp4 = zext i16 %tmp to i32             ; <i32> [#uses=1]
         ret i32 %tmp4
 }
+
+; CHECK: ldrsb
+; CHECK: ldrb
+; CHECK: ldrsh
+; CHECK: ldrh
+
diff --git a/test/CodeGen/ARM/long-setcc.ll b/test/CodeGen/ARM/long-setcc.ll
index c76a5e4..f09167e 100644
--- a/test/CodeGen/ARM/long-setcc.ll
+++ b/test/CodeGen/ARM/long-setcc.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=arm | grep cmp | count 1
-
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define i1 @t1(i64 %x) {
 	%B = icmp slt i64 %x, 0
@@ -15,3 +14,7 @@ define i1 @t3(i32 %x) {
 	%tmp = icmp ugt i32 %x, -1
 	ret i1 %tmp
 }
+
+; CHECK: cmp
+; CHECK-NOT: cmp
+
diff --git a/test/CodeGen/ARM/long.ll b/test/CodeGen/ARM/long.ll
index 7fffc81..d0bff4a 100644
--- a/test/CodeGen/ARM/long.ll
+++ b/test/CodeGen/ARM/long.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define i64 @f1() {
 ; CHECK-LABEL: f1:
diff --git a/test/CodeGen/ARM/longMAC.ll b/test/CodeGen/ARM/longMAC.ll
index 2cf91c3..5636a12 100644
--- a/test/CodeGen/ARM/longMAC.ll
+++ b/test/CodeGen/ARM/longMAC.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s --check-prefix=CHECK-V7
 ; Check generated signed and unsigned multiply accumulate long.
 
 define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
@@ -42,3 +43,28 @@ define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) {
   %add = add nsw i64 %mul, %conv2
   ret i64 %add
 }
+
+; Two things to check here: the @earlyclobber constraint (on <= v5) and the "$Rd = $R" ones.
+;    + Without @earlyclobber the v7 code is natural. With it, the first two
+;      registers must be distinct from the third.
+;    + Without "$Rd = $R", this can be satisfied without a mov before the umlal
+;      by trying to use 6 different registers in the MachineInstr. The natural
+;      evolution of this attempt currently leaves only two movs in the final
+;      function, both after the umlal. With it, *some* move has to happen
+;      before the umlal.
+define i64 @MACLongTest5(i64 %c, i32 %a, i32 %b) {
+; CHECK-V7-LABEL: MACLongTest5:
+; CHECK-V7: umlal r0, r1, r0, r0
+
+; CHECK-LABEL: MACLongTest5:
+; CHECK: mov [[RDLO:r[0-9]+]], r0
+; CHECK: umlal [[RDLO]], r1, r0, r0
+; CHECK: mov r0, [[RDLO]]
+
+  %conv.trunc = trunc i64 %c to i32
+  %conv = zext i32 %conv.trunc to i64
+  %conv1 = zext i32 %b to i64
+  %mul = mul i64 %conv, %conv
+  %add = add i64 %mul, %c
+  ret i64 %add
+}
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
index 3e986d80..48b0ba7 100644
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define i64 @f0(i64 %A, i64 %B) {
 ; CHECK-LABEL: f0:
diff --git a/test/CodeGen/ARM/lsr-scale-addr-mode.ll b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
index 0c8d387..9480241 100644
--- a/test/CodeGen/ARM/lsr-scale-addr-mode.ll
+++ b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | grep lsl | grep -F "lsl #2]"
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 ; Should use scaled addressing mode.
 
 define void @sintzero(i32* %a) nounwind {
@@ -17,3 +17,6 @@ cond_next:		; preds = %cond_next, %entry
 return:		; preds = %cond_next
 	ret void
 }
+
+; CHECK: lsl{{.*}}#2]
+
diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll
index 26d4be2..1dafa00 100644
--- a/test/CodeGen/ARM/lsr-unfolded-offset.ll
+++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll
@@ -4,7 +4,7 @@
 ; register pressure and therefore spilling. There is more room for improvement
 ; here.
 
-; CHECK: sub sp, #{{40|32|28|24}}
+; CHECK: sub sp, #{{40|36|32|28|24}}
 
 ; CHECK: %for.inc
 ; CHECK-NOT: ldr
diff --git a/test/CodeGen/ARM/machine-licm.ll b/test/CodeGen/ARM/machine-licm.ll
index fc9b226..ca65501 100644
--- a/test/CodeGen/ARM/machine-licm.ll
+++ b/test/CodeGen/ARM/machine-licm.ll
@@ -5,20 +5,12 @@
 ; rdar://7354376
 ; rdar://8887598
 
-; The generated code is no where near ideal. It's not recognizing the two
-; constantpool entries being loaded can be merged into one.
-
 @GV = external global i32                         ; <i32*> [#uses=2]
 
 define void @t(i32* nocapture %vals, i32 %c) nounwind {
 entry:
 ; ARM-LABEL: t:
 ; ARM: ldr [[REGISTER_1:r[0-9]+]], LCPI0_0
-; Unfortunately currently ARM codegen doesn't cse the ldr from constantpool.
-; The issue is it can be read by an "add pc" or a "ldr [pc]" so it's messy
-; to add the pseudo instructions to make sure they are CSE'ed at the same
-; time as the "ldr cp".
-; ARM: ldr r{{[0-9]+}}, LCPI0_1
 ; ARM: LPC0_0:
 ; ARM: ldr r{{[0-9]+}}, [pc, [[REGISTER_1]]]
 ; ARM: ldr r{{[0-9]+}}, [r{{[0-9]+}}]
@@ -36,7 +28,7 @@ entry:
 
 bb.nph:                                           ; preds = %entry
 ; ARM: LCPI0_0:
-; ARM: LCPI0_1:
+; ARM-NOT: LCPI0_1:
 ; ARM: .section
 
 ; THUMB: BB#1
diff --git a/test/CodeGen/ARM/mature-mc-support.ll b/test/CodeGen/ARM/mature-mc-support.ll
new file mode 100644
index 0000000..0a7e5b9
--- /dev/null
+++ b/test/CodeGen/ARM/mature-mc-support.ll
@@ -0,0 +1,12 @@
+; Test that inline assembly is parsed by the MC layer when MC support is mature
+; (even when the output is assembly).
+
+; RUN: not llc -mtriple=arm-pc-linux < %s > /dev/null 2> %t1
+; RUN: FileCheck %s < %t1
+
+; RUN: not llc -mtriple=arm-pc-linux -filetype=obj < %s > /dev/null 2> %t2
+; RUN: FileCheck %s < %t2
+
+module asm "	.this_directive_is_very_unlikely_to_exist"
+
+; CHECK: LLVM ERROR: Error parsing inline asm
diff --git a/test/CodeGen/ARM/mem.ll b/test/CodeGen/ARM/mem.ll
index f46c7a5..3c9cd91 100644
--- a/test/CodeGen/ARM/mem.ll
+++ b/test/CodeGen/ARM/mem.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=arm | grep strb
-; RUN: llc < %s -march=arm | grep strh
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define void @f1() {
 entry:
@@ -7,8 +6,13 @@ entry:
         ret void
 }
 
+; CHECK: strb
+
 define void @f2() {
 entry:
         store i16 0, i16* null
         ret void
 }
+
+; CHECK: strh
+
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index 946c63e..14d84de 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -38,7 +38,8 @@ entry:
 define void @t2(i8* nocapture %C) nounwind {
 entry:
 ; CHECK-LABEL: t2:
-; CHECK: ldr [[REG2:r[0-9]+]], [r1, #32]
+; CHECK: movw [[REG2:r[0-9]+]], #16716
+; CHECK: movt [[REG2:r[0-9]+]], #72
 ; CHECK: str [[REG2]], [r0, #32]
 ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
 ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
@@ -79,7 +80,8 @@ entry:
 ; CHECK: strb [[REG5]], [r0, #6]
 ; CHECK: movw [[REG6:r[0-9]+]], #21587
 ; CHECK: strh [[REG6]], [r0, #4]
-; CHECK: ldr [[REG7:r[0-9]+]], 
+; CHECK: movw [[REG7:r[0-9]+]], #18500
+; CHECK: movt [[REG7:r[0-9]+]], #22866
 ; CHECK: str [[REG7]]
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
   ret void
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index fe0056c..8d3800b 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -1,6 +1,7 @@
 ; RUN: llc < %s -mtriple=armv7-apple-ios -o - | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7m-darwin-eabi -o - | FileCheck %s --check-prefix=DARWIN
+; RUN: llc < %s -mtriple=thumbv7m-none-macho -o - | FileCheck %s --check-prefix=DARWIN
 ; RUN: llc < %s -mtriple=arm-none-eabi -o - | FileCheck --check-prefix=EABI %s
+; RUN: llc < %s -mtriple=arm-none-eabihf -o - | FileCheck --check-prefix=EABI %s
 
 @from = common global [500 x i32] zeroinitializer, align 4
 @to = common global [500 x i32] zeroinitializer, align 4
diff --git a/test/CodeGen/ARM/minsize-imms.ll b/test/CodeGen/ARM/minsize-imms.ll
new file mode 100644
index 0000000..4c8ff39
--- /dev/null
+++ b/test/CodeGen/ARM/minsize-imms.ll
@@ -0,0 +1,57 @@
+; RUN: llc -mtriple=thumbv7m-macho -o - -show-mc-encoding %s | FileCheck %s
+; RUN: llc -mtriple=thumbv6m-macho -o - -show-mc-encoding %s | FileCheck %s --check-prefix=CHECK-V6M
+; RUN: llc -mtriple=armv6-macho -o - -show-mc-encoding %s | FileCheck %s --check-prefix=CHECK-ARM
+define i32 @test_mov() minsize {
+; CHECK-LABEL: test_mov:
+; CHECK: movs r0, #255 @ encoding: [0xff,0x20]
+
+  ret i32 255
+}
+
+define i32 @test_mov_mvn() minsize {
+; CHECK-LABEL: test_mov_mvn:
+; CHECK: mvn r0, #203 @ encoding: [0x6f,0xf0,0xcb,0x00]
+
+; CHECK-V6M-LABEL: test_mov_mvn:
+; CHECK-V6M: movs [[TMP:r[0-7]]], #203 @ encoding: [0xcb,0x20]
+; CHECK-V6M: mvns r0, [[TMP]] @ encoding: [0xc0,0x43]
+
+; CHECK-ARM-LABEL: test_mov_mvn:
+; CHECK-ARM: mvn r0, #203 @ encoding: [0xcb,0x00,0xe0,0xe3]
+  ret i32 4294967092
+}
+
+define i32 @test_mov_lsl() minsize {
+; CHECK-LABEL: test_mov_lsl:
+; CHECK: mov.w r0, #589824 @ encoding: [0x4f,0xf4,0x10,0x20]
+
+; CHECK-V6M-LABEL: test_mov_lsl:
+; CHECK-V6M: movs [[TMP:r[0-7]]], #9 @ encoding: [0x09,0x20]
+; CHECK-V6M: lsls r0, [[TMP]], #16 @ encoding: [0x00,0x04]
+
+; CHECK-ARM-LABEL: test_mov_lsl:
+; CHECK-ARM: mov r0, #589824 @ encoding: [0x09,0x08,0xa0,0xe3]
+  ret i32 589824
+}
+
+define i32 @test_movw() minsize {
+; CHECK-LABEL: test_movw:
+; CHECK: movw r0, #65535
+
+; CHECK-V6M-LABEL: test_movw:
+; CHECK-V6M: ldr r0, [[CONSTPOOL:LCPI[0-9]+_[0-9]+]] @ encoding: [A,0x48]
+; CHECK-V6M: [[CONSTPOOL]]:
+; CHECK-V6M-NEXT: .long 65535
+
+; CHECK-ARM-LABEL: test_movw:
+; CHECK-ARM: mov r0, #255 @ encoding: [0xff,0x00,0xa0,0xe3]
+; CHECK-ARM: orr r0, r0, #65280 @ encoding: [0xff,0x0c,0x80,0xe3]
+ ret i32 65535
+}
+
+define i32 @test_regress1() {
+; CHECK-ARM-LABEL: test_regress1:
+; CHECK-ARM: mov r0, #248 @ encoding: [0xf8,0x00,0xa0,0xe3]
+; CHECK-ARM: orr r0, r0, #16252928 @ encoding: [0x3e,0x07,0x80,0xe3]
+  ret i32 16253176
+}
diff --git a/test/CodeGen/ARM/minsize-litpools.ll b/test/CodeGen/ARM/minsize-litpools.ll
new file mode 100644
index 0000000..d5cd2a9
--- /dev/null
+++ b/test/CodeGen/ARM/minsize-litpools.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mtriple=thumbv7s %s -o -  | FileCheck %s
+; RUN: llc -mtriple=armv7s %s -o -  | FileCheck %s
+
+; CodeGen should be able to set and reset the MinSize subtarget-feature, and
+; make use of it in deciding whether to use MOVW/MOVT for global variables or a
+; lit-pool load (saving roughly 2 bytes of code).
+
+@var = global i32 0
+
+define i32 @small_global() minsize {
+; CHECK-LABEL: small_global:
+; CHECK: ldr r[[GLOBDEST:[0-9]+]], {{.?LCPI0_0}}
+; CHECK: ldr r0, [r[[GLOBDEST]]]
+
+  %val = load i32* @var
+  ret i32 %val
+}
+
+define i32 @big_global() {
+; CHECK-LABEL: big_global:
+; CHECK: movw [[GLOBDEST:r[0-9]+]], :lower16:var
+; CHECK: movt [[GLOBDEST]], :upper16:var
+
+  %val = load i32* @var
+  ret i32 %val
+}
diff --git a/test/CodeGen/ARM/mls.ll b/test/CodeGen/ARM/mls.ll
index 8f0d3a8..6776e63 100644
--- a/test/CodeGen/ARM/mls.ll
+++ b/test/CodeGen/ARM/mls.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
-; RUN: llc < %s -march=arm -mattr=+v6t2 -arm-use-mulops=false | FileCheck %s -check-prefix=NO_MULOPS
+; RUN: llc -mtriple=arm-eabi -mattr=+v6t2 %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+v6t2 -arm-use-mulops=false %s -o - \
+; RUN:  | FileCheck %s -check-prefix=NO_MULOPS
 
 define i32 @f1(i32 %a, i32 %b, i32 %c) {
     %tmp1 = mul i32 %a, %b
diff --git a/test/CodeGen/ARM/movt-movw-global.ll b/test/CodeGen/ARM/movt-movw-global.ll
index bbedea1..1e10af1 100644
--- a/test/CodeGen/ARM/movt-movw-global.ll
+++ b/test/CodeGen/ARM/movt-movw-global.ll
@@ -16,8 +16,8 @@ entry:
 ; IOS-PIC:      movw    r0, :lower16:(L_foo$non_lazy_ptr-(LPC0_0+8))
 ; IOS-PIC-NEXT: movt    r0, :upper16:(L_foo$non_lazy_ptr-(LPC0_0+8))
 
-; IOS-STATIC-NOT:      movw    r0, :lower16:_foo
-; IOS-STATIC-NOT:       movt    r0, :upper16:_foo
+; IOS-STATIC:      movw    r0, :lower16:_foo
+; IOS-STATIC-NEXT:       movt    r0, :upper16:_foo
   ret i32* @foo
 }
 
@@ -32,8 +32,8 @@ entry:
 ; IOS-PIC:      movw    r1, :lower16:(L_foo$non_lazy_ptr-(LPC1_0+8))
 ; IOS-PIC-NEXT: movt    r1, :upper16:(L_foo$non_lazy_ptr-(LPC1_0+8))
 
-; IOS-STATIC-NOT:      movw    r1, :lower16:_foo
-; IOS-STATIC-NOT:      movt    r1, :upper16:_foo
+; IOS-STATIC:      movw    r1, :lower16:_foo
+; IOS-STATIC-NEXT:      movt    r1, :upper16:_foo
   store i32 %baz, i32* @foo, align 4
   ret void
 }
diff --git a/test/CodeGen/ARM/movt.ll b/test/CodeGen/ARM/movt.ll
index 25c1bfe..735d949 100644
--- a/test/CodeGen/ARM/movt.ll
+++ b/test/CodeGen/ARM/movt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
 ; rdar://7317664
 
 define i32 @t(i32 %X) nounwind {
diff --git a/test/CodeGen/ARM/mul_const.ll b/test/CodeGen/ARM/mul_const.ll
index 482d8f2..ada3d4e 100644
--- a/test/CodeGen/ARM/mul_const.ll
+++ b/test/CodeGen/ARM/mul_const.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define i32 @t9(i32 %v) nounwind readnone {
 entry:
diff --git a/test/CodeGen/ARM/mulhi.ll b/test/CodeGen/ARM/mulhi.ll
index 63705c5..c66a804 100644
--- a/test/CodeGen/ARM/mulhi.ll
+++ b/test/CodeGen/ARM/mulhi.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s -check-prefix=V6
-; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=V4
-; RUN: llc < %s -march=thumb -mcpu=cortex-m3 | FileCheck %s -check-prefix=M3
+; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o - | FileCheck %s -check-prefix=V6
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -check-prefix=V4
+; RUN: llc -mtriple=thumb-eabi -mcpu=cortex-m3 %s -o - | FileCheck %s -check-prefix=M3
 
 define i32 @smulhi(i32 %x, i32 %y) nounwind {
 ; V6-LABEL: smulhi:
diff --git a/test/CodeGen/ARM/mult-alt-generic-arm.ll b/test/CodeGen/ARM/mult-alt-generic-arm.ll
index a8104db..05e9b0f 100644
--- a/test/CodeGen/ARM/mult-alt-generic-arm.ll
+++ b/test/CodeGen/ARM/mult-alt-generic-arm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm -no-integrated-as
 ; ModuleID = 'mult-alt-generic.c'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
 target triple = "arm"
diff --git a/test/CodeGen/ARM/mvn.ll b/test/CodeGen/ARM/mvn.ll
index 2c5ccd7..489f247 100644
--- a/test/CodeGen/ARM/mvn.ll
+++ b/test/CodeGen/ARM/mvn.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | grep mvn | count 9
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define i32 @f1() {
 entry:
@@ -72,3 +72,16 @@ entry:
 	%tmp102 = icmp eq i32 -2, %a		; <i1> [#uses=1]
 	ret i1 %tmp102
 }
+
+; CHECK-LABEL: f1
+; CHECK: mvn
+; CHECK: mvn
+; CHECK: mvn
+; CHECK: mvn
+; CHECK: mvn
+; CHECK: mvn
+; CHECK: mvn
+; CHECK: mvn
+; CHECK: mvn
+; CHECK-NOT: mvn
+
diff --git a/test/CodeGen/ARM/neon_arith1.ll b/test/CodeGen/ARM/neon_arith1.ll
index 5892737..42e7d82 100644
--- a/test/CodeGen/ARM/neon_arith1.ll
+++ b/test/CodeGen/ARM/neon_arith1.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -march=arm -mattr=+neon | grep vadd
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @t_i8x8(<8 x i8> %a, <8 x i8> %b) nounwind {
 entry:
 	%0 = add <8 x i8> %a, %b
 	ret <8 x i8> %0
 }
+
+; CHECK: vadd
+
diff --git a/test/CodeGen/ARM/neon_cmp.ll b/test/CodeGen/ARM/neon_cmp.ll
index 046b5da..e1662c4 100644
--- a/test/CodeGen/ARM/neon_cmp.ll
+++ b/test/CodeGen/ARM/neon_cmp.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s
+
 ; bug 15283
 ; radar://13191881
 ; CHECK: vfcmp
diff --git a/test/CodeGen/ARM/neon_div.ll b/test/CodeGen/ARM/neon_div.ll
index 4a82c36..4f1607e 100644
--- a/test/CodeGen/ARM/neon_div.ll
+++ b/test/CodeGen/ARM/neon_div.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source -disable-post-ra | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon -pre-RA-sched=source -disable-post-ra %s -o - \
+; RUN:  | FileCheck %s
 
 define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vrecpe.f32
diff --git a/test/CodeGen/ARM/neon_fpconv.ll b/test/CodeGen/ARM/neon_fpconv.ll
index 149f4c7..8e37ce7 100644
--- a/test/CodeGen/ARM/neon_fpconv.ll
+++ b/test/CodeGen/ARM/neon_fpconv.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 ; PR12540: ARM backend lowering of FP_ROUND v2f64 to v2f32.
 define <2 x float> @vtrunc(<2 x double> %a) {
diff --git a/test/CodeGen/ARM/neon_ld1.ll b/test/CodeGen/ARM/neon_ld1.ll
index b892d2d..9fd3fc5 100644
--- a/test/CodeGen/ARM/neon_ld1.ll
+++ b/test/CodeGen/ARM/neon_ld1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s
 
 ; CHECK: t1
 ; CHECK: vldr d
diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll
index 25a670b..571a16a 100644
--- a/test/CodeGen/ARM/neon_ld2.ll
+++ b/test/CodeGen/ARM/neon_ld2.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s --check-prefix=SWIFT
+; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -float-abi=soft -mcpu=swift %s -o - | FileCheck %s --check-prefix=SWIFT
 
 ; CHECK: t1
 ; CHECK: vld1.64
diff --git a/test/CodeGen/ARM/neon_minmax.ll b/test/CodeGen/ARM/neon_minmax.ll
index 2e45919..84e4b30 100644
--- a/test/CodeGen/ARM/neon_minmax.ll
+++ b/test/CodeGen/ARM/neon_minmax.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=swift %s -o - | FileCheck %s
 
 define float @fmin_ole(float %x) nounwind {
 ;CHECK-LABEL: fmin_ole:
diff --git a/test/CodeGen/ARM/neon_shift.ll b/test/CodeGen/ARM/neon_shift.ll
index 340f220..3c09358 100644
--- a/test/CodeGen/ARM/neon_shift.ll
+++ b/test/CodeGen/ARM/neon_shift.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 ; <rdar://problem/9055897>
 define <4 x i16> @t1(<4 x i32> %a) nounwind {
diff --git a/test/CodeGen/ARM/neon_vabs.ll b/test/CodeGen/ARM/neon_vabs.ll
index 76b6044..7a02512 100644
--- a/test/CodeGen/ARM/neon_vabs.ll
+++ b/test/CodeGen/ARM/neon_vabs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <4 x i32> @test1(<4 x i32> %a) nounwind {
 ; CHECK-LABEL: test1:
diff --git a/test/CodeGen/ARM/none-macho.ll b/test/CodeGen/ARM/none-macho.ll
new file mode 100644
index 0000000..2795b8c
--- /dev/null
+++ b/test/CodeGen/ARM/none-macho.ll
@@ -0,0 +1,101 @@
+; RUN: llc -mtriple=thumbv7m-none-macho %s -o - -relocation-model=pic -disable-fp-elim | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NON-FAST
+; RUN: llc -mtriple=thumbv7m-none-macho -O0 %s -o - -relocation-model=pic -disable-fp-elim | FileCheck %s
+; RUN: llc -mtriple=thumbv7m-none-macho -filetype=obj %s -o /dev/null
+
+  ; Bare-metal should probably "declare" segments just like normal MachO
+; CHECK: __picsymbolstub4
+; CHECK: __StaticInit
+; CHECK: __text
+
+@var = external global i32
+
+define i32 @test_litpool() minsize {
+; CHECK-LABEL: test_litpool:
+  %val = load i32* @var
+  ret i32 %val
+
+  ; Lit-pool entries need to produce a "$non_lazy_ptr" version of the symbol.
+; CHECK: LCPI0_0:
+; CHECK-NEXT: .long L_var$non_lazy_ptr-(LPC0_0+4)
+}
+
+define i32 @test_movw_movt() {
+; CHECK-LABEL: test_movw_movt:
+  %val = load i32* @var
+  ret i32 %val
+
+  ; movw/movt should also address their symbols MachO-style
+; CHECK: movw [[RTMP:r[0-9]+]], :lower16:(L_var$non_lazy_ptr-(LPC1_0+4))
+; CHECK: movt [[RTMP]], :upper16:(L_var$non_lazy_ptr-(LPC1_0+4))
+; CHECK: LPC1_0:
+; CHECK: add [[RTMP]], pc
+}
+
+declare void @llvm.trap()
+
+define void @test_trap() {
+; CHECK-LABEL: test_trap:
+
+  ; Bare-metal MachO gets compiled on top of normal MachO toolchain which
+  ; understands trap natively.
+  call void @llvm.trap()
+; CHECK: trap
+
+  ret void
+}
+
+define i32 @test_frame_ptr() {
+; CHECK-LABEL: test_frame_ptr:
+  call void @test_trap()
+
+  ; Frame pointer is r7 as for Darwin
+; CHECK: mov r7, sp
+  ret i32 42
+}
+
+%big_arr = type [8 x i32]
+define void @test_two_areas(%big_arr* %addr) {
+; CHECK-LABEL: test_two_areas:
+  %val = load %big_arr* %addr
+  call void @test_trap()
+  store %big_arr %val, %big_arr* %addr
+
+  ; This goes with the choice of r7 as FP (largely). FP and LR have to be stored
+  ; consecutively on the stack for the frame record to be valid, which means we
+  ; need the 2 register-save areas employed by iOS.
+; CHECK-NON-FAST: push {r4, r5, r6, r7, lr}
+; CHECK-NON-FAST: push.w {r8, r9, r10, r11}
+; ...
+; CHECK-NON-FAST: pop.w {r8, r9, r10, r11}
+; CHECK-NON-FAST: pop {r4, r5, r6, r7, pc}
+  ret void
+}
+
+define void @test_tail_call() {
+; CHECK-LABEL: test_tail_call:
+  tail call void @test_trap()
+
+  ; Tail calls should be available and use Thumb2 branch.
+; CHECK: b.w _test_trap
+  ret void
+}
+
+define float @test_softfloat_calls(float %in) {
+; CHECK-LABEL: test_softfloat_calls:
+  %sum = fadd float %in, %in
+
+  ; Soft-float calls should be GNU-style rather than RTABI and should not be the
+  ; *vfp variants used for ARMv6 iOS.
+; CHECK: blx ___addsf3{{$}}
+  ret float %sum
+}
+
+  ; Even bare-metal PIC needs GOT-like behaviour, in principle. Depends a bit on
+  ; the use-case of course, but LLVM doesn't know what that is.
+; CHECK: non_lazy_symbol_pointers
+; CHECK: L_var$non_lazy_ptr:
+; CHECK-NEXT:   .indirect_symbol _var
+
+  ; All MachO objects should have this to give the linker leeway in removing
+  ; dead code.
+; CHECK: .subsections_via_symbols
diff --git a/test/CodeGen/ARM/noreturn.ll b/test/CodeGen/ARM/noreturn.ll
index 4c876ce..edc3333 100644
--- a/test/CodeGen/ARM/noreturn.ll
+++ b/test/CodeGen/ARM/noreturn.ll
@@ -43,6 +43,23 @@ entry:
   unreachable
 }
 
+; Test case for uwtable
+define i32 @test4() uwtable {
+; CHECK-LABEL: @test4
+; CHECK: push
+entry:
+  tail call void @overflow() #0
+  unreachable
+}
+
+define i32 @test5() uwtable {
+; CHECK-LABEL: @test5
+; CHECK: push
+entry:
+  tail call void @overflow_with_unwind() #1
+  unreachable
+}
+
 ; Function Attrs: noreturn
 declare void @overflow_with_unwind() #1
 
diff --git a/test/CodeGen/ARM/optimize-dmbs-v7.ll b/test/CodeGen/ARM/optimize-dmbs-v7.ll
new file mode 100644
index 0000000..64f5e20
--- /dev/null
+++ b/test/CodeGen/ARM/optimize-dmbs-v7.ll
@@ -0,0 +1,74 @@
+; RUN: llc < %s -mtriple=armv7 -mattr=+db | FileCheck %s
+
+@x1 = global i32 0, align 4
+@x2 = global i32 0, align 4
+
+define void @test() {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.013 = phi i32 [ 1, %entry ], [ %inc6, %for.body ]
+  store atomic i32 %i.013, i32* @x1 seq_cst, align 4
+  store atomic i32 %i.013, i32* @x1 seq_cst, align 4
+  store atomic i32 %i.013, i32* @x2 seq_cst, align 4
+  %inc6 = add nsw i32 %i.013, 1
+  %exitcond = icmp eq i32 %inc6, 2
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+
+; The for.body contains 3 seq_cst stores.
+; Hence it should have 3 dmb;str;dmb sequences with the middle dmbs collapsed
+; CHECK: %for.body
+; CHECK-NOT: str
+; CHECK: dmb
+; CHECK-NOT: dmb
+; CHECK: str
+
+; CHECK-NOT: str
+; CHECK: dmb
+; CHECK-NOT: dmb
+; CHECK: str
+
+; CHECK-NOT: str
+; CHECK: dmb
+; CHECK-NOT: dmb
+; CHECK: str
+
+; CHECK-NOT: str
+; CHECK: dmb
+; CHECK-NOT: dmb
+; CHECK-NOT: str
+; CHECK: %for.end
+}
+
+define void @test2() {
+  call void @llvm.arm.dmb(i32 11)
+  tail call void @test()
+  call void @llvm.arm.dmb(i32 11)
+  ret void
+; the call should prevent the two dmbs from collapsing
+; CHECK: test2:
+; CHECK: dmb
+; CHECK-NEXT: bl
+; CHECK-NEXT: dmb
+}
+
+define void @test3() {
+  call void @llvm.arm.dmb(i32 11)
+  call void @llvm.arm.dsb(i32 9)
+  call void @llvm.arm.dmb(i32 11)
+  ret void
+; the dsb should prevent the two dmbs from collapsing
+; CHECK: test3:
+; CHECK: dmb
+; CHECK-NEXT: dsb
+; CHECK-NEXT: dmb
+
+}
+
+
+declare void @llvm.arm.dmb(i32)
+declare void @llvm.arm.dsb(i32)
diff --git a/test/CodeGen/ARM/optselect-regclass.ll b/test/CodeGen/ARM/optselect-regclass.ll
index 1aa4520..0acb2f2 100644
--- a/test/CodeGen/ARM/optselect-regclass.ll
+++ b/test/CodeGen/ARM/optselect-regclass.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=arm -mcpu=swift -verify-machineinstrs
+; RUN: llc -mtriple=arm-eabi -mcpu=swift -verify-machineinstrs %s -o /dev/null
+
 %union.opcode.0.2.5.8.15.28 = type { i32 }
 
 @opcode = external global %union.opcode.0.2.5.8.15.28, align 4
diff --git a/test/CodeGen/ARM/pack.ll b/test/CodeGen/ARM/pack.ll
index fbc1155..89abe28 100644
--- a/test/CodeGen/ARM/pack.ll
+++ b/test/CodeGen/ARM/pack.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o - | FileCheck %s
 
 ; CHECK: test1
 ; CHECK: pkhbt   r0, r0, r1, lsl #16
diff --git a/test/CodeGen/ARM/phi.ll b/test/CodeGen/ARM/phi.ll
index dc1a95b..94bced5 100644
--- a/test/CodeGen/ARM/phi.ll
+++ b/test/CodeGen/ARM/phi.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=arm -mattr=+v4t < %s | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s
+
 ; <rdar://problem/8686347>
 
 define i32 @test1(i1 %a, i32* %b) {
diff --git a/test/CodeGen/ARM/popcnt.ll b/test/CodeGen/ARM/popcnt.ll
index bdf793d..7ace640 100644
--- a/test/CodeGen/ARM/popcnt.ll
+++ b/test/CodeGen/ARM/popcnt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 ; Implement ctpop with vcnt
 
 define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
diff --git a/test/CodeGen/ARM/prefetch-thumb.ll b/test/CodeGen/ARM/prefetch-thumb.ll
deleted file mode 100644
index e6f6ae8..0000000
--- a/test/CodeGen/ARM/prefetch-thumb.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=thumb -mattr=+v7         | FileCheck %s -check-prefix=THUMB2
-; TODO: This test case will be merged back into prefetch.ll when ARM mode issue is solved.
-
-declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
-
-define void @t6() {
-entry:
-;ARM: t6:
-;ARM: pld [sp]
-;ARM: pld [sp, #50]
-
-;THUMB2: t6:
-;THUMB2: pld [sp]
-;THUMB2: pld [sp, #50]
-
-%red = alloca [100 x i8], align 1
-%0 = getelementptr inbounds [100 x i8]* %red, i32 0, i32 0
-%1 = getelementptr inbounds [100 x i8]* %red, i32 0, i32 50
-call void @llvm.prefetch(i8* %0, i32 0, i32 3, i32 1)
-call void @llvm.prefetch(i8* %1, i32 0, i32 3, i32 1)
-ret void
-}
diff --git a/test/CodeGen/ARM/prefetch.ll b/test/CodeGen/ARM/prefetch.ll
index 5badb31..7350e0a 100644
--- a/test/CodeGen/ARM/prefetch.ll
+++ b/test/CodeGen/ARM/prefetch.ll
@@ -1,9 +1,11 @@
-; RUN: llc < %s -march=thumb -mattr=-thumb2 | not grep pld
-; RUN: llc < %s -march=thumb -mattr=+v7         | FileCheck %s -check-prefix=THUMB2
-; RUN: llc < %s -march=arm   -mattr=+v7         | FileCheck %s -check-prefix=ARM
-; RUN: llc < %s -march=arm   -mcpu=cortex-a9-mp | FileCheck %s -check-prefix=ARM-MP
+; RUN: llc -mtriple=thumb-eabi -mattr=-thumb2 %s -o - | FileCheck %s -check-prefix CHECK-T1
+; RUN: llc -mtriple=thumb-eabi -mattr=+v7 %s -o - | FileCheck %s -check-prefix=THUMB2
+; RUN: llc -mtriple=arm-eabi -mattr=+v7 %s -o - | FileCheck %s -check-prefix=ARM
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9-mp %s -o - | FileCheck %s -check-prefix=ARM-MP
 ; rdar://8601536
 
+; CHECK-T1-NOT: pld
+
 define void @t1(i8* %ptr) nounwind  {
 entry:
 ; ARM-LABEL: t1:
@@ -75,3 +77,21 @@ entry:
   tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 0 )
   ret void
 }
+
+define void @t6() {
+entry:
+;ARM-LABEL: t6:
+;ARM: pld [sp]
+;ARM: pld [sp, #50]
+
+;THUMB2-LABEL: t6:
+;THUMB2: pld [sp]
+;THUMB2: pld [sp, #50]
+
+%red = alloca [100 x i8], align 1
+%0 = getelementptr inbounds [100 x i8]* %red, i32 0, i32 0
+%1 = getelementptr inbounds [100 x i8]* %red, i32 0, i32 50
+call void @llvm.prefetch(i8* %0, i32 0, i32 3, i32 1)
+call void @llvm.prefetch(i8* %1, i32 0, i32 3, i32 1)
+ret void
+}
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 25484f4..b245674 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -34,9 +34,11 @@ entry:
   %12 = sext <4 x i16> %11 to <4 x i32>           ; <<4 x i32>> [#uses=1]
   %13 = mul <4 x i32> %1, %9                      ; <<4 x i32>> [#uses=1]
   %14 = mul <4 x i32> %3, %12                     ; <<4 x i32>> [#uses=1]
-  %15 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %13, <4 x i32> <i32 -12, i32 -12, i32 -12, i32 -12>) ; <<4 x i16>> [#uses=1]
-  %16 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %14, <4 x i32> <i32 -12, i32 -12, i32 -12, i32 -12>) ; <<4 x i16>> [#uses=1]
-  %17 = shufflevector <4 x i16> %15, <4 x i16> %16, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; <<8 x i16>> [#uses=1]
+  %15 = lshr <4 x i32> %13, <i32 12, i32 12, i32 12, i32 12>
+  %trunc_15 = trunc <4 x i32> %15 to <4 x i16>
+  %16 = lshr <4 x i32> %14, <i32 12, i32 12, i32 12, i32 12>
+  %trunc_16 = trunc <4 x i32> %16 to <4 x i16>
+  %17 = shufflevector <4 x i16> %trunc_15, <4 x i16> %trunc_16, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; <<8 x i16>> [#uses=1]
   %18 = bitcast i16* %o_ptr to i8*                ; <i8*> [#uses=1]
   tail call void @llvm.arm.neon.vst1.v8i16(i8* %18, <8 x i16> %17, i32 1)
   ret void
diff --git a/test/CodeGen/ARM/ret0.ll b/test/CodeGen/ARM/ret0.ll
index 5c312eb..e51067b 100644
--- a/test/CodeGen/ARM/ret0.ll
+++ b/test/CodeGen/ARM/ret0.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 
 define i32 @test() {
         ret i32 0
diff --git a/test/CodeGen/ARM/ret_arg1.ll b/test/CodeGen/ARM/ret_arg1.ll
index 1ab947b..b7eef20 100644
--- a/test/CodeGen/ARM/ret_arg1.ll
+++ b/test/CodeGen/ARM/ret_arg1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 
 define i32 @test(i32 %a1) {
         ret i32 %a1
diff --git a/test/CodeGen/ARM/ret_arg2.ll b/test/CodeGen/ARM/ret_arg2.ll
index 84477d0..bcb379b 100644
--- a/test/CodeGen/ARM/ret_arg2.ll
+++ b/test/CodeGen/ARM/ret_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 
 define i32 @test(i32 %a1, i32 %a2) {
         ret i32 %a2
diff --git a/test/CodeGen/ARM/ret_arg3.ll b/test/CodeGen/ARM/ret_arg3.ll
index f7f9057..625162f 100644
--- a/test/CodeGen/ARM/ret_arg3.ll
+++ b/test/CodeGen/ARM/ret_arg3.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
+
 define i32 @test(i32 %a1, i32 %a2, i32 %a3) {
         ret i32 %a3
 }
diff --git a/test/CodeGen/ARM/ret_arg4.ll b/test/CodeGen/ARM/ret_arg4.ll
index f7b3e4a..81b55fe 100644
--- a/test/CodeGen/ARM/ret_arg4.ll
+++ b/test/CodeGen/ARM/ret_arg4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 
 define i32 @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
         ret i32 %a4
diff --git a/test/CodeGen/ARM/ret_arg5.ll b/test/CodeGen/ARM/ret_arg5.ll
index c4f9fb5..680e89f 100644
--- a/test/CodeGen/ARM/ret_arg5.ll
+++ b/test/CodeGen/ARM/ret_arg5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 
 define i32 @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
         ret i32 %a5
diff --git a/test/CodeGen/ARM/ret_f32_arg2.ll b/test/CodeGen/ARM/ret_f32_arg2.ll
index 2bafea6..0caee0b 100644
--- a/test/CodeGen/ARM/ret_f32_arg2.ll
+++ b/test/CodeGen/ARM/ret_f32_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null
 
 define float @test_f32(float %a1, float %a2) {
         ret float %a2
diff --git a/test/CodeGen/ARM/ret_f32_arg5.ll b/test/CodeGen/ARM/ret_f32_arg5.ll
index c6ce60e..d39dc7e 100644
--- a/test/CodeGen/ARM/ret_f32_arg5.ll
+++ b/test/CodeGen/ARM/ret_f32_arg5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null
 
 define float @test_f32_arg5(float %a1, float %a2, float %a3, float %a4, float %a5) {
         ret float %a5
diff --git a/test/CodeGen/ARM/ret_f64_arg2.ll b/test/CodeGen/ARM/ret_f64_arg2.ll
index 386e85f..c4519ff 100644
--- a/test/CodeGen/ARM/ret_f64_arg2.ll
+++ b/test/CodeGen/ARM/ret_f64_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null
 
 define double @test_f64(double %a1, double %a2) {
         ret double %a2
diff --git a/test/CodeGen/ARM/ret_f64_arg_reg_split.ll b/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
index bdb0a60..ef11250 100644
--- a/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
+++ b/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mcpu=arm8 -mattr=+vfp2
+; RUN: llc -mtriple=arm-eabi -mcpu=arm8 -mattr=+vfp2 %s -o /dev/null
 
 define double @test_double_arg_reg_split(i32 %a1, double %a2) {
         ret double %a2
diff --git a/test/CodeGen/ARM/ret_f64_arg_split.ll b/test/CodeGen/ARM/ret_f64_arg_split.ll
index 4f841a3..1130920 100644
--- a/test/CodeGen/ARM/ret_f64_arg_split.ll
+++ b/test/CodeGen/ARM/ret_f64_arg_split.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null
 
 define double @test_double_arg_split(i64 %a1, i32 %a2, double %a3) {
         ret double %a3
diff --git a/test/CodeGen/ARM/ret_f64_arg_stack.ll b/test/CodeGen/ARM/ret_f64_arg_stack.ll
index 2144317..f45923e 100644
--- a/test/CodeGen/ARM/ret_f64_arg_stack.ll
+++ b/test/CodeGen/ARM/ret_f64_arg_stack.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null
 
 define double @test_double_arg_stack(i64 %a1, i32 %a2, i32 %a3, double %a4) {
         ret double %a4
diff --git a/test/CodeGen/ARM/ret_i128_arg2.ll b/test/CodeGen/ARM/ret_i128_arg2.ll
index 908c34f..a87f3f2 100644
--- a/test/CodeGen/ARM/ret_i128_arg2.ll
+++ b/test/CodeGen/ARM/ret_i128_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null
 
 define i128 @test_i128(i128 %a1, i128 %a2, i128 %a3) {
         ret i128 %a3
diff --git a/test/CodeGen/ARM/ret_i64_arg2.ll b/test/CodeGen/ARM/ret_i64_arg2.ll
index b1a1024..c51d2b8 100644
--- a/test/CodeGen/ARM/ret_i64_arg2.ll
+++ b/test/CodeGen/ARM/ret_i64_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2
+; RUN: llc -march=arm -mattr=+vfp2 %s -o /dev/null
 
 define i64 @test_i64(i64 %a1, i64 %a2) {
         ret i64 %a2
diff --git a/test/CodeGen/ARM/ret_i64_arg3.ll b/test/CodeGen/ARM/ret_i64_arg3.ll
index ffc1d2f..602997e 100644
--- a/test/CodeGen/ARM/ret_i64_arg3.ll
+++ b/test/CodeGen/ARM/ret_i64_arg3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2
+; RUN: llc -march=arm -mattr=+vfp2 %s -o /dev/null
 
 define i64 @test_i64_arg3(i64 %a1, i64 %a2, i64 %a3) {
         ret i64 %a3
diff --git a/test/CodeGen/ARM/ret_i64_arg_split.ll b/test/CodeGen/ARM/ret_i64_arg_split.ll
index 956bce5..0583b27 100644
--- a/test/CodeGen/ARM/ret_i64_arg_split.ll
+++ b/test/CodeGen/ARM/ret_i64_arg_split.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null
 
 define i64 @test_i64_arg_split(i64 %a1, i32 %a2, i64 %a3) {
         ret i64 %a3
diff --git a/test/CodeGen/ARM/ret_void.ll b/test/CodeGen/ARM/ret_void.ll
index 2b7ae05..93dc5c1 100644
--- a/test/CodeGen/ARM/ret_void.ll
+++ b/test/CodeGen/ARM/ret_void.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
 
 define void @test() {
         ret void
diff --git a/test/CodeGen/ARM/returned-ext.ll b/test/CodeGen/ARM/returned-ext.ll
index d2cdeb0..925e9e7 100644
--- a/test/CodeGen/ARM/returned-ext.ll
+++ b/test/CodeGen/ARM/returned-ext.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv6-linux-gnueabi -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi | FileCheck %s -check-prefix=CHECKELF
+; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D
 
 declare i16 @identity16(i16 returned %x)
 declare i32 @identity32(i32 returned %x)
diff --git a/test/CodeGen/ARM/returned-trunc-tail-calls.ll b/test/CodeGen/ARM/returned-trunc-tail-calls.ll
index 5946727..6051a83 100644
--- a/test/CodeGen/ARM/returned-trunc-tail-calls.ll
+++ b/test/CodeGen/ARM/returned-trunc-tail-calls.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7 -arm-tail-calls | FileCheck %s
+; RUN: llc < %s -mtriple=armv7 | FileCheck %s
 
 declare i16 @ret16(i16 returned)
 declare i32 @ret32(i32 returned)
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index 6c380ae..f95f971 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o - | FileCheck %s
 
 define i32 @test1(i32 %X) nounwind {
 ; CHECK: test1
diff --git a/test/CodeGen/ARM/saxpy10-a9.ll b/test/CodeGen/ARM/saxpy10-a9.ll
new file mode 100644
index 0000000..f8f5e18
--- /dev/null
+++ b/test/CodeGen/ARM/saxpy10-a9.ll
@@ -0,0 +1,135 @@
+; RUN: llc < %s -march=arm -mtriple=thumbv7-apple-ios7.0.0 -float-abi=hard -mcpu=cortex-a9 -misched-postra -misched-bench -scheditins=false | FileCheck %s
+;
+; Test MI-Sched support for latency-based stalls on an in-order pipeline
+; using the new machine model.
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+
+; Don't be too strict with the top of the schedule, but most of it
+; should be nicely pipelined.
+;
+; CHECK: saxpy10:
+; CHECK: vldr
+; CHECK: vldr
+; CHECK: vldr
+; CHECK: vldr
+; CHECK: vldr
+; CHECK: vldr
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vmul
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vmul
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vmul
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vmul
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vmul
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vmul
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vmul
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vmul
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vmov
+; CHECK-NEXT: bx
+;
+; This accumulates a sum rather than storing each result.
+define float @saxpy10(float* nocapture readonly %data1, float* nocapture readonly %data2, float %a) {
+entry:
+  %0 = load float* %data1, align 4
+  %mul = fmul float %0, %a
+  %1 = load float* %data2, align 4
+  %add = fadd float %mul, %1
+  %add2 = fadd float %add, 0.000000e+00
+  %arrayidx.1 = getelementptr inbounds float* %data1, i32 1
+  %2 = load float* %arrayidx.1, align 4
+  %mul.1 = fmul float %2, %a
+  %arrayidx1.1 = getelementptr inbounds float* %data2, i32 1
+  %3 = load float* %arrayidx1.1, align 4
+  %add.1 = fadd float %mul.1, %3
+  %add2.1 = fadd float %add2, %add.1
+  %arrayidx.2 = getelementptr inbounds float* %data1, i32 2
+  %4 = load float* %arrayidx.2, align 4
+  %mul.2 = fmul float %4, %a
+  %arrayidx1.2 = getelementptr inbounds float* %data2, i32 2
+  %5 = load float* %arrayidx1.2, align 4
+  %add.2 = fadd float %mul.2, %5
+  %add2.2 = fadd float %add2.1, %add.2
+  %arrayidx.3 = getelementptr inbounds float* %data1, i32 3
+  %6 = load float* %arrayidx.3, align 4
+  %mul.3 = fmul float %6, %a
+  %arrayidx1.3 = getelementptr inbounds float* %data2, i32 3
+  %7 = load float* %arrayidx1.3, align 4
+  %add.3 = fadd float %mul.3, %7
+  %add2.3 = fadd float %add2.2, %add.3
+  %arrayidx.4 = getelementptr inbounds float* %data1, i32 4
+  %8 = load float* %arrayidx.4, align 4
+  %mul.4 = fmul float %8, %a
+  %arrayidx1.4 = getelementptr inbounds float* %data2, i32 4
+  %9 = load float* %arrayidx1.4, align 4
+  %add.4 = fadd float %mul.4, %9
+  %add2.4 = fadd float %add2.3, %add.4
+  %arrayidx.5 = getelementptr inbounds float* %data1, i32 5
+  %10 = load float* %arrayidx.5, align 4
+  %mul.5 = fmul float %10, %a
+  %arrayidx1.5 = getelementptr inbounds float* %data2, i32 5
+  %11 = load float* %arrayidx1.5, align 4
+  %add.5 = fadd float %mul.5, %11
+  %add2.5 = fadd float %add2.4, %add.5
+  %arrayidx.6 = getelementptr inbounds float* %data1, i32 6
+  %12 = load float* %arrayidx.6, align 4
+  %mul.6 = fmul float %12, %a
+  %arrayidx1.6 = getelementptr inbounds float* %data2, i32 6
+  %13 = load float* %arrayidx1.6, align 4
+  %add.6 = fadd float %mul.6, %13
+  %add2.6 = fadd float %add2.5, %add.6
+  %arrayidx.7 = getelementptr inbounds float* %data1, i32 7
+  %14 = load float* %arrayidx.7, align 4
+  %mul.7 = fmul float %14, %a
+  %arrayidx1.7 = getelementptr inbounds float* %data2, i32 7
+  %15 = load float* %arrayidx1.7, align 4
+  %add.7 = fadd float %mul.7, %15
+  %add2.7 = fadd float %add2.6, %add.7
+  %arrayidx.8 = getelementptr inbounds float* %data1, i32 8
+  %16 = load float* %arrayidx.8, align 4
+  %mul.8 = fmul float %16, %a
+  %arrayidx1.8 = getelementptr inbounds float* %data2, i32 8
+  %17 = load float* %arrayidx1.8, align 4
+  %add.8 = fadd float %mul.8, %17
+  %add2.8 = fadd float %add2.7, %add.8
+  %arrayidx.9 = getelementptr inbounds float* %data1, i32 9
+  %18 = load float* %arrayidx.9, align 4
+  %mul.9 = fmul float %18, %a
+  %arrayidx1.9 = getelementptr inbounds float* %data2, i32 9
+  %19 = load float* %arrayidx1.9, align 4
+  %add.9 = fadd float %mul.9, %19
+  %add2.9 = fadd float %add2.8, %add.9
+  ret float %add2.9
+}
diff --git a/test/CodeGen/ARM/sbfx.ll b/test/CodeGen/ARM/sbfx.ll
index 36fbd19..3c25edc 100644
--- a/test/CodeGen/ARM/sbfx.ll
+++ b/test/CodeGen/ARM/sbfx.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+v6t2 %s -o - | FileCheck %s
 
 define i32 @f1(i32 %a) {
 entry:
diff --git a/test/CodeGen/ARM/segmented-stacks-dynamic.ll b/test/CodeGen/ARM/segmented-stacks-dynamic.ll
new file mode 100644
index 0000000..13b5bcf
--- /dev/null
+++ b/test/CodeGen/ARM/segmented-stacks-dynamic.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux
+; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -filetype=obj
+
+; Just to prevent the alloca from being optimized away
+declare void @dummy_use(i32*, i32)
+
+define i32 @test_basic(i32 %l) {
+        %mem = alloca i32, i32 %l
+        call void @dummy_use (i32* %mem, i32 %l)
+        %terminate = icmp eq i32 %l, 0
+        br i1 %terminate, label %true, label %false
+
+true:
+        ret i32 0
+
+false:
+        %newlen = sub i32 %l, 1
+        %retvalue = call i32 @test_basic(i32 %newlen)
+        ret i32 %retvalue
+
+; ARM-linux:      test_basic:
+
+; ARM-linux:      push    {r4, r5}
+; ARM-linux-NEXT: mrc     p15, #0, r4, c13, c0, #3
+; ARM-linux-NEXT: mov     r5, sp
+; ARM-linux-NEXT: ldr     r4, [r4, #4]
+; ARM-linux-NEXT: cmp     r4, r5
+; ARM-linux-NEXT: blo     .LBB0_2
+
+; ARM-linux:      mov     r4, #24
+; ARM-linux-NEXT: mov     r5, #0
+; ARM-linux-NEXT: stmdb   sp!, {lr}
+; ARM-linux-NEXT: bl      __morestack
+; ARM-linux-NEXT: ldm     sp!, {lr}
+; ARM-linux-NEXT: pop     {r4, r5}
+; ARM-linux-NEXT: bx      lr
+
+; ARM-linux:      pop     {r4, r5}
+
+
+; ARM-android:      test_basic:
+
+; ARM-android:      push    {r4, r5}
+; ARM-android-NEXT: mrc     p15, #0, r4, c13, c0, #3
+; ARM-android-NEXT: mov     r5, sp
+; ARM-android-NEXT: ldr     r4, [r4, #252]
+; ARM-android-NEXT: cmp     r4, r5
+; ARM-android-NEXT: blo     .LBB0_2
+
+; ARM-android:      mov     r4, #24
+; ARM-android-NEXT: mov     r5, #0
+; ARM-android-NEXT: stmdb   sp!, {lr}
+; ARM-android-NEXT: bl      __morestack
+; ARM-android-NEXT: ldm     sp!, {lr}
+; ARM-android-NEXT: pop     {r4, r5}
+; ARM-android-NEXT: bx      lr
+
+; ARM-android:      pop     {r4, r5}
+
+}
diff --git a/test/CodeGen/ARM/segmented-stacks.ll b/test/CodeGen/ARM/segmented-stacks.ll
new file mode 100644
index 0000000..5eff633
--- /dev/null
+++ b/test/CodeGen/ARM/segmented-stacks.ll
@@ -0,0 +1,235 @@
+; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux
+
+; We used to crash with filetype=obj
+; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -filetype=obj
+
+
+; Just to prevent the alloca from being optimized away
+declare void @dummy_use(i32*, i32)
+
+define void @test_basic() {
+        %mem = alloca i32, i32 10
+        call void @dummy_use (i32* %mem, i32 10)
+	ret void
+
+; ARM-linux:      test_basic:
+
+; ARM-linux:      push    {r4, r5}
+; ARM-linux-NEXT: mrc     p15, #0, r4, c13, c0, #3
+; ARM-linux-NEXT: mov     r5, sp
+; ARM-linux-NEXT: ldr     r4, [r4, #4]
+; ARM-linux-NEXT: cmp     r4, r5
+; ARM-linux-NEXT: blo     .LBB0_2
+
+; ARM-linux:      mov     r4, #48
+; ARM-linux-NEXT: mov     r5, #0
+; ARM-linux-NEXT: stmdb   sp!, {lr}
+; ARM-linux-NEXT: bl      __morestack
+; ARM-linux-NEXT: ldm     sp!, {lr}
+; ARM-linux-NEXT: pop     {r4, r5}
+; ARM-linux-NEXT: bx      lr
+
+; ARM-linux:      pop     {r4, r5}
+
+; ARM-android:      test_basic:
+
+; ARM-android:      push    {r4, r5}
+; ARM-android-NEXT: mrc     p15, #0, r4, c13, c0, #3
+; ARM-android-NEXT: mov     r5, sp
+; ARM-android-NEXT: ldr     r4, [r4, #252]
+; ARM-android-NEXT: cmp     r4, r5
+; ARM-android-NEXT: blo     .LBB0_2
+
+; ARM-android:      mov     r4, #48
+; ARM-android-NEXT: mov     r5, #0
+; ARM-android-NEXT: stmdb   sp!, {lr}
+; ARM-android-NEXT: bl      __morestack
+; ARM-android-NEXT: ldm     sp!, {lr}
+; ARM-android-NEXT: pop     {r4, r5}
+; ARM-android-NEXT: bx      lr
+
+; ARM-android:      pop     {r4, r5}
+
+}
+
+define i32 @test_nested(i32 * nest %closure, i32 %other) {
+       %addend = load i32 * %closure
+       %result = add i32 %other, %addend
+       ret i32 %result
+
+; ARM-linux:      test_nested:
+
+; ARM-linux:      push    {r4, r5}
+; ARM-linux-NEXT: mrc     p15, #0, r4, c13, c0, #3
+; ARM-linux-NEXT: mov     r5, sp
+; ARM-linux-NEXT: ldr     r4, [r4, #4]
+; ARM-linux-NEXT: cmp     r4, r5
+; ARM-linux-NEXT: blo     .LBB1_2
+
+; ARM-linux:      mov     r4, #0
+; ARM-linux-NEXT: mov     r5, #0
+; ARM-linux-NEXT: stmdb   sp!, {lr}
+; ARM-linux-NEXT: bl      __morestack
+; ARM-linux-NEXT: ldm     sp!, {lr}
+; ARM-linux-NEXT: pop     {r4, r5}
+; ARM-linux-NEXT: bx      lr
+
+; ARM-linux:      pop     {r4, r5}
+
+; ARM-android:      test_nested:
+
+; ARM-android:      push    {r4, r5}
+; ARM-android-NEXT: mrc     p15, #0, r4, c13, c0, #3
+; ARM-android-NEXT: mov     r5, sp
+; ARM-android-NEXT: ldr     r4, [r4, #252]
+; ARM-android-NEXT: cmp     r4, r5
+; ARM-android-NEXT: blo     .LBB1_2
+
+; ARM-android:      mov     r4, #0
+; ARM-android-NEXT: mov     r5, #0
+; ARM-android-NEXT: stmdb   sp!, {lr}
+; ARM-android-NEXT: bl      __morestack
+; ARM-android-NEXT: ldm     sp!, {lr}
+; ARM-android-NEXT: pop     {r4, r5}
+; ARM-android-NEXT: bx      lr
+
+; ARM-android:      pop     {r4, r5}
+
+}
+
+define void @test_large() {
+        %mem = alloca i32, i32 10000
+        call void @dummy_use (i32* %mem, i32 0)
+        ret void
+
+; ARM-linux:      test_large:
+
+; ARM-linux:      push    {r4, r5}
+; ARM-linux-NEXT: mrc     p15, #0, r4, c13, c0, #3
+; ARM-linux-NEXT: sub     r5, sp, #40192
+; ARM-linux-NEXT: ldr     r4, [r4, #4]
+; ARM-linux-NEXT: cmp     r4, r5
+; ARM-linux-NEXT: blo     .LBB2_2
+
+; ARM-linux:      mov     r4, #40192
+; ARM-linux-NEXT: mov     r5, #0
+; ARM-linux-NEXT: stmdb   sp!, {lr}
+; ARM-linux-NEXT: bl      __morestack
+; ARM-linux-NEXT: ldm     sp!, {lr}
+; ARM-linux-NEXT: pop     {r4, r5}
+; ARM-linux-NEXT: bx      lr
+
+; ARM-linux:      pop     {r4, r5}
+
+; ARM-android:      test_large:
+
+; ARM-android:      push    {r4, r5}
+; ARM-android-NEXT: mrc     p15, #0, r4, c13, c0, #3
+; ARM-android-NEXT: sub     r5, sp, #40192
+; ARM-android-NEXT: ldr     r4, [r4, #252]
+; ARM-android-NEXT: cmp     r4, r5
+; ARM-android-NEXT: blo     .LBB2_2
+
+; ARM-android:      mov     r4, #40192
+; ARM-android-NEXT: mov     r5, #0
+; ARM-android-NEXT: stmdb   sp!, {lr}
+; ARM-android-NEXT: bl      __morestack
+; ARM-android-NEXT: ldm     sp!, {lr}
+; ARM-android-NEXT: pop     {r4, r5}
+; ARM-android-NEXT: bx      lr
+
+; ARM-android:      pop     {r4, r5}
+
+}
+
+define fastcc void @test_fastcc() {
+        %mem = alloca i32, i32 10
+        call void @dummy_use (i32* %mem, i32 10)
+        ret void
+
+; ARM-linux:      test_fastcc:
+
+; ARM-linux:      push    {r4, r5}
+; ARM-linux-NEXT: mrc     p15, #0, r4, c13, c0, #3
+; ARM-linux-NEXT: mov     r5, sp
+; ARM-linux-NEXT: ldr     r4, [r4, #4]
+; ARM-linux-NEXT: cmp     r4, r5
+; ARM-linux-NEXT: blo     .LBB3_2
+
+; ARM-linux:      mov     r4, #48
+; ARM-linux-NEXT: mov     r5, #0
+; ARM-linux-NEXT: stmdb   sp!, {lr}
+; ARM-linux-NEXT: bl      __morestack
+; ARM-linux-NEXT: ldm     sp!, {lr}
+; ARM-linux-NEXT: pop     {r4, r5}
+; ARM-linux-NEXT: bx      lr
+
+; ARM-linux:      pop     {r4, r5}
+
+; ARM-android:      test_fastcc:
+
+; ARM-android:      push    {r4, r5}
+; ARM-android-NEXT: mrc     p15, #0, r4, c13, c0, #3
+; ARM-android-NEXT: mov     r5, sp
+; ARM-android-NEXT: ldr     r4, [r4, #252]
+; ARM-android-NEXT: cmp     r4, r5
+; ARM-android-NEXT: blo     .LBB3_2
+
+; ARM-android:      mov     r4, #48
+; ARM-android-NEXT: mov     r5, #0
+; ARM-android-NEXT: stmdb   sp!, {lr}
+; ARM-android-NEXT: bl      __morestack
+; ARM-android-NEXT: ldm     sp!, {lr}
+; ARM-android-NEXT: pop     {r4, r5}
+; ARM-android-NEXT: bx      lr
+
+; ARM-android:      pop     {r4, r5}
+
+}
+
+define fastcc void @test_fastcc_large() {
+        %mem = alloca i32, i32 10000
+        call void @dummy_use (i32* %mem, i32 0)
+        ret void
+
+; ARM-linux:      test_fastcc_large:
+
+; ARM-linux:      push    {r4, r5}
+; ARM-linux-NEXT: mrc     p15, #0, r4, c13, c0, #3
+; ARM-linux-NEXT: sub     r5, sp, #40192
+; ARM-linux-NEXT: ldr     r4, [r4, #4]
+; ARM-linux-NEXT: cmp     r4, r5
+; ARM-linux-NEXT: blo     .LBB4_2
+
+; ARM-linux:      mov     r4, #40192
+; ARM-linux-NEXT: mov     r5, #0
+; ARM-linux-NEXT: stmdb   sp!, {lr}
+; ARM-linux-NEXT: bl      __morestack
+; ARM-linux-NEXT: ldm     sp!, {lr}
+; ARM-linux-NEXT: pop     {r4, r5}
+; ARM-linux-NEXT: bx      lr
+
+; ARM-linux:      pop     {r4, r5}
+
+; ARM-android:      test_fastcc_large:
+
+; ARM-android:      push    {r4, r5}
+; ARM-android-NEXT: mrc     p15, #0, r4, c13, c0, #3
+; ARM-android-NEXT: sub     r5, sp, #40192
+; ARM-android-NEXT: ldr     r4, [r4, #252]
+; ARM-android-NEXT: cmp     r4, r5
+; ARM-android-NEXT: blo     .LBB4_2
+
+; ARM-android:      mov     r4, #40192
+; ARM-android-NEXT: mov     r5, #0
+; ARM-android-NEXT: stmdb   sp!, {lr}
+; ARM-android-NEXT: bl      __morestack
+; ARM-android-NEXT: ldm     sp!, {lr}
+; ARM-android-NEXT: pop     {r4, r5}
+; ARM-android-NEXT: bx      lr
+
+; ARM-android:      pop     {r4, r5}
+
+}
diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll
index 6f4bfb8..e2dc554 100644
--- a/test/CodeGen/ARM/select-imm.ll
+++ b/test/CodeGen/ARM/select-imm.ll
@@ -1,6 +1,10 @@
-; RUN: llc < %s -march=arm                  | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -march=arm -mattr=+thumb2   | FileCheck %s --check-prefix=ARMT2
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s --check-prefix=THUMB2
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s --check-prefix=ARM
+
+; RUN: llc -mtriple=arm-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \
+; RUN:  | FileCheck %s --check-prefix=ARMT2
+
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \
+; RUN:  | FileCheck %s --check-prefix=THUMB2
 
 define i32 @t1(i32 %c) nounwind readnone {
 entry:
diff --git a/test/CodeGen/ARM/select-undef.ll b/test/CodeGen/ARM/select-undef.ll
index 23f7eb8..bae4d40 100644
--- a/test/CodeGen/ARM/select-undef.ll
+++ b/test/CodeGen/ARM/select-undef.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=arm -mcpu=swift -verify-machineinstrs
+; RUN: llc -mtriple=arm-eabi -mcpu=swift -verify-machineinstrs %s -o /dev/null
+
 define i32 @func(i32 %arg0, i32 %arg1) {
 entry:
   %cmp = icmp slt i32 %arg0, 10
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index ed006d6..e9394a7 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -1,6 +1,10 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP
-; RUN: llc < %s -mattr=+neon,+thumb2 -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=CHECK-NEON
+; RUN: llc -mtriple=arm-apple-darwin %s -o - | FileCheck %s
+
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \
+; RUN:	| FileCheck %s --check-prefix=CHECK-VFP
+
+; RUN: llc -mtriple=thumbv7-apple-darwin -mattr=+neon,+thumb2 %s -o - \
+; RUN:	| FileCheck %s --check-prefix=CHECK-NEON
 
 define i32 @f1(i32 %a.s) {
 ;CHECK-LABEL: f1:
diff --git a/test/CodeGen/ARM/setcc-sentinals.ll b/test/CodeGen/ARM/setcc-sentinals.ll
index 8878f9b..dc45e0e 100644
--- a/test/CodeGen/ARM/setcc-sentinals.ll
+++ b/test/CodeGen/ARM/setcc-sentinals.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -mcpu=cortex-a8 -march=arm -asm-verbose=false | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 -asm-verbose=false %s -o - | FileCheck %s
 
 define zeroext i1 @test0(i32 %x) nounwind {
 ; CHECK-LABEL: test0:
-; CHECK-NEXT: add [[REG:(r[0-9]+)|(lr)]], r0, #1
+; CHECK: add [[REG:(r[0-9]+)|(lr)]], r0, #1
 ; CHECK-NEXT: mov r0, #0
 ; CHECK-NEXT: cmp [[REG]], #1
 ; CHECK-NEXT: movwhi r0, #1
diff --git a/test/CodeGen/ARM/smul.ll b/test/CodeGen/ARM/smul.ll
index 686d791..b7ddd10 100644
--- a/test/CodeGen/ARM/smul.ll
+++ b/test/CodeGen/ARM/smul.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm -mcpu=generic
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=generic %s -o /dev/null
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
 
 @x = weak global i16 0          ; <i16*> [#uses=1]
 @y = weak global i16 0          ; <i16*> [#uses=0]
diff --git a/test/CodeGen/ARM/ssp-data-layout.ll b/test/CodeGen/ARM/ssp-data-layout.ll
new file mode 100644
index 0000000..e7dafac
--- /dev/null
+++ b/test/CodeGen/ARM/ssp-data-layout.ll
@@ -0,0 +1,528 @@
+; RUN: llc < %s -disable-fp-elim -march=arm -mcpu=cortex-a8 -mtriple arm-linux-gnu -o - | FileCheck %s
+;  This test is fairly fragile.  The goal is to ensure that "large" stack
+;  objects are allocated closest to the stack protector (i.e., farthest away 
+;  from the Stack Pointer.)  In standard SSP mode this means that large (>=
+;  ssp-buffer-size) arrays and structures containing such arrays are
+;  closest to the protector.  With sspstrong and sspreq this means large
+;  arrays/structures-with-arrays are closest, followed by small (< ssp-buffer-size)
+;  arrays/structures-with-arrays, and then addr-taken variables.
+;
+;  Ideally, we only want to verify that the objects appear in the correct groups
+;  and that the groups have the correct relative stack offset.  The ordering
+;  within a group is not relevant to this test.  Unfortunately, there is not
+;  an elegant way to do this, so just match the offset for each object.
+
+%struct.struct_large_char = type { [8 x i8] }
+%struct.struct_large_char2 = type { [2 x i8], [8 x i8] }
+%struct.struct_small_char = type { [2 x i8] }
+%struct.struct_large_nonchar = type { [8 x i32] }
+%struct.struct_small_nonchar = type { [2 x i16] }
+
+define void @layout_ssp() ssp {
+entry:
+; Expected stack layout for ssp is
+;  180 large_char          . Group 1, nested arrays, arrays >= ssp-buffer-size
+;  172 struct_large_char   .
+;  168 scalar1             | Everything else
+;  164 scalar2
+;  160 scalar3
+;  156 addr-of
+;  152 small_nonchar (84+68)
+;  112 large_nonchar
+;  110 small_char
+;  108 struct_small_char
+;   72 struct_large_nonchar
+;   68 struct_small_nonchar
+
+; CHECK: layout_ssp:
+; r[[SP]] is used as an offset into the stack later
+; CHECK: add r[[SP:[0-9]+]], sp, #68
+
+; CHECK: bl get_scalar1
+; CHECK: str r0, [sp, #168]
+; CHECK: bl end_scalar1
+
+; CHECK: bl get_scalar2
+; CHECK: str r0, [sp, #164]
+; CHECK: bl end_scalar2
+
+; CHECK: bl get_scalar3
+; CHECK: str r0, [sp, #160]
+; CHECK: bl end_scalar3
+
+; CHECK: bl get_addrof
+; CHECK: str r0, [sp, #156]
+; CHECK: bl end_addrof
+
+; CHECK: get_small_nonchar
+; CHECK: strh r0, [r[[SP]], #84]
+; CHECK: bl end_small_nonchar
+
+; CHECK: bl get_large_nonchar
+; CHECK: str r0, [sp, #112]
+; CHECK: bl end_large_nonchar
+
+; CHECK: bl get_small_char
+; CHECK: strb r0, [sp, #110]
+; CHECK: bl end_small_char
+
+; CHECK: bl get_large_char
+; CHECK: strb r0, [sp, #180]
+; CHECK: bl end_large_char
+
+; CHECK: bl get_struct_large_char
+; CHECK: strb r0, [sp, #172]
+; CHECK: bl end_struct_large_char
+
+; CHECK: bl get_struct_small_char
+; CHECK: strb r0, [sp, #108]
+; CHECK: bl end_struct_small_char
+
+; CHECK: bl get_struct_large_nonchar
+; CHECK:str r0, [sp, #72]
+; CHECK: bl end_struct_large_nonchar
+
+; CHECK: bl get_struct_small_nonchar
+; CHECK: strh r0, [r[[SP]]]
+; CHECK: bl end_struct_small_nonchar
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  %z = alloca i32, align 4
+  %ptr = alloca i32, align 4
+  %small2 = alloca [2 x i16], align 2
+  %large2 = alloca [8 x i32], align 16
+  %small = alloca [2 x i8], align 1
+  %large = alloca [8 x i8], align 1
+  %a = alloca %struct.struct_large_char, align 1
+  %b = alloca %struct.struct_small_char, align 1
+  %c = alloca %struct.struct_large_nonchar, align 8
+  %d = alloca %struct.struct_small_nonchar, align 2
+  %call = call i32 @get_scalar1()
+  store i32 %call, i32* %x, align 4
+  call void @end_scalar1()
+  %call1 = call i32 @get_scalar2()
+  store i32 %call1, i32* %y, align 4
+  call void @end_scalar2()
+  %call2 = call i32 @get_scalar3()
+  store i32 %call2, i32* %z, align 4
+  call void @end_scalar3()
+  %call3 = call i32 @get_addrof()
+  store i32 %call3, i32* %ptr, align 4
+  call void @end_addrof()
+  %call4 = call signext i16 @get_small_nonchar()
+  %arrayidx = getelementptr inbounds [2 x i16]* %small2, i32 0, i64 0
+  store i16 %call4, i16* %arrayidx, align 2
+  call void @end_small_nonchar()
+  %call5 = call i32 @get_large_nonchar()
+  %arrayidx6 = getelementptr inbounds [8 x i32]* %large2, i32 0, i64 0
+  store i32 %call5, i32* %arrayidx6, align 4
+  call void @end_large_nonchar()
+  %call7 = call signext i8 @get_small_char()
+  %arrayidx8 = getelementptr inbounds [2 x i8]* %small, i32 0, i64 0
+  store i8 %call7, i8* %arrayidx8, align 1
+  call void @end_small_char()
+  %call9 = call signext i8 @get_large_char()
+  %arrayidx10 = getelementptr inbounds [8 x i8]* %large, i32 0, i64 0
+  store i8 %call9, i8* %arrayidx10, align 1
+  call void @end_large_char()
+  %call11 = call signext i8 @get_struct_large_char()
+  %foo = getelementptr inbounds %struct.struct_large_char* %a, i32 0, i32 0
+  %arrayidx12 = getelementptr inbounds [8 x i8]* %foo, i32 0, i64 0
+  store i8 %call11, i8* %arrayidx12, align 1
+  call void @end_struct_large_char()
+  %call13 = call signext i8 @get_struct_small_char()
+  %foo14 = getelementptr inbounds %struct.struct_small_char* %b, i32 0, i32 0
+  %arrayidx15 = getelementptr inbounds [2 x i8]* %foo14, i32 0, i64 0
+  store i8 %call13, i8* %arrayidx15, align 1
+  call void @end_struct_small_char()
+  %call16 = call i32 @get_struct_large_nonchar()
+  %foo17 = getelementptr inbounds %struct.struct_large_nonchar* %c, i32 0, i32 0
+  %arrayidx18 = getelementptr inbounds [8 x i32]* %foo17, i32 0, i64 0
+  store i32 %call16, i32* %arrayidx18, align 4
+  call void @end_struct_large_nonchar()
+  %call19 = call signext i16 @get_struct_small_nonchar()
+  %foo20 = getelementptr inbounds %struct.struct_small_nonchar* %d, i32 0, i32 0
+  %arrayidx21 = getelementptr inbounds [2 x i16]* %foo20, i32 0, i64 0
+  store i16 %call19, i16* %arrayidx21, align 2
+  call void @end_struct_small_nonchar()
+  %arraydecay = getelementptr inbounds [8 x i8]* %large, i32 0, i32 0
+  %arraydecay22 = getelementptr inbounds [2 x i8]* %small, i32 0, i32 0
+  %arraydecay23 = getelementptr inbounds [8 x i32]* %large2, i32 0, i32 0
+  %arraydecay24 = getelementptr inbounds [2 x i16]* %small2, i32 0, i32 0
+  %0 = load i32* %x, align 4
+  %1 = load i32* %y, align 4
+  %2 = load i32* %z, align 4
+  %coerce.dive = getelementptr %struct.struct_large_char* %a, i32 0, i32 0
+  %3 = bitcast [8 x i8]* %coerce.dive to i64*
+  %4 = load i64* %3, align 1
+  %coerce.dive25 = getelementptr %struct.struct_small_char* %b, i32 0, i32 0
+  %5 = bitcast [2 x i8]* %coerce.dive25 to i16*
+  %6 = load i16* %5, align 1
+  %coerce.dive26 = getelementptr %struct.struct_small_nonchar* %d, i32 0, i32 0
+  %7 = bitcast [2 x i16]* %coerce.dive26 to i32*
+  %8 = load i32* %7, align 1
+  call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
+  ret void
+}
+
+define void @layout_sspstrong() sspstrong {
+entry:
+; Expected stack layout for sspstrong is
+; 144  large_nonchar          . Group 1, nested arrays,
+; 136  large_char             .  arrays >= ssp-buffer-size
+; 128  struct_large_char      .
+; 96   struct_large_nonchar   .
+; 84+8 small_nonchar          | Group 2, nested arrays,
+; 90   small_char             |  arrays < ssp-buffer-size
+; 88   struct_small_char      |
+; 84   struct_small_nonchar   |
+; 80   addrof                 * Group 3, addr-of local
+; 76   scalar1                + Group 4, everything else
+; 72   scalar2                +
+; 68   scalar3                +
+;   
+; CHECK: layout_sspstrong:
+; r[[SP]] is used as an offset into the stack later
+; CHECK: add r[[SP:[0-9]+]], sp, #84
+
+; CHECK: bl get_scalar1
+; CHECK: str r0, [sp, #76]
+; CHECK: bl end_scalar1
+
+; CHECK: bl get_scalar2
+; CHECK: str r0, [sp, #72]
+; CHECK: bl end_scalar2
+
+; CHECK: bl get_scalar3
+; CHECK: str r0, [sp, #68]
+; CHECK: bl end_scalar3
+
+; CHECK: bl get_addrof
+; CHECK: str r0, [sp, #80]
+; CHECK: bl end_addrof
+
+; CHECK: get_small_nonchar
+; CHECK: strh r0, [r[[SP]], #8]
+; CHECK: bl end_small_nonchar
+
+; CHECK: bl get_large_nonchar
+; CHECK: str r0, [sp, #144]
+; CHECK: bl end_large_nonchar
+
+; CHECK: bl get_small_char
+; CHECK: strb r0, [sp, #90]
+; CHECK: bl end_small_char
+
+; CHECK: bl get_large_char
+; CHECK: strb r0, [sp, #136]
+; CHECK: bl end_large_char
+
+; CHECK: bl get_struct_large_char
+; CHECK: strb r0, [sp, #128]
+; CHECK: bl end_struct_large_char
+
+; CHECK: bl get_struct_small_char
+; CHECK: strb r0, [sp, #88]
+; CHECK: bl end_struct_small_char
+
+; CHECK: bl get_struct_large_nonchar
+; CHECK: str r0, [sp, #96]
+; CHECK: bl end_struct_large_nonchar
+
+; CHECK: bl get_struct_small_nonchar
+; CHECK: strh r0, [r[[SP]]]
+; CHECK: bl end_struct_small_nonchar
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  %z = alloca i32, align 4
+  %ptr = alloca i32, align 4
+  %small2 = alloca [2 x i16], align 2
+  %large2 = alloca [8 x i32], align 16
+  %small = alloca [2 x i8], align 1
+  %large = alloca [8 x i8], align 1
+  %a = alloca %struct.struct_large_char, align 1
+  %b = alloca %struct.struct_small_char, align 1
+  %c = alloca %struct.struct_large_nonchar, align 8
+  %d = alloca %struct.struct_small_nonchar, align 2
+  %call = call i32 @get_scalar1()
+  store i32 %call, i32* %x, align 4
+  call void @end_scalar1()
+  %call1 = call i32 @get_scalar2()
+  store i32 %call1, i32* %y, align 4
+  call void @end_scalar2()
+  %call2 = call i32 @get_scalar3()
+  store i32 %call2, i32* %z, align 4
+  call void @end_scalar3()
+  %call3 = call i32 @get_addrof()
+  store i32 %call3, i32* %ptr, align 4
+  call void @end_addrof()
+  %call4 = call signext i16 @get_small_nonchar()
+  %arrayidx = getelementptr inbounds [2 x i16]* %small2, i32 0, i64 0
+  store i16 %call4, i16* %arrayidx, align 2
+  call void @end_small_nonchar()
+  %call5 = call i32 @get_large_nonchar()
+  %arrayidx6 = getelementptr inbounds [8 x i32]* %large2, i32 0, i64 0
+  store i32 %call5, i32* %arrayidx6, align 4
+  call void @end_large_nonchar()
+  %call7 = call signext i8 @get_small_char()
+  %arrayidx8 = getelementptr inbounds [2 x i8]* %small, i32 0, i64 0
+  store i8 %call7, i8* %arrayidx8, align 1
+  call void @end_small_char()
+  %call9 = call signext i8 @get_large_char()
+  %arrayidx10 = getelementptr inbounds [8 x i8]* %large, i32 0, i64 0
+  store i8 %call9, i8* %arrayidx10, align 1
+  call void @end_large_char()
+  %call11 = call signext i8 @get_struct_large_char()
+  %foo = getelementptr inbounds %struct.struct_large_char* %a, i32 0, i32 0
+  %arrayidx12 = getelementptr inbounds [8 x i8]* %foo, i32 0, i64 0
+  store i8 %call11, i8* %arrayidx12, align 1
+  call void @end_struct_large_char()
+  %call13 = call signext i8 @get_struct_small_char()
+  %foo14 = getelementptr inbounds %struct.struct_small_char* %b, i32 0, i32 0
+  %arrayidx15 = getelementptr inbounds [2 x i8]* %foo14, i32 0, i64 0
+  store i8 %call13, i8* %arrayidx15, align 1
+  call void @end_struct_small_char()
+  %call16 = call i32 @get_struct_large_nonchar()
+  %foo17 = getelementptr inbounds %struct.struct_large_nonchar* %c, i32 0, i32 0
+  %arrayidx18 = getelementptr inbounds [8 x i32]* %foo17, i32 0, i64 0
+  store i32 %call16, i32* %arrayidx18, align 4
+  call void @end_struct_large_nonchar()
+  %call19 = call signext i16 @get_struct_small_nonchar()
+  %foo20 = getelementptr inbounds %struct.struct_small_nonchar* %d, i32 0, i32 0
+  %arrayidx21 = getelementptr inbounds [2 x i16]* %foo20, i32 0, i64 0
+  store i16 %call19, i16* %arrayidx21, align 2
+  call void @end_struct_small_nonchar()
+  %arraydecay = getelementptr inbounds [8 x i8]* %large, i32 0, i32 0
+  %arraydecay22 = getelementptr inbounds [2 x i8]* %small, i32 0, i32 0
+  %arraydecay23 = getelementptr inbounds [8 x i32]* %large2, i32 0, i32 0
+  %arraydecay24 = getelementptr inbounds [2 x i16]* %small2, i32 0, i32 0
+  %0 = load i32* %x, align 4
+  %1 = load i32* %y, align 4
+  %2 = load i32* %z, align 4
+  %coerce.dive = getelementptr %struct.struct_large_char* %a, i32 0, i32 0
+  %3 = bitcast [8 x i8]* %coerce.dive to i64*
+  %4 = load i64* %3, align 1
+  %coerce.dive25 = getelementptr %struct.struct_small_char* %b, i32 0, i32 0
+  %5 = bitcast [2 x i8]* %coerce.dive25 to i16*
+  %6 = load i16* %5, align 1
+  %coerce.dive26 = getelementptr %struct.struct_small_nonchar* %d, i32 0, i32 0
+  %7 = bitcast [2 x i16]* %coerce.dive26 to i32*
+  %8 = load i32* %7, align 1
+  call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
+  ret void
+}
+
+define void @layout_sspreq() sspreq {
+entry:
+; Expected stack layout for sspreq is the same as sspstrong
+;   
+; CHECK: layout_sspreq:
+; r[[SP]] is used as an offset into the stack later
+; CHECK: add r[[SP:[0-9]+]], sp, #84
+
+; CHECK: bl get_scalar1
+; CHECK: str r0, [sp, #76]
+; CHECK: bl end_scalar1
+
+; CHECK: bl get_scalar2
+; CHECK: str r0, [sp, #72]
+; CHECK: bl end_scalar2
+
+; CHECK: bl get_scalar3
+; CHECK: str r0, [sp, #68]
+; CHECK: bl end_scalar3
+
+; CHECK: bl get_addrof
+; CHECK: str r0, [sp, #80]
+; CHECK: bl end_addrof
+
+; CHECK: get_small_nonchar
+; CHECK: strh r0, [r[[SP]], #8]
+; CHECK: bl end_small_nonchar
+
+; CHECK: bl get_large_nonchar
+; CHECK: str r0, [sp, #144]
+; CHECK: bl end_large_nonchar
+
+; CHECK: bl get_small_char
+; CHECK: strb r0, [sp, #90]
+; CHECK: bl end_small_char
+
+; CHECK: bl get_large_char
+; CHECK: strb r0, [sp, #136]
+; CHECK: bl end_large_char
+
+; CHECK: bl get_struct_large_char
+; CHECK: strb r0, [sp, #128]
+; CHECK: bl end_struct_large_char
+
+; CHECK: bl get_struct_small_char
+; CHECK: strb r0, [sp, #88]
+; CHECK: bl end_struct_small_char
+
+; CHECK: bl get_struct_large_nonchar
+; CHECK: str r0, [sp, #96]
+; CHECK: bl end_struct_large_nonchar
+
+; CHECK: bl get_struct_small_nonchar
+; CHECK: strh r0, [r[[SP]]]
+; CHECK: bl end_struct_small_nonchar
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  %z = alloca i32, align 4
+  %ptr = alloca i32, align 4
+  %small2 = alloca [2 x i16], align 2
+  %large2 = alloca [8 x i32], align 16
+  %small = alloca [2 x i8], align 1
+  %large = alloca [8 x i8], align 1
+  %a = alloca %struct.struct_large_char, align 1
+  %b = alloca %struct.struct_small_char, align 1
+  %c = alloca %struct.struct_large_nonchar, align 8
+  %d = alloca %struct.struct_small_nonchar, align 2
+  %call = call i32 @get_scalar1()
+  store i32 %call, i32* %x, align 4
+  call void @end_scalar1()
+  %call1 = call i32 @get_scalar2()
+  store i32 %call1, i32* %y, align 4
+  call void @end_scalar2()
+  %call2 = call i32 @get_scalar3()
+  store i32 %call2, i32* %z, align 4
+  call void @end_scalar3()
+  %call3 = call i32 @get_addrof()
+  store i32 %call3, i32* %ptr, align 4
+  call void @end_addrof()
+  %call4 = call signext i16 @get_small_nonchar()
+  %arrayidx = getelementptr inbounds [2 x i16]* %small2, i32 0, i64 0
+  store i16 %call4, i16* %arrayidx, align 2
+  call void @end_small_nonchar()
+  %call5 = call i32 @get_large_nonchar()
+  %arrayidx6 = getelementptr inbounds [8 x i32]* %large2, i32 0, i64 0
+  store i32 %call5, i32* %arrayidx6, align 4
+  call void @end_large_nonchar()
+  %call7 = call signext i8 @get_small_char()
+  %arrayidx8 = getelementptr inbounds [2 x i8]* %small, i32 0, i64 0
+  store i8 %call7, i8* %arrayidx8, align 1
+  call void @end_small_char()
+  %call9 = call signext i8 @get_large_char()
+  %arrayidx10 = getelementptr inbounds [8 x i8]* %large, i32 0, i64 0
+  store i8 %call9, i8* %arrayidx10, align 1
+  call void @end_large_char()
+  %call11 = call signext i8 @get_struct_large_char()
+  %foo = getelementptr inbounds %struct.struct_large_char* %a, i32 0, i32 0
+  %arrayidx12 = getelementptr inbounds [8 x i8]* %foo, i32 0, i64 0
+  store i8 %call11, i8* %arrayidx12, align 1
+  call void @end_struct_large_char()
+  %call13 = call signext i8 @get_struct_small_char()
+  %foo14 = getelementptr inbounds %struct.struct_small_char* %b, i32 0, i32 0
+  %arrayidx15 = getelementptr inbounds [2 x i8]* %foo14, i32 0, i64 0
+  store i8 %call13, i8* %arrayidx15, align 1
+  call void @end_struct_small_char()
+  %call16 = call i32 @get_struct_large_nonchar()
+  %foo17 = getelementptr inbounds %struct.struct_large_nonchar* %c, i32 0, i32 0
+  %arrayidx18 = getelementptr inbounds [8 x i32]* %foo17, i32 0, i64 0
+  store i32 %call16, i32* %arrayidx18, align 4
+  call void @end_struct_large_nonchar()
+  %call19 = call signext i16 @get_struct_small_nonchar()
+  %foo20 = getelementptr inbounds %struct.struct_small_nonchar* %d, i32 0, i32 0
+  %arrayidx21 = getelementptr inbounds [2 x i16]* %foo20, i32 0, i64 0
+  store i16 %call19, i16* %arrayidx21, align 2
+  call void @end_struct_small_nonchar()
+  %arraydecay = getelementptr inbounds [8 x i8]* %large, i32 0, i32 0
+  %arraydecay22 = getelementptr inbounds [2 x i8]* %small, i32 0, i32 0
+  %arraydecay23 = getelementptr inbounds [8 x i32]* %large2, i32 0, i32 0
+  %arraydecay24 = getelementptr inbounds [2 x i16]* %small2, i32 0, i32 0
+  %0 = load i32* %x, align 4
+  %1 = load i32* %y, align 4
+  %2 = load i32* %z, align 4
+  %coerce.dive = getelementptr %struct.struct_large_char* %a, i32 0, i32 0
+  %3 = bitcast [8 x i8]* %coerce.dive to i64*
+  %4 = load i64* %3, align 1
+  %coerce.dive25 = getelementptr %struct.struct_small_char* %b, i32 0, i32 0
+  %5 = bitcast [2 x i8]* %coerce.dive25 to i16*
+  %6 = load i16* %5, align 1
+  %coerce.dive26 = getelementptr %struct.struct_small_nonchar* %d, i32 0, i32 0
+  %7 = bitcast [2 x i16]* %coerce.dive26 to i32*
+  %8 = load i32* %7, align 1
+  call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
+  ret void
+}
+
+define void @struct_with_protectable_arrays() sspstrong {
+entry:
+; Check to ensure that a structure which contains a small array followed by a
+; large array is assigned to the stack properly as a large object.
+; CHECK: struct_with_protectable_arrays:
+; CHECK: bl get_struct_small_char
+; CHECK: strb r0, [sp, #68]
+; CHECK: bl end_struct_small_char
+; CHECK: bl get_struct_large_char2
+; CHECK: strb r0, [sp, #106]
+; CHECK: bl end_struct_large_char2
+  %a = alloca %struct.struct_small_char, align 1
+  %b = alloca %struct.struct_large_char2, align 1
+  %d1 = alloca %struct.struct_large_nonchar, align 8
+  %d2 = alloca %struct.struct_small_nonchar, align 2
+  %call = call signext i8 @get_struct_small_char()
+  %foo = getelementptr inbounds %struct.struct_small_char* %a, i32 0, i32 0
+  %arrayidx = getelementptr inbounds [2 x i8]* %foo, i32 0, i64 0
+  store i8 %call, i8* %arrayidx, align 1
+  call void @end_struct_small_char()
+  %call1 = call signext i8 @get_struct_large_char2()
+  %foo2 = getelementptr inbounds %struct.struct_large_char2* %b, i32 0, i32 1
+  %arrayidx3 = getelementptr inbounds [8 x i8]* %foo2, i32 0, i64 0
+  store i8 %call1, i8* %arrayidx3, align 1
+  call void @end_struct_large_char2()
+  %0 = bitcast %struct.struct_large_char2* %b to %struct.struct_large_char*
+  %coerce.dive = getelementptr %struct.struct_large_char* %0, i32 0, i32 0
+  %1 = bitcast [8 x i8]* %coerce.dive to i64*
+  %2 = load i64* %1, align 1
+  %coerce.dive4 = getelementptr %struct.struct_small_char* %a, i32 0, i32 0
+  %3 = bitcast [2 x i8]* %coerce.dive4 to i16*
+  %4 = load i16* %3, align 1
+  %coerce.dive5 = getelementptr %struct.struct_small_nonchar* %d2, i32 0, i32 0
+  %5 = bitcast [2 x i16]* %coerce.dive5 to i32*
+  %6 = load i32* %5, align 1
+  call void @takes_all(i64 %2, i16 %4, %struct.struct_large_nonchar* byval align 8 %d1, i32 %6, i8* null, i8* null, i32* null, i16* null, i32* null, i32 0, i32 0, i32 0)
+  ret void
+}
+
+declare i32 @get_scalar1()
+declare void @end_scalar1()
+
+declare i32 @get_scalar2()
+declare void @end_scalar2()
+
+declare i32 @get_scalar3()
+declare void @end_scalar3()
+
+declare i32 @get_addrof()
+declare void @end_addrof()
+
+declare signext i16 @get_small_nonchar()
+declare void @end_small_nonchar()
+
+declare i32 @get_large_nonchar()
+declare void @end_large_nonchar()
+
+declare signext i8 @get_small_char()
+declare void @end_small_char()
+
+declare signext i8 @get_large_char()
+declare void @end_large_char()
+
+declare signext i8 @get_struct_large_char()
+declare void @end_struct_large_char()
+
+declare signext i8 @get_struct_large_char2()
+declare void @end_struct_large_char2()
+
+declare signext i8 @get_struct_small_char()
+declare void @end_struct_small_char()
+
+declare i32 @get_struct_large_nonchar()
+declare void @end_struct_large_nonchar()
+
+declare signext i16 @get_struct_small_nonchar()
+declare void @end_struct_small_nonchar()
+
+declare void @takes_all(i64, i16, %struct.struct_large_nonchar* byval align 8, i32, i8*, i8*, i32*, i16*, i32*, i32, i32, i32)
diff --git a/test/CodeGen/ARM/stack-frame.ll b/test/CodeGen/ARM/stack-frame.ll
index 1dd57dd..a419074 100644
--- a/test/CodeGen/ARM/stack-frame.ll
+++ b/test/CodeGen/ARM/stack-frame.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=arm
-; RUN: llc < %s -march=arm | grep add | count 1
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define void @f1() {
 	%c = alloca i8, align 1
@@ -10,4 +9,6 @@ define i32 @f2() {
 	ret i32 1
 }
 
+; CHECK: add
+; CHECK-NOT: add
 
diff --git a/test/CodeGen/ARM/str_post.ll b/test/CodeGen/ARM/str_post.ll
index 32e3b85..a4f8640 100644
--- a/test/CodeGen/ARM/str_post.ll
+++ b/test/CodeGen/ARM/str_post.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define i16 @test1(i32* %X, i16* %A) {
 ; CHECK-LABEL: test1:
diff --git a/test/CodeGen/ARM/str_pre.ll b/test/CodeGen/ARM/str_pre.ll
index d8b3f0e..60e6e9ec 100644
--- a/test/CodeGen/ARM/str_pre.ll
+++ b/test/CodeGen/ARM/str_pre.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=arm | \
-; RUN:   grep "str.*\!" | count 2
+; RUN: llc -mtriple=arm-eabi %s -o -  | FileCheck %s
 
 define void @test1(i32* %X, i32* %A, i32** %dest) {
         %B = load i32* %A               ; <i32> [#uses=1]
@@ -16,3 +15,8 @@ define i16* @test2(i16* %X, i32* %A) {
         store i16 %tmp, i16* %Y
         ret i16* %Y
 }
+
+; CHECK: str{{.*}}!
+; CHECK: str{{.*}}!
+; CHECK-NOT: str{{.*}}!
+
diff --git a/test/CodeGen/ARM/str_trunc.ll b/test/CodeGen/ARM/str_trunc.ll
index 2f1166b..6739684 100644
--- a/test/CodeGen/ARM/str_trunc.ll
+++ b/test/CodeGen/ARM/str_trunc.ll
@@ -1,7 +1,4 @@
-; RUN: llc < %s -march=arm | \
-; RUN:   grep strb | count 1
-; RUN: llc < %s -march=arm | \
-; RUN:   grep strh | count 1
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define void @test1(i32 %v, i16* %ptr) {
         %tmp = trunc i32 %v to i16              ; <i16> [#uses=1]
@@ -14,3 +11,10 @@ define void @test2(i32 %v, i8* %ptr) {
         store i8 %tmp, i8* %ptr
         ret void
 }
+
+; CHECK: strh
+; CHECK-NOT: strh
+
+; CHECK: strb
+; CHECK-NOT: strb
+
diff --git a/test/CodeGen/ARM/struct_byval_arm_t1_t2.ll b/test/CodeGen/ARM/struct_byval_arm_t1_t2.ll
index 1899269..0a9bc3c 100644
--- a/test/CodeGen/ARM/struct_byval_arm_t1_t2.ll
+++ b/test/CodeGen/ARM/struct_byval_arm_t1_t2.ll
@@ -13,7 +13,7 @@
 ;structs at varying alignments. Each test is run for arm, thumb2 and thumb1.
 ;We check for the strings in the generated object code using llvm-objdump
 ;because it provides better assurance that we are generating instructions
-;for the correct architecture. Otherwise we could accidently generate an
+;for the correct architecture. Otherwise we could accidentally generate an
 ;ARM instruction for THUMB1 and wouldn't detect it because the assembly
 ;code representation is the same, but the object code would be generated
 ;incorrectly. For each test we check for the label, a load instruction of the
diff --git a/test/CodeGen/ARM/sub.ll b/test/CodeGen/ARM/sub.ll
index 7f82ca7..67bde2a 100644
--- a/test/CodeGen/ARM/sub.ll
+++ b/test/CodeGen/ARM/sub.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
 
 ; 171 = 0x000000ab
 define i64 @f1(i64 %a) {
diff --git a/test/CodeGen/ARM/subreg-remat.ll b/test/CodeGen/ARM/subreg-remat.ll
index 1bc0315..d5abfc0 100644
--- a/test/CodeGen/ARM/subreg-remat.ll
+++ b/test/CodeGen/ARM/subreg-remat.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 -pre-RA-sched=source | FileCheck %s
+; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 -pre-RA-sched=source -no-integrated-as | FileCheck %s
 target triple = "thumbv7-apple-ios"
 ; <rdar://problem/10032939>
 ;
diff --git a/test/CodeGen/ARM/sxt_rot.ll b/test/CodeGen/ARM/sxt_rot.ll
index 656cd93..5ddea2e 100644
--- a/test/CodeGen/ARM/sxt_rot.ll
+++ b/test/CodeGen/ARM/sxt_rot.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o - | FileCheck %s
 
 define i32 @test0(i8 %A) {
 ; CHECK: test0
diff --git a/test/CodeGen/ARM/t2-imm.ll b/test/CodeGen/ARM/t2-imm.ll
index 8b41459..dd75cd1 100644
--- a/test/CodeGen/ARM/t2-imm.ll
+++ b/test/CodeGen/ARM/t2-imm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
 
 define i32 @f6(i32 %a) {
 ; CHECK:f6
diff --git a/test/CodeGen/ARM/tail-call.ll b/test/CodeGen/ARM/tail-call.ll
new file mode 100644
index 0000000..7711586
--- /dev/null
+++ b/test/CodeGen/ARM/tail-call.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple armv7 -O0 -o - < %s | FileCheck %s -check-prefix CHECK-TAIL
+; RUN: llc -mtriple armv7 -O0 -disable-tail-calls -o - < %s \
+; RUN:   | FileCheck %s -check-prefix CHECK-NO-TAIL
+
+declare i32 @callee(i32 %i)
+
+define i32 @caller(i32 %i) {
+entry:
+  %r = tail call i32 @callee(i32 %i)
+  ret i32 %r
+}
+
+; CHECK-TAIL-LABEL: caller
+; CHECK-TAIL: b callee
+
+; CHECK-NO-TAIL-LABEL: caller
+; CHECK-NO-TAIL: push {lr}
+; CHECK-NO-TAIL: bl callee
+; CHECK-NO-TAIL: pop {lr}
+; CHECK-NO-TAIL: bx lr
+
diff --git a/test/CodeGen/ARM/taildup-branch-weight.ll b/test/CodeGen/ARM/taildup-branch-weight.ll
new file mode 100644
index 0000000..0a16071
--- /dev/null
+++ b/test/CodeGen/ARM/taildup-branch-weight.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=arm-eabi -print-machineinstrs=tailduplication -tail-dup-size=100 \
+; RUN:      -enable-tail-merge=false -disable-cgp %s -o /dev/null 2>&1 \
+; RUN:	| FileCheck %s
+
+; CHECK: Machine code for function test0:
+; CHECK: Successors according to CFG: BB#1(4) BB#2(124)
+
+define void @test0(i32 %a, i32 %b, i32* %c, i32* %d) {
+entry:
+  store i32 3, i32* %d
+  br label %B1
+
+B2:
+  store i32 2, i32* %c
+  br label %B4
+
+B3:
+  store i32 2, i32* %c
+  br label %B4
+
+B1:
+  store i32 1, i32* %d
+  %test0 = icmp slt i32 %a, %b
+  br i1 %test0, label %B2, label %B3, !prof !0
+
+B4:
+  ret void
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 4, i32 124}
+
+; CHECK: Machine code for function test1:
+; CHECK: Successors according to CFG: BB#1(8) BB#2(248)
+
+@g0 = common global i32 0, align 4
+
+define void @test1(i32 %a, i32 %b, i32* %c, i32* %d, i32* %e) {
+
+  %test0 = icmp slt i32 %a, %b
+  br i1 %test0, label %B1, label %B2, !prof !1
+
+B1:
+  br label %B3
+
+B2:
+  store i32 2, i32* %c
+  br label %B3
+
+B3:
+  store i32 3, i32* %e
+  ret void
+}
+
+!1 = metadata !{metadata !"branch_weights", i32 248, i32 8}
diff --git a/test/CodeGen/ARM/this-return.ll b/test/CodeGen/ARM/this-return.ll
index cb42de6..c681a1c 100644
--- a/test/CodeGen/ARM/this-return.ll
+++ b/test/CodeGen/ARM/this-return.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv6-linux-gnueabi -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi | FileCheck %s -check-prefix=CHECKELF
+; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D
 
 %struct.A = type { i8 }
 %struct.B = type { i32 }
diff --git a/test/CodeGen/ARM/thumb-litpool.ll b/test/CodeGen/ARM/thumb-litpool.ll
new file mode 100644
index 0000000..f68fdb6
--- /dev/null
+++ b/test/CodeGen/ARM/thumb-litpool.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=thumbv6m-apple-macho %s -relocation-model=static -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv6m-apple-macho %s -relocation-model=pic -o - | FileCheck %s
+
+@var = global i8 zeroinitializer
+
+declare void @callee(i8*)
+
+define void @foo() minsize {
+; CHECK-LABEL: foo:
+; CHECK: ldr {{r[0-7]}}, LCPI0_0
+  call void @callee(i8* @var)
+  call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7}"()
+  call void @callee(i8* @var)
+  ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/ARM/thumb2-it-block.ll b/test/CodeGen/ARM/thumb2-it-block.ll
index 47c5dcc..d954760 100644
--- a/test/CodeGen/ARM/thumb2-it-block.ll
+++ b/test/CodeGen/ARM/thumb2-it-block.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
 ; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s
 ; PR11107
 
diff --git a/test/CodeGen/ARM/tls-models.ll b/test/CodeGen/ARM/tls-models.ll
index ccc9032..42c1ba9 100644
--- a/test/CodeGen/ARM/tls-models.ll
+++ b/test/CodeGen/ARM/tls-models.ll
@@ -22,9 +22,9 @@ entry:
 
   ; Non-PIC code can use initial-exec, PIC code has to use general dynamic.
   ; CHECK-NONPIC-LABEL:   f1:
-  ; CHECK-NONPIC:   external_gd(gottpoff)
+  ; CHECK-NONPIC:   external_gd(GOTTPOFF)
   ; CHECK-PIC-LABEL:      f1:
-  ; CHECK-PIC:      external_gd(tlsgd)
+  ; CHECK-PIC:      external_gd(TLSGD)
 }
 
 define i32* @f2() {
@@ -34,9 +34,9 @@ entry:
   ; Non-PIC code can use local exec, PIC code can use local dynamic,
   ; but that is not implemented, so falls back to general dynamic.
   ; CHECK-NONPIC-LABEL:   f2:
-  ; CHECK-NONPIC:   internal_gd(tpoff)
+  ; CHECK-NONPIC:   internal_gd(TPOFF)
   ; CHECK-PIC-LABEL:      f2:
-  ; CHECK-PIC:      internal_gd(tlsgd)
+  ; CHECK-PIC:      internal_gd(TLSGD)
 }
 
 
@@ -49,9 +49,9 @@ entry:
   ; Non-PIC code can use initial exec, PIC should use local dynamic,
   ; but that is not implemented, so falls back to general dynamic.
   ; CHECK-NONPIC-LABEL:   f3:
-  ; CHECK-NONPIC:   external_ld(gottpoff)
+  ; CHECK-NONPIC:   external_ld(GOTTPOFF)
   ; CHECK-PIC-LABEL:      f3:
-  ; CHECK-PIC:      external_ld(tlsgd)
+  ; CHECK-PIC:      external_ld(TLSGD)
 }
 
 define i32* @f4() {
@@ -61,9 +61,9 @@ entry:
   ; Non-PIC code can use local exec, PIC code can use local dynamic,
   ; but that is not implemented, so it falls back to general dynamic.
   ; CHECK-NONPIC-LABEL:   f4:
-  ; CHECK-NONPIC:   internal_ld(tpoff)
+  ; CHECK-NONPIC:   internal_ld(TPOFF)
   ; CHECK-PIC-LABEL:      f4:
-  ; CHECK-PIC:      internal_ld(tlsgd)
+  ; CHECK-PIC:      internal_ld(TLSGD)
 }
 
 
@@ -75,9 +75,9 @@ entry:
 
   ; Non-PIC and PIC code will use initial exec as specified.
   ; CHECK-NONPIC-LABEL:   f5:
-  ; CHECK-NONPIC:   external_ie(gottpoff)
+  ; CHECK-NONPIC:   external_ie(GOTTPOFF)
   ; CHECK-PIC-LABEL:      f5:
-  ; CHECK-PIC:      external_ie(gottpoff)
+  ; CHECK-PIC:      external_ie(GOTTPOFF)
 }
 
 define i32* @f6() {
@@ -86,9 +86,9 @@ entry:
 
   ; Non-PIC code can use local exec, PIC code use initial exec as specified.
   ; CHECK-NONPIC-LABEL:   f6:
-  ; CHECK-NONPIC:   internal_ie(tpoff)
+  ; CHECK-NONPIC:   internal_ie(TPOFF)
   ; CHECK-PIC-LABEL:      f6:
-  ; CHECK-PIC:      internal_ie(gottpoff)
+  ; CHECK-PIC:      internal_ie(GOTTPOFF)
 }
 
 
@@ -100,9 +100,9 @@ entry:
 
   ; Non-PIC and PIC code will use local exec as specified.
   ; CHECK-NONPIC-LABEL:   f7:
-  ; CHECK-NONPIC:   external_le(tpoff)
+  ; CHECK-NONPIC:   external_le(TPOFF)
   ; CHECK-PIC-LABEL:      f7:
-  ; CHECK-PIC:      external_le(tpoff)
+  ; CHECK-PIC:      external_le(TPOFF)
 }
 
 define i32* @f8() {
@@ -111,7 +111,7 @@ entry:
 
   ; Non-PIC and PIC code will use local exec as specified.
   ; CHECK-NONPIC-LABEL:   f8:
-  ; CHECK-NONPIC:   internal_le(tpoff)
+  ; CHECK-NONPIC:   internal_le(TPOFF)
   ; CHECK-PIC-LABEL:      f8:
-  ; CHECK-PIC:      internal_le(tpoff)
+  ; CHECK-PIC:      internal_le(TPOFF)
 }
diff --git a/test/CodeGen/ARM/tls1.ll b/test/CodeGen/ARM/tls1.ll
index ec4278c..a1ca0b7 100644
--- a/test/CodeGen/ARM/tls1.ll
+++ b/test/CodeGen/ARM/tls1.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN:     grep "i(tpoff)"
+; RUN:     grep "i(TPOFF)"
 ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
 ; RUN:     grep "__aeabi_read_tp"
 ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \
diff --git a/test/CodeGen/ARM/tls2.ll b/test/CodeGen/ARM/tls2.ll
index f048125..24b4794 100644
--- a/test/CodeGen/ARM/tls2.ll
+++ b/test/CodeGen/ARM/tls2.ll
@@ -8,7 +8,7 @@
 define i32 @f() {
 ; CHECK-NONPIC-LABEL: f:
 ; CHECK-NONPIC: ldr {{r.}}, [pc, {{r.}}]
-; CHECK-NONPIC: i(gottpoff)
+; CHECK-NONPIC: i(GOTTPOFF)
 ; CHECK-PIC-LABEL: f:
 ; CHECK-PIC: __tls_get_addr
 entry:
@@ -19,7 +19,7 @@ entry:
 define i32* @g() {
 ; CHECK-NONPIC-LABEL: g:
 ; CHECK-NONPIC: ldr {{r.}}, [pc, {{r.}}]
-; CHECK-NONPIC: i(gottpoff)
+; CHECK-NONPIC: i(GOTTPOFF)
 ; CHECK-PIC-LABEL: g:
 ; CHECK-PIC: __tls_get_addr
 entry:
diff --git a/test/CodeGen/ARM/trunc_ldr.ll b/test/CodeGen/ARM/trunc_ldr.ll
index 3033c2b..2ce9b89 100644
--- a/test/CodeGen/ARM/trunc_ldr.ll
+++ b/test/CodeGen/ARM/trunc_ldr.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=arm | grep ldrb.*7 | count 1
-; RUN: llc < %s -march=arm | grep ldrsb.*7 | count 1
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 	%struct.A = type { i8, i8, i8, i8, i16, i8, i8, %struct.B** }
 	%struct.B = type { float, float, i32, i32, i32, [0 x i8] }
@@ -22,3 +21,10 @@ define i32 @f2(%struct.A* %d) {
         %tmp57 = sext i8 %tmp56 to i32
 	ret i32 %tmp57
 }
+
+; CHECK: ldrb{{.*}}7
+; CHECK-NOT: ldrb{{.*}}7
+
+; CHECK: ldrsb{{.*}}7
+; CHECK-NOT: ldrsb{{.*}}7
+
diff --git a/test/CodeGen/ARM/truncstore-dag-combine.ll b/test/CodeGen/ARM/truncstore-dag-combine.ll
index 5665440..360e3e1 100644
--- a/test/CodeGen/ARM/truncstore-dag-combine.ll
+++ b/test/CodeGen/ARM/truncstore-dag-combine.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v4t | not grep orr
-; RUN: llc < %s -march=arm -mattr=+v4t | not grep mov
+; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s
 
 define void @bar(i8* %P, i16* %Q) {
 entry:
@@ -16,3 +15,7 @@ entry:
 	store i32 %tmp, i32* %P1, align 1
 	ret void
 }
+
+; CHECK-NOT: orr
+; CHECK-NOT: mov
+
diff --git a/test/CodeGen/ARM/tst_teq.ll b/test/CodeGen/ARM/tst_teq.ll
index c83111e..bac4fd9 100644
--- a/test/CodeGen/ARM/tst_teq.ll
+++ b/test/CodeGen/ARM/tst_teq.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=arm | grep tst
-; RUN: llc < %s -march=arm | grep teq
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define i32 @f(i32 %a) {
 entry:
@@ -16,3 +15,7 @@ entry:
 	%retval = select i1 %0, i32 20, i32 10		; <i32> [#uses=1]
 	ret i32 %retval
 }
+
+; CHECK: tst
+; CHECK: teq
+
diff --git a/test/CodeGen/ARM/twoaddrinstr.ll b/test/CodeGen/ARM/twoaddrinstr.ll
index 2172f6b..8da875f 100644
--- a/test/CodeGen/ARM/twoaddrinstr.ll
+++ b/test/CodeGen/ARM/twoaddrinstr.ll
@@ -1,5 +1,5 @@
 ; Tests for the two-address instruction pass.
-; RUN: llc -march=arm -mcpu=cortex-a9 < %s | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s
 
 define void @PR13378() nounwind {
 ; This was orriginally a crasher trying to schedule the instructions.
diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll
index e7ff63f..72163ae 100644
--- a/test/CodeGen/ARM/unaligned_load_store.ll
+++ b/test/CodeGen/ARM/unaligned_load_store.ll
@@ -1,6 +1,11 @@
-; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED
-; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 -mattr=-neon -arm-strict-align -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED
-; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=UNALIGNED
+; RUN: llc -mtriple=arm-eabi -pre-RA-sched=source %s -o - \
+; RUN:	| FileCheck %s -check-prefix=EXPANDED
+
+; RUN: llc -mtriple=armv6-apple-darwin -mcpu=cortex-a8 -mattr=-neon -arm-strict-align -pre-RA-sched=source %s -o - \
+; RUN:	| FileCheck %s -check-prefix=EXPANDED
+
+; RUN: llc -mtriple=armv6-apple-darwin -mcpu=cortex-a8 %s -o - \
+; RUN:	| FileCheck %s -check-prefix=UNALIGNED
 
 ; rdar://7113725
 ; rdar://12091029
diff --git a/test/CodeGen/ARM/unaligned_load_store_vector.ll b/test/CodeGen/ARM/unaligned_load_store_vector.ll
index 968a2c7..000ed48 100644
--- a/test/CodeGen/ARM/unaligned_load_store_vector.ll
+++ b/test/CodeGen/ARM/unaligned_load_store_vector.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=arm -mattr=+v7 -mattr=+neon | FileCheck %s
+;RUN: llc -mtriple=arm-eabi -mattr=+v7 -mattr=+neon %s -o - | FileCheck %s
 
 ;ALIGN = 1
 ;SIZE  = 64
diff --git a/test/CodeGen/ARM/unord.ll b/test/CodeGen/ARM/unord.ll
index bd28034..7243e99 100644
--- a/test/CodeGen/ARM/unord.ll
+++ b/test/CodeGen/ARM/unord.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=arm | grep movne | count 1
-; RUN: llc < %s -march=arm | grep moveq | count 1
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define i32 @f1(float %X, float %Y) {
 	%tmp = fcmp uno float %X, %Y
@@ -12,3 +11,10 @@ define i32 @f2(float %X, float %Y) {
 	%retval = select i1 %tmp, i32 1, i32 -1
 	ret i32 %retval
 }
+
+; CHECK: movne
+; CHECK-NOT: movne
+
+; CHECK: moveq
+; CHECK-NOT: moveq
+
diff --git a/test/CodeGen/ARM/uxt_rot.ll b/test/CodeGen/ARM/uxt_rot.ll
index 628c079..235416a 100644
--- a/test/CodeGen/ARM/uxt_rot.ll
+++ b/test/CodeGen/ARM/uxt_rot.ll
@@ -1,6 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6 | grep uxtb | count 1
-; RUN: llc < %s -march=arm -mattr=+v6 | grep uxtab | count 1
-; RUN: llc < %s -march=arm -mattr=+v6 | grep uxth | count 1
+; RUN: llc -mtriple=arm-eabi -mattr=+v6 %s -o - | FileCheck %s
 
 define zeroext i8 @test1(i32 %A.u) {
     %B.u = trunc i32 %A.u to i8
@@ -22,3 +20,13 @@ define zeroext i32 @test3(i32 %A.u) {
     %F.u = zext i16 %E.u to i32
     ret i32 %F.u
 }
+
+; CHECK: uxtb
+; CHECK-NOT: uxtb
+
+; CHECK: uxtab
+; CHECK-NOT: uxtab
+
+; CHECK: uxth
+; CHECK-NOT: uxth
+
diff --git a/test/CodeGen/ARM/v1-constant-fold.ll b/test/CodeGen/ARM/v1-constant-fold.ll
index eb49a81..7421d25 100644
--- a/test/CodeGen/ARM/v1-constant-fold.ll
+++ b/test/CodeGen/ARM/v1-constant-fold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+v7,+vfp3,-neon  | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+v7,+vfp3,-neon | FileCheck %s
 
 ; PR15611. Check that we don't crash when constant folding v1i32 types.
 
@@ -11,7 +11,7 @@ bb:
   %tmp3 = insertelement <4 x i32> %tmp2, i32 0, i32 3
   %tmp4 = add <4 x i32> %tmp3, <i32 -1, i32 -1, i32 -1, i32 -1>
 ; CHECK:  bl bar
-  tail call void @bar(<4 x i32> %tmp4)
+  call void @bar(<4 x i32> %tmp4)
   ret void
 }
 
diff --git a/test/CodeGen/ARM/vaba.ll b/test/CodeGen/ARM/vaba.ll
index 97139e9..6478b18 100644
--- a/test/CodeGen/ARM/vaba.ll
+++ b/test/CodeGen/ARM/vaba.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ;CHECK-LABEL: vabas8:
diff --git a/test/CodeGen/ARM/vabd.ll b/test/CodeGen/ARM/vabd.ll
index 2eb6d93..9ba8be2 100644
--- a/test/CodeGen/ARM/vabd.ll
+++ b/test/CodeGen/ARM/vabd.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vabds8:
diff --git a/test/CodeGen/ARM/vabs.ll b/test/CodeGen/ARM/vabs.ll
index 96dd38e..3a1aec8 100644
--- a/test/CodeGen/ARM/vabs.ll
+++ b/test/CodeGen/ARM/vabs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vabss8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vabss8:
@@ -28,7 +28,7 @@ define <2 x float> @vabsf32(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: vabsf32:
 ;CHECK: vabs.f32
 	%tmp1 = load <2 x float>* %A
-	%tmp2 = call <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float> %tmp1)
+	%tmp2 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %tmp1)
 	ret <2 x float> %tmp2
 }
 
@@ -60,19 +60,19 @@ define <4 x float> @vabsQf32(<4 x float>* %A) nounwind {
 ;CHECK-LABEL: vabsQf32:
 ;CHECK: vabs.f32
 	%tmp1 = load <4 x float>* %A
-	%tmp2 = call <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float> %tmp1)
+	%tmp2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %tmp1)
 	ret <4 x float> %tmp2
 }
 
 declare <8 x i8>  @llvm.arm.neon.vabs.v8i8(<8 x i8>) nounwind readnone
 declare <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32>) nounwind readnone
-declare <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float>) nounwind readnone
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>) nounwind readnone
 
 declare <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8>) nounwind readnone
 declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float>) nounwind readnone
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>) nounwind readnone
 
 define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vqabss8:
diff --git a/test/CodeGen/ARM/vadd.ll b/test/CodeGen/ARM/vadd.ll
index fcb5408..86b0d02 100644
--- a/test/CodeGen/ARM/vadd.ll
+++ b/test/CodeGen/ARM/vadd.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vaddi8:
diff --git a/test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll b/test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll
new file mode 100644
index 0000000..19d6cbe
--- /dev/null
+++ b/test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=arm-nacl-gnueabi | FileCheck %s
+
+declare void @llvm.va_start(i8*)
+declare void @external_func(i8*)
+
+@va_list = external global i8*
+
+; On ARM, varargs arguments are passed in r0-r3 with the rest on the
+; stack.  A varargs function must therefore spill rN-r3 just below the
+; function's initial stack pointer.
+;
+; This test checks for a bug in which a gap was left between the spill
+; area and varargs arguments on the stack when using 16 byte stack
+; alignment.
+
+define void @varargs_func(i32 %arg1, ...) {
+  call void @llvm.va_start(i8* bitcast (i8** @va_list to i8*))
+  call void @external_func(i8* bitcast (i8** @va_list to i8*))
+  ret void
+}
+; CHECK-LABEL: varargs_func:
+; Reserve space for the varargs save area.  This currently reserves
+; more than enough (16 bytes rather than the 12 bytes needed).
+; CHECK: sub sp, sp, #16
+; CHECK: push {lr}
+; Align the stack pointer to a multiple of 16.
+; CHECK: sub sp, sp, #12
+; Calculate the address of the varargs save area and save varargs
+; arguments into it.
+; CHECK-NEXT: add r0, sp, #20
+; CHECK-NEXT: stm r0, {r1, r2, r3}
diff --git a/test/CodeGen/ARM/vargs.ll b/test/CodeGen/ARM/vargs.ll
index 5f3536c..3b810f3 100644
--- a/test/CodeGen/ARM/vargs.ll
+++ b/test/CodeGen/ARM/vargs.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
+
 @str = internal constant [43 x i8] c"Hello World %d %d %d %d %d %d %d %d %d %d\0A\00"           ; <[43 x i8]*> [#uses=1]
 
 define i32 @main() {
diff --git a/test/CodeGen/ARM/vbits.ll b/test/CodeGen/ARM/vbits.ll
index 7b48441..dfeaacf 100644
--- a/test/CodeGen/ARM/vbits.ll
+++ b/test/CodeGen/ARM/vbits.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a8 %s -o - | FileCheck %s
 
 define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: v_andi8:
diff --git a/test/CodeGen/ARM/vbsl.ll b/test/CodeGen/ARM/vbsl.ll
index 1e53e51..ddc37cc 100644
--- a/test/CodeGen/ARM/vbsl.ll
+++ b/test/CodeGen/ARM/vbsl.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 ; rdar://12471808
 
diff --git a/test/CodeGen/ARM/vceq.ll b/test/CodeGen/ARM/vceq.ll
index 0a1f2eb..e3202e4 100644
--- a/test/CodeGen/ARM/vceq.ll
+++ b/test/CodeGen/ARM/vceq.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vceqi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vceqi8:
diff --git a/test/CodeGen/ARM/vcge.ll b/test/CodeGen/ARM/vcge.ll
index 81a59db..3739f5e 100644
--- a/test/CodeGen/ARM/vcge.ll
+++ b/test/CodeGen/ARM/vcge.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vcges8:
@@ -145,7 +145,7 @@ define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK: vacge.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %tmp1, <2 x float> %tmp2)
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x i32> %tmp3
 }
 
@@ -154,12 +154,12 @@ define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK: vacge.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
-	%tmp3 = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %tmp1, <4 x float> %tmp2)
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x i32> %tmp3
 }
 
-declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone
 
 define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vcgei8Z:
diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll
index 056866f..2f736f6 100644
--- a/test/CodeGen/ARM/vcgt.ll
+++ b/test/CodeGen/ARM/vcgt.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon -regalloc=basic %s -o - | FileCheck %s
 
 define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vcgts8:
@@ -146,7 +146,7 @@ define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK: vacgt.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %tmp1, <2 x float> %tmp2)
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x i32> %tmp3
 }
 
@@ -155,7 +155,7 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK: vacgt.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
-	%tmp3 = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %tmp1, <4 x float> %tmp2)
+	%tmp3 = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x i32> %tmp3
 }
 
@@ -172,8 +172,8 @@ define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {
 	ret <4 x i32> %tmp4
 }
 
-declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone
 
 define <8 x i8> @vcgti8Z(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vcgti8Z:
diff --git a/test/CodeGen/ARM/vcnt.ll b/test/CodeGen/ARM/vcnt.ll
index 0b53979..390559b 100644
--- a/test/CodeGen/ARM/vcnt.ll
+++ b/test/CodeGen/ARM/vcnt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 ; NB: this tests vcnt, vclz, and vcls
 
 define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
diff --git a/test/CodeGen/ARM/vcombine.ll b/test/CodeGen/ARM/vcombine.ll
index 527f93b..d611267 100644
--- a/test/CodeGen/ARM/vcombine.ll
+++ b/test/CodeGen/ARM/vcombine.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s
 
 define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ; CHECK: vcombine8
diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll
index 4f17dc5..af4e6a3 100644
--- a/test/CodeGen/ARM/vcvt.ll
+++ b/test/CodeGen/ARM/vcvt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon,+fp16 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon,+fp16 %s -o - | FileCheck %s
 
 define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: vcvt_f32tos32:
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll
index b24be26..89f355c 100644
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon -verify-machineinstrs %s -o - \
+; RUN:	| FileCheck %s
 
 define <8 x i8> @v_dup8(i8 %A) nounwind {
 ;CHECK-LABEL: v_dup8:
@@ -331,3 +332,35 @@ define <8 x i8> @check_i8(<16 x i8> %v) nounwind {
   %2 = insertelement  <8  x i8> %1, i8 %x, i32 1
   ret <8 x i8> %2
 }
+
+; Check that an SPR splat produces a vdup.
+
+define <2 x float> @check_spr_splat2(<2 x float> %p, i16 %q) {
+;CHECK-LABEL: check_spr_splat2:
+;CHECK: vdup.32 d
+  %conv = sitofp i16 %q to float
+  %splat.splatinsert = insertelement <2 x float> undef, float %conv, i32 0
+  %splat.splat = shufflevector <2 x float> %splat.splatinsert, <2 x float> undef, <2 x i32> zeroinitializer
+  %sub = fsub <2 x float> %splat.splat, %p
+  ret <2 x float> %sub
+}
+
+define <4 x float> @check_spr_splat4(<4 x float> %p, i16 %q) {
+;CHECK-LABEL: check_spr_splat4:
+;CHECK: vdup.32 q
+  %conv = sitofp i16 %q to float
+  %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 0
+  %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
+  %sub = fsub <4 x float> %splat.splat, %p
+  ret <4 x float> %sub
+}
+
+define <4 x float> @check_spr_splat4_lane1(<4 x float> %p, i16 %q) {
+;CHECK-LABEL: check_spr_splat4_lane1:
+;CHECK: vdup.32 q{{.*}}, d{{.*}}[1]
+  %conv = sitofp i16 %q to float
+  %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 1
+  %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %sub = fsub <4 x float> %splat.splat, %p
+  ret <4 x float> %sub
+}
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
index 5555a47..4407451 100644
--- a/test/CodeGen/ARM/vext.ll
+++ b/test/CodeGen/ARM/vext.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: test_vextd:
diff --git a/test/CodeGen/ARM/vfcmp.ll b/test/CodeGen/ARM/vfcmp.ll
index a23db7b..4b2fea9 100644
--- a/test/CodeGen/ARM/vfcmp.ll
+++ b/test/CodeGen/ARM/vfcmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 ; This tests fcmp operations that do not map directly to NEON instructions.
 
diff --git a/test/CodeGen/ARM/vfp-regs-dwarf.ll b/test/CodeGen/ARM/vfp-regs-dwarf.ll
new file mode 100644
index 0000000..4976729
--- /dev/null
+++ b/test/CodeGen/ARM/vfp-regs-dwarf.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s
+
+; Generated from:
+;     void stack_offsets() {
+;       asm("" ::: "d8", "d9", "d11", "d13");
+;     }
+; Compiled with: "clang -target armv7-linux-gnueabihf -O3"
+
+; The important point we're checking here is that the .cfi directives describe
+; the layout of the VFP registers correctly. The fact that the numbers are
+; monotonic in memory is also a nice property to have.
+
+define void @stack_offsets() {
+; CHECK-LABEL: stack_offsets:
+; CHECK: vpush {d13}
+; CHECK: vpush {d11}
+; CHECK: vpush {d8, d9}
+
+; CHECK: .cfi_offset {{269|d13}}, -8
+; CHECK: .cfi_offset {{267|d11}}, -16
+; CHECK: .cfi_offset {{265|d9}}, -24
+; CHECK: .cfi_offset {{264|d8}}, -32
+
+; CHECK: vpop {d8, d9}
+; CHECK: vpop {d11}
+; CHECK: vpop {d13}
+  call void asm sideeffect "", "~{d8},~{d9},~{d11},~{d13}"() #1
+  ret void
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/Users/tim/llvm/build/tmp.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"tmp.c", metadata !"/Users/tim/llvm/build"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @stack_offsets, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [bar]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/Users/tim/llvm/build/tmp.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+
diff --git a/test/CodeGen/ARM/vhadd.ll b/test/CodeGen/ARM/vhadd.ll
index 9c2ed57..6183db3 100644
--- a/test/CodeGen/ARM/vhadd.ll
+++ b/test/CodeGen/ARM/vhadd.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vhadds8:
diff --git a/test/CodeGen/ARM/vhsub.ll b/test/CodeGen/ARM/vhsub.ll
index 4bc2e87..f1a0cb2 100644
--- a/test/CodeGen/ARM/vhsub.ll
+++ b/test/CodeGen/ARM/vhsub.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vhsubs8:
diff --git a/test/CodeGen/ARM/vicmp.ll b/test/CodeGen/ARM/vicmp.ll
index 0a8f103..bebb320 100644
--- a/test/CodeGen/ARM/vicmp.ll
+++ b/test/CodeGen/ARM/vicmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 ; This tests icmp operations that do not map directly to NEON instructions.
 ; Not-equal (ne) operations are implemented by VCEQ/VMVN.  Less-than (lt/ult)
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
index 444d0d5..caeeada 100644
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s
+
+; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon -regalloc=basic %s -o - \
+; RUN:	| FileCheck %s
 
 define <8 x i8> @vld1i8(i8* %A) nounwind {
 ;CHECK-LABEL: vld1i8:
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll
index fddafea..7ac5cc7 100644
--- a/test/CodeGen/ARM/vld2.ll
+++ b/test/CodeGen/ARM/vld2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 %struct.__neon_int8x8x2_t = type { <8 x i8>,  <8 x i8> }
 %struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
index 400541f..171a03c 100644
--- a/test/CodeGen/ARM/vld3.ll
+++ b/test/CodeGen/ARM/vld3.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon -regalloc=basic %s -o - | FileCheck %s
 
 %struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> }
 %struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
@@ -83,6 +83,19 @@ define <1 x i64> @vld3i64(i64* %A) nounwind {
 	ret <1 x i64> %tmp4
 }
 
+define <1 x i64> @vld3i64_update(i64** %ptr, i64* %A) nounwind {
+;CHECK-LABEL: vld3i64_update:
+;CHECK: vld1.64	{d16, d17, d18}, [r1:64]!
+        %tmp0 = bitcast i64* %A to i8*
+        %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16)
+        %tmp5 = getelementptr i64* %A, i32 3
+        store i64* %tmp5, i64** %ptr
+        %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
+        %tmp4 = add <1 x i64> %tmp2, %tmp3
+        ret <1 x i64> %tmp4
+}
+
 define <16 x i8> @vld3Qi8(i8* %A) nounwind {
 ;CHECK-LABEL: vld3Qi8:
 ;Check the alignment value.  Max for this instruction is 64 bits:
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll
index f7376b5..94ad143 100644
--- a/test/CodeGen/ARM/vld4.ll
+++ b/test/CodeGen/ARM/vld4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 %struct.__neon_int8x8x4_t = type { <8 x i8>,  <8 x i8>,  <8 x i8>, <8 x i8> }
 %struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
@@ -83,6 +83,19 @@ define <1 x i64> @vld4i64(i64* %A) nounwind {
 	ret <1 x i64> %tmp4
 }
 
+define <1 x i64> @vld4i64_update(i64** %ptr, i64* %A) nounwind {
+;CHECK-LABEL: vld4i64_update:
+;CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]!
+        %tmp0 = bitcast i64* %A to i8*
+        %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64)
+        %tmp5 = getelementptr i64* %A, i32 4
+        store i64* %tmp5, i64** %ptr
+        %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
+        %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
+        %tmp4 = add <1 x i64> %tmp2, %tmp3
+        ret <1 x i64> %tmp4
+}
+
 define <16 x i8> @vld4Qi8(i8* %A) nounwind {
 ;CHECK-LABEL: vld4Qi8:
 ;Check the alignment value.  Max for this instruction is 256 bits:
diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll
index 5509f3e..64aac56 100644
--- a/test/CodeGen/ARM/vlddup.ll
+++ b/test/CodeGen/ARM/vlddup.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vld1dupi8(i8* %A) nounwind {
 ;CHECK-LABEL: vld1dupi8:
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index 7a83a4c..c7d69ff 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s
+
+; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon -regalloc=basic %s -o - \
+; RUN:	| FileCheck %s
 
 define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vld1lanei8:
diff --git a/test/CodeGen/ARM/vminmax.ll b/test/CodeGen/ARM/vminmax.ll
index 81f4578..1167ebe 100644
--- a/test/CodeGen/ARM/vminmax.ll
+++ b/test/CodeGen/ARM/vminmax.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vmins8:
diff --git a/test/CodeGen/ARM/vmla.ll b/test/CodeGen/ARM/vmla.ll
index caf6556..6073fc5 100644
--- a/test/CodeGen/ARM/vmla.ll
+++ b/test/CodeGen/ARM/vmla.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
 ;CHECK-LABEL: vmlai8:
diff --git a/test/CodeGen/ARM/vmls.ll b/test/CodeGen/ARM/vmls.ll
index 61f3424..f86739c 100644
--- a/test/CodeGen/ARM/vmls.ll
+++ b/test/CodeGen/ARM/vmls.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
 ;CHECK-LABEL: vmlsi8:
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index 8b63138..7900af4 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @v_movi8() nounwind {
 ;CHECK-LABEL: v_movi8:
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index de329ac..0fa43d8 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
 
 define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vmuli8:
diff --git a/test/CodeGen/ARM/vneg.ll b/test/CodeGen/ARM/vneg.ll
index 1be4f74..4d548dd 100644
--- a/test/CodeGen/ARM/vneg.ll
+++ b/test/CodeGen/ARM/vneg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vnegs8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vnegs8:
diff --git a/test/CodeGen/ARM/vpadal.ll b/test/CodeGen/ARM/vpadal.ll
index a616a8d..ffeac73 100644
--- a/test/CodeGen/ARM/vpadal.ll
+++ b/test/CodeGen/ARM/vpadal.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vpadals8:
diff --git a/test/CodeGen/ARM/vpadd.ll b/test/CodeGen/ARM/vpadd.ll
index f84721f..01cb1c7 100644
--- a/test/CodeGen/ARM/vpadd.ll
+++ b/test/CodeGen/ARM/vpadd.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vpaddi8:
@@ -152,6 +152,17 @@ define void @addCombineToVPADDL() nounwind ssp {
   ret void
 }
 
+; Legalization produces an EXTRACT_VECTOR_ELT DAG node which performs an extend from
+; i16 to i32. In this case the input for the formed VPADDL needs to be a vector of i16s.
+define <2 x i16> @fromExtendingExtractVectorElt(<4 x i16> %in) {
+;CHECK-LABEL: fromExtendingExtractVectorElt:
+;CHECK: vpaddl.s16
+  %tmp1 = shufflevector <4 x i16> %in, <4 x i16> undef, <2 x i32> <i32 0, i32 2> ; lanes 0 and 2 of %in
+  %tmp2 = shufflevector <4 x i16> %in, <4 x i16> undef, <2 x i32> <i32 1, i32 3> ; lanes 1 and 3 of %in
+  %x = add <2 x i16> %tmp2, %tmp1 ; adjacent-pair sums: lane1+lane0 and lane3+lane2
+  ret <2 x i16> %x
+}
+
 declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone
 declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vpminmax.ll b/test/CodeGen/ARM/vpminmax.ll
index c68b319..0b893e5 100644
--- a/test/CodeGen/ARM/vpminmax.ll
+++ b/test/CodeGen/ARM/vpminmax.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vpmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vpmins8:
diff --git a/test/CodeGen/ARM/vqadd.ll b/test/CodeGen/ARM/vqadd.ll
index 7840766..81acc8b 100644
--- a/test/CodeGen/ARM/vqadd.ll
+++ b/test/CodeGen/ARM/vqadd.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqadds8:
diff --git a/test/CodeGen/ARM/vqshl.ll b/test/CodeGen/ARM/vqshl.ll
index b5cd716..4afef6d 100644
--- a/test/CodeGen/ARM/vqshl.ll
+++ b/test/CodeGen/ARM/vqshl.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqshls8:
diff --git a/test/CodeGen/ARM/vqshrn.ll b/test/CodeGen/ARM/vqshrn.ll
index 4abae70..f02482c 100644
--- a/test/CodeGen/ARM/vqshrn.ll
+++ b/test/CodeGen/ARM/vqshrn.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqshrns8:
diff --git a/test/CodeGen/ARM/vqsub.ll b/test/CodeGen/ARM/vqsub.ll
index 90bc349..4af4380 100644
--- a/test/CodeGen/ARM/vqsub.ll
+++ b/test/CodeGen/ARM/vqsub.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqsubs8:
diff --git a/test/CodeGen/ARM/vrec.ll b/test/CodeGen/ARM/vrec.ll
index c0deca9..91979e5 100644
--- a/test/CodeGen/ARM/vrec.ll
+++ b/test/CodeGen/ARM/vrec.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vrecpei32:
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
index b6da694..eb76ba6 100644
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64D8:
diff --git a/test/CodeGen/ARM/vsel.ll b/test/CodeGen/ARM/vsel.ll
index 7e1f714..746b1b0 100644
--- a/test/CodeGen/ARM/vsel.ll
+++ b/test/CodeGen/ARM/vsel.ll
@@ -61,7 +61,7 @@ define void @test_vsel32slt(i32 %lhs32, i32 %rhs32, float %a, float %b) {
   %val1 = select i1 %tst1, float %a, float %b
   store float %val1, float* @varfloat
 ; CHECK: cmp r0, r1
-; CHECK: vselgt.f32 s0, s1, s0
+; CHECK: vselge.f32 s0, s1, s0
   ret void
 }
 define void @test_vsel64slt(i32 %lhs32, i32 %rhs32, double %a, double %b) {
@@ -70,7 +70,7 @@ define void @test_vsel64slt(i32 %lhs32, i32 %rhs32, double %a, double %b) {
   %val1 = select i1 %tst1, double %a, double %b
   store double %val1, double* @vardouble
 ; CHECK: cmp r0, r1
-; CHECK: vselgt.f64 d16, d1, d0
+; CHECK: vselge.f64 d16, d1, d0
   ret void
 }
 define void @test_vsel32sle(i32 %lhs32, i32 %rhs32, float %a, float %b) {
@@ -79,7 +79,7 @@ define void @test_vsel32sle(i32 %lhs32, i32 %rhs32, float %a, float %b) {
   %val1 = select i1 %tst1, float %a, float %b
   store float %val1, float* @varfloat
 ; CHECK: cmp r0, r1
-; CHECK: vselge.f32 s0, s1, s0
+; CHECK: vselgt.f32 s0, s1, s0
   ret void
 }
 define void @test_vsel64sle(i32 %lhs32, i32 %rhs32, double %a, double %b) {
@@ -88,7 +88,7 @@ define void @test_vsel64sle(i32 %lhs32, i32 %rhs32, double %a, double %b) {
   %val1 = select i1 %tst1, double %a, double %b
   store double %val1, double* @vardouble
 ; CHECK: cmp r0, r1
-; CHECK: vselge.f64 d16, d1, d0
+; CHECK: vselgt.f64 d16, d1, d0
   ret void
 }
 define void @test_vsel32ogt(float %lhs32, float %rhs32, float %a, float %b) {
diff --git a/test/CodeGen/ARM/vselect_imax.ll b/test/CodeGen/ARM/vselect_imax.ll
index 9ea56a4..e999034 100644
--- a/test/CodeGen/ARM/vselect_imax.ll
+++ b/test/CodeGen/ARM/vselect_imax.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s  -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 ; Make sure that ARM backend with NEON handles vselect.
 
 define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) {
diff --git a/test/CodeGen/ARM/vshift.ll b/test/CodeGen/ARM/vshift.ll
index de380d3..618a137 100644
--- a/test/CodeGen/ARM/vshift.ll
+++ b/test/CodeGen/ARM/vshift.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vshls8:
@@ -180,7 +180,7 @@ define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vlshri8:
 ;CHECK: vshr.u8
 	%tmp1 = load <8 x i8>* %A
-	%tmp2 = lshr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+	%tmp2 = lshr <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
 	ret <8 x i8> %tmp2
 }
 
@@ -188,7 +188,7 @@ define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vlshri16:
 ;CHECK: vshr.u16
 	%tmp1 = load <4 x i16>* %A
-	%tmp2 = lshr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 >
+	%tmp2 = lshr <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
 	ret <4 x i16> %tmp2
 }
 
@@ -196,7 +196,7 @@ define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vlshri32:
 ;CHECK: vshr.u32
 	%tmp1 = load <2 x i32>* %A
-	%tmp2 = lshr <2 x i32> %tmp1, < i32 32, i32 32 >
+	%tmp2 = lshr <2 x i32> %tmp1, < i32 31, i32 31 >
 	ret <2 x i32> %tmp2
 }
 
@@ -204,7 +204,7 @@ define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind {
 ;CHECK-LABEL: vlshri64:
 ;CHECK: vshr.u64
 	%tmp1 = load <1 x i64>* %A
-	%tmp2 = lshr <1 x i64> %tmp1, < i64 64 >
+	%tmp2 = lshr <1 x i64> %tmp1, < i64 63 >
 	ret <1 x i64> %tmp2
 }
 
@@ -252,7 +252,7 @@ define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vlshrQi8:
 ;CHECK: vshr.u8
 	%tmp1 = load <16 x i8>* %A
-	%tmp2 = lshr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+	%tmp2 = lshr <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
 	ret <16 x i8> %tmp2
 }
 
@@ -260,7 +260,7 @@ define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vlshrQi16:
 ;CHECK: vshr.u16
 	%tmp1 = load <8 x i16>* %A
-	%tmp2 = lshr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
+	%tmp2 = lshr <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
 	ret <8 x i16> %tmp2
 }
 
@@ -268,7 +268,7 @@ define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vlshrQi32:
 ;CHECK: vshr.u32
 	%tmp1 = load <4 x i32>* %A
-	%tmp2 = lshr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 >
+	%tmp2 = lshr <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
 	ret <4 x i32> %tmp2
 }
 
@@ -276,7 +276,7 @@ define <2 x i64> @vlshrQi64(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vlshrQi64:
 ;CHECK: vshr.u64
 	%tmp1 = load <2 x i64>* %A
-	%tmp2 = lshr <2 x i64> %tmp1, < i64 64, i64 64 >
+	%tmp2 = lshr <2 x i64> %tmp1, < i64 63, i64 63 >
 	ret <2 x i64> %tmp2
 }
 
@@ -331,7 +331,7 @@ define <8 x i8> @vashri8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vashri8:
 ;CHECK: vshr.s8
 	%tmp1 = load <8 x i8>* %A
-	%tmp2 = ashr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+	%tmp2 = ashr <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
 	ret <8 x i8> %tmp2
 }
 
@@ -339,7 +339,7 @@ define <4 x i16> @vashri16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vashri16:
 ;CHECK: vshr.s16
 	%tmp1 = load <4 x i16>* %A
-	%tmp2 = ashr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 >
+	%tmp2 = ashr <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
 	ret <4 x i16> %tmp2
 }
 
@@ -347,7 +347,7 @@ define <2 x i32> @vashri32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vashri32:
 ;CHECK: vshr.s32
 	%tmp1 = load <2 x i32>* %A
-	%tmp2 = ashr <2 x i32> %tmp1, < i32 32, i32 32 >
+	%tmp2 = ashr <2 x i32> %tmp1, < i32 31, i32 31 >
 	ret <2 x i32> %tmp2
 }
 
@@ -355,7 +355,7 @@ define <1 x i64> @vashri64(<1 x i64>* %A) nounwind {
 ;CHECK-LABEL: vashri64:
 ;CHECK: vshr.s64
 	%tmp1 = load <1 x i64>* %A
-	%tmp2 = ashr <1 x i64> %tmp1, < i64 64 >
+	%tmp2 = ashr <1 x i64> %tmp1, < i64 63 >
 	ret <1 x i64> %tmp2
 }
 
@@ -403,7 +403,7 @@ define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vashrQi8:
 ;CHECK: vshr.s8
 	%tmp1 = load <16 x i8>* %A
-	%tmp2 = ashr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+	%tmp2 = ashr <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
 	ret <16 x i8> %tmp2
 }
 
@@ -411,7 +411,7 @@ define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vashrQi16:
 ;CHECK: vshr.s16
 	%tmp1 = load <8 x i16>* %A
-	%tmp2 = ashr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
+	%tmp2 = ashr <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
 	ret <8 x i16> %tmp2
 }
 
@@ -419,7 +419,7 @@ define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vashrQi32:
 ;CHECK: vshr.s32
 	%tmp1 = load <4 x i32>* %A
-	%tmp2 = ashr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 >
+	%tmp2 = ashr <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
 	ret <4 x i32> %tmp2
 }
 
@@ -427,6 +427,6 @@ define <2 x i64> @vashrQi64(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vashrQi64:
 ;CHECK: vshr.s64
 	%tmp1 = load <2 x i64>* %A
-	%tmp2 = ashr <2 x i64> %tmp1, < i64 64, i64 64 >
+	%tmp2 = ashr <2 x i64> %tmp1, < i64 63, i64 63 >
 	ret <2 x i64> %tmp2
 }
diff --git a/test/CodeGen/ARM/vshiftins.ll b/test/CodeGen/ARM/vshiftins.ll
index 27610bf..9526c32 100644
--- a/test/CodeGen/ARM/vshiftins.ll
+++ b/test/CodeGen/ARM/vshiftins.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsli8:
diff --git a/test/CodeGen/ARM/vshl.ll b/test/CodeGen/ARM/vshl.ll
index 462f7fe..6228652 100644
--- a/test/CodeGen/ARM/vshl.ll
+++ b/test/CodeGen/ARM/vshl.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vshls8:
diff --git a/test/CodeGen/ARM/vshll.ll b/test/CodeGen/ARM/vshll.ll
index ae80664..27873eb 100644
--- a/test/CodeGen/ARM/vshll.ll
+++ b/test/CodeGen/ARM/vshll.ll
@@ -1,51 +1,57 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i16> @vshlls8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vshlls8:
 ;CHECK: vshll.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
-	ret <8 x i16> %tmp2
+        %tmp1 = load <8 x i8>* %A
+        %sext = sext <8 x i8> %tmp1 to <8 x i16>
+        %shift = shl <8 x i16> %sext, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+        ret <8 x i16> %shift
 }
 
 define <4 x i32> @vshlls16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vshlls16:
 ;CHECK: vshll.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftls.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
-	ret <4 x i32> %tmp2
+        %tmp1 = load <4 x i16>* %A
+        %sext = sext <4 x i16> %tmp1 to <4 x i32>
+        %shift = shl <4 x i32> %sext, <i32 15, i32 15, i32 15, i32 15>
+        ret <4 x i32> %shift
 }
 
 define <2 x i64> @vshlls32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vshlls32:
 ;CHECK: vshll.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
-	ret <2 x i64> %tmp2
+        %tmp1 = load <2 x i32>* %A
+        %sext = sext <2 x i32> %tmp1 to <2 x i64>
+        %shift = shl <2 x i64> %sext, <i64 31, i64 31>
+        ret <2 x i64> %shift
 }
 
 define <8 x i16> @vshllu8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vshllu8:
 ;CHECK: vshll.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftlu.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
-	ret <8 x i16> %tmp2
+        %tmp1 = load <8 x i8>* %A
+        %zext = zext <8 x i8> %tmp1 to <8 x i16>
+        %shift = shl <8 x i16> %zext, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+        ret <8 x i16> %shift
 }
 
 define <4 x i32> @vshllu16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vshllu16:
 ;CHECK: vshll.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
-	ret <4 x i32> %tmp2
+        %tmp1 = load <4 x i16>* %A
+        %zext = zext <4 x i16> %tmp1 to <4 x i32>
+        %shift = shl <4 x i32> %zext, <i32 15, i32 15, i32 15, i32 15>
+        ret <4 x i32> %shift
 }
 
 define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vshllu32:
 ;CHECK: vshll.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftlu.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
-	ret <2 x i64> %tmp2
+        %tmp1 = load <2 x i32>* %A
+        %zext = zext <2 x i32> %tmp1 to <2 x i64>
+        %shift = shl <2 x i64> %zext, <i64 31, i64 31>
+        ret <2 x i64> %shift
 }
 
 ; The following tests use the maximum shift count, so the signedness is
@@ -53,31 +59,58 @@ define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind {
 define <8 x i16> @vshlli8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vshlli8:
 ;CHECK: vshll.i8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >)
-	ret <8 x i16> %tmp2
+        %tmp1 = load <8 x i8>* %A
+        %sext = sext <8 x i8> %tmp1 to <8 x i16>
+        %shift = shl <8 x i16> %sext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+        ret <8 x i16> %shift
 }
 
 define <4 x i32> @vshlli16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vshlli16:
 ;CHECK: vshll.i16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 16, i16 16, i16 16, i16 16 >)
-	ret <4 x i32> %tmp2
+        %tmp1 = load <4 x i16>* %A
+        %zext = zext <4 x i16> %tmp1 to <4 x i32>
+        %shift = shl <4 x i32> %zext, <i32 16, i32 16, i32 16, i32 16>
+        ret <4 x i32> %shift
 }
 
 define <2 x i64> @vshlli32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vshlli32:
 ;CHECK: vshll.i32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 32, i32 32 >)
-	ret <2 x i64> %tmp2
+        %tmp1 = load <2 x i32>* %A
+        %zext = zext <2 x i32> %tmp1 to <2 x i64>
+        %shift = shl <2 x i64> %zext, <i64 32, i64 32>
+        ret <2 x i64> %shift
 }
 
-declare <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vshiftls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+; And these have a shift just out of range so separate vmovl and vshl
+; instructions are needed.
+define <8 x i16> @vshllu8_bad(<8 x i8>* %A) nounwind {
+; CHECK-LABEL: vshllu8_bad:
+; CHECK: vmovl.u8
+; CHECK: vshl.i16
+        %tmp1 = load <8 x i8>* %A
+        %zext = zext <8 x i8> %tmp1 to <8 x i16> ; expected to remain a separate vmovl.u8
+        %shift = shl <8 x i16> %zext, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9> ; 9 is one more than the shift vshlli8 uses to get vshll.i8
+        ret <8 x i16> %shift
+}
+
+define <4 x i32> @vshlls16_bad(<4 x i16>* %A) nounwind {
+; CHECK-LABEL: vshlls16_bad:
+; CHECK: vmovl.s16
+; CHECK: vshl.i32
+        %tmp1 = load <4 x i16>* %A
+        %sext = sext <4 x i16> %tmp1 to <4 x i32> ; expected to remain a separate vmovl.s16
+        %shift = shl <4 x i32> %sext, <i32 17, i32 17, i32 17, i32 17> ; 17 is one more than the shift vshlli16 uses to get vshll.i16
+        ret <4 x i32> %shift
+}
 
-declare <8 x i16> @llvm.arm.neon.vshiftlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vshiftlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+define <2 x i64> @vshllu32_bad(<2 x i32>* %A) nounwind {
+; CHECK-LABEL: vshllu32_bad:
+; CHECK: vmovl.u32
+; CHECK: vshl.i64
+        %tmp1 = load <2 x i32>* %A
+        %zext = zext <2 x i32> %tmp1 to <2 x i64> ; expected to remain a separate vmovl.u32
+        %shift = shl <2 x i64> %zext, <i64 33, i64 33> ; 33 is one more than the shift vshlli32 uses to get vshll.i32
+        ret <2 x i64> %shift
+}
diff --git a/test/CodeGen/ARM/vshrn.ll b/test/CodeGen/ARM/vshrn.ll
index 40a94fe..8aa009a 100644
--- a/test/CodeGen/ARM/vshrn.ll
+++ b/test/CodeGen/ARM/vshrn.ll
@@ -1,32 +1,61 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vshrns8(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vshrns8:
 ;CHECK: vshrn.i16
 	%tmp1 = load <8 x i16>* %A
-	%tmp2 = call <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
-	ret <8 x i8> %tmp2
+        %tmp2 = lshr <8 x i16> %tmp1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+        %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
+	ret <8 x i8> %tmp3
 }
 
 define <4 x i16> @vshrns16(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vshrns16:
 ;CHECK: vshrn.i32
 	%tmp1 = load <4 x i32>* %A
-	%tmp2 = call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
-	ret <4 x i16> %tmp2
+        %tmp2 = ashr <4 x i32> %tmp1, <i32 16, i32 16, i32 16, i32 16>
+        %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
+	ret <4 x i16> %tmp3
 }
 
 define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vshrns32:
 ;CHECK: vshrn.i64
 	%tmp1 = load <2 x i64>* %A
-	%tmp2 = call <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
-	ret <2 x i32> %tmp2
+        %tmp2 = ashr <2 x i64> %tmp1, <i64 32, i64 32>
+        %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
+	ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vshrns8_bad(<8 x i16>* %A) nounwind {
+; CHECK-LABEL: vshrns8_bad:
+; CHECK: vshr.s16
+; CHECK: vmovn.i16
+        %tmp1 = load <8 x i16>* %A
+        %tmp2 = ashr <8 x i16> %tmp1, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9> ; 9 is one more than the shift vshrns8 uses to get vshrn.i16
+        %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8> ; expected to remain a separate vmovn.i16
+        ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vshrns16_bad(<4 x i32>* %A) nounwind {
+; CHECK-LABEL: vshrns16_bad:
+; CHECK: vshr.u32
+; CHECK: vmovn.i32
+        %tmp1 = load <4 x i32>* %A
+        %tmp2 = lshr <4 x i32> %tmp1, <i32 17, i32 17, i32 17, i32 17> ; 17 is one more than the shift vshrns16 uses to get vshrn.i32
+        %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16> ; expected to remain a separate vmovn.i32
+        ret <4 x i16> %tmp3
 }
 
-declare <8 x i8>  @llvm.arm.neon.vshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+define <2 x i32> @vshrns32_bad(<2 x i64>* %A) nounwind {
+; CHECK-LABEL: vshrns32_bad:
+; CHECK: vshr.u64
+; CHECK: vmovn.i64
+        %tmp1 = load <2 x i64>* %A
+        %tmp2 = lshr <2 x i64> %tmp1, <i64 33, i64 33> ; 33 is one more than the shift vshrns32 uses to get vshrn.i64
+        %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32> ; expected to remain a separate vmovn.i64
+        ret <2 x i32> %tmp3
+}
 
 define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vrshrns8:
diff --git a/test/CodeGen/ARM/vsra.ll b/test/CodeGen/ARM/vsra.ll
index 7a211c3..fa5985a 100644
--- a/test/CodeGen/ARM/vsra.ll
+++ b/test/CodeGen/ARM/vsra.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsras8:
 ;CHECK: vsra.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
-	%tmp3 = ashr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
-        %tmp4 = add <8 x i8> %tmp1, %tmp3
+	%tmp3 = ashr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
+    %tmp4 = add <8 x i8> %tmp1, %tmp3
 	ret <8 x i8> %tmp4
 }
 
@@ -15,7 +15,7 @@ define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vsra.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = ashr <4 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16 >
+	%tmp3 = ashr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 >
         %tmp4 = add <4 x i16> %tmp1, %tmp3
 	ret <4 x i16> %tmp4
 }
@@ -25,7 +25,7 @@ define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vsra.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
-	%tmp3 = ashr <2 x i32> %tmp2, < i32 32, i32 32 >
+	%tmp3 = ashr <2 x i32> %tmp2, < i32 31, i32 31 >
         %tmp4 = add <2 x i32> %tmp1, %tmp3
 	ret <2 x i32> %tmp4
 }
@@ -35,7 +35,7 @@ define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK: vsra.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
-	%tmp3 = ashr <1 x i64> %tmp2, < i64 64 >
+	%tmp3 = ashr <1 x i64> %tmp2, < i64 63 >
         %tmp4 = add <1 x i64> %tmp1, %tmp3
 	ret <1 x i64> %tmp4
 }
@@ -45,7 +45,7 @@ define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK: vsra.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
-	%tmp3 = ashr <16 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+	%tmp3 = ashr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
         %tmp4 = add <16 x i8> %tmp1, %tmp3
 	ret <16 x i8> %tmp4
 }
@@ -55,7 +55,7 @@ define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vsra.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
-	%tmp3 = ashr <8 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
+	%tmp3 = ashr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
         %tmp4 = add <8 x i16> %tmp1, %tmp3
 	ret <8 x i16> %tmp4
 }
@@ -65,7 +65,7 @@ define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vsra.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
-	%tmp3 = ashr <4 x i32> %tmp2, < i32 32, i32 32, i32 32, i32 32 >
+	%tmp3 = ashr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 >
         %tmp4 = add <4 x i32> %tmp1, %tmp3
 	ret <4 x i32> %tmp4
 }
@@ -75,7 +75,7 @@ define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK: vsra.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
-	%tmp3 = ashr <2 x i64> %tmp2, < i64 64, i64 64 >
+	%tmp3 = ashr <2 x i64> %tmp2, < i64 63, i64 63 >
         %tmp4 = add <2 x i64> %tmp1, %tmp3
 	ret <2 x i64> %tmp4
 }
@@ -85,7 +85,7 @@ define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vsra.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
-	%tmp3 = lshr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+	%tmp3 = lshr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
         %tmp4 = add <8 x i8> %tmp1, %tmp3
 	ret <8 x i8> %tmp4
 }
@@ -95,7 +95,7 @@ define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vsra.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = lshr <4 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16 >
+	%tmp3 = lshr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 >
         %tmp4 = add <4 x i16> %tmp1, %tmp3
 	ret <4 x i16> %tmp4
 }
@@ -105,7 +105,7 @@ define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vsra.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
-	%tmp3 = lshr <2 x i32> %tmp2, < i32 32, i32 32 >
+	%tmp3 = lshr <2 x i32> %tmp2, < i32 31, i32 31 >
         %tmp4 = add <2 x i32> %tmp1, %tmp3
 	ret <2 x i32> %tmp4
 }
@@ -115,7 +115,7 @@ define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK: vsra.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
-	%tmp3 = lshr <1 x i64> %tmp2, < i64 64 >
+	%tmp3 = lshr <1 x i64> %tmp2, < i64 63 >
         %tmp4 = add <1 x i64> %tmp1, %tmp3
 	ret <1 x i64> %tmp4
 }
@@ -125,7 +125,7 @@ define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK: vsra.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
-	%tmp3 = lshr <16 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+	%tmp3 = lshr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
         %tmp4 = add <16 x i8> %tmp1, %tmp3
 	ret <16 x i8> %tmp4
 }
@@ -135,7 +135,7 @@ define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vsra.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
-	%tmp3 = lshr <8 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
+	%tmp3 = lshr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
         %tmp4 = add <8 x i16> %tmp1, %tmp3
 	ret <8 x i16> %tmp4
 }
@@ -145,7 +145,7 @@ define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vsra.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
-	%tmp3 = lshr <4 x i32> %tmp2, < i32 32, i32 32, i32 32, i32 32 >
+	%tmp3 = lshr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 >
         %tmp4 = add <4 x i32> %tmp1, %tmp3
 	ret <4 x i32> %tmp4
 }
@@ -155,7 +155,7 @@ define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK: vsra.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
-	%tmp3 = lshr <2 x i64> %tmp2, < i64 64, i64 64 >
+	%tmp3 = lshr <2 x i64> %tmp2, < i64 63, i64 63 >
         %tmp4 = add <2 x i64> %tmp1, %tmp3
 	ret <2 x i64> %tmp4
 }
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll
index 36439fd..14f3ff0 100644
--- a/test/CodeGen/ARM/vst1.ll
+++ b/test/CodeGen/ARM/vst1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vst1i8:
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
index 7551a56..2180259 100644
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vst2i8:
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
index 91eb7fc..5f150ed 100644
--- a/test/CodeGen/ARM/vst3.ll
+++ b/test/CodeGen/ARM/vst3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon -fast-isel=0 -O0 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon -fast-isel=0 -O0 %s -o - | FileCheck %s
 
 define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vst3i8:
@@ -61,6 +61,18 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
 	ret void
 }
 
+define void @vst3i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
+;CHECK-LABEL: vst3i64_update:
+;CHECK: vst1.64	{d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
+        %A = load i64** %ptr                    ; current store address, read through %ptr
+        %tmp0 = bitcast i64* %A to i8*
+        %tmp1 = load <1 x i64>* %B
+        call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
+        %tmp2 = getelementptr i64* %A, i32 3    ; step past the three i64 values just stored
+        store i64* %tmp2, i64** %ptr            ; write the advanced address back; the expected vst1.64 carries "!" writeback
+        ret void
+}
+
 define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vst3Qi8:
 ;Check the alignment value.  Max for this instruction is 64 bits:
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
index ef5c83a..44c76b5 100644
--- a/test/CodeGen/ARM/vst4.ll
+++ b/test/CodeGen/ARM/vst4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vst4i8:
@@ -60,6 +60,18 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
 	ret void
 }
 
+define void @vst4i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
+;CHECK-LABEL: vst4i64_update:
+;CHECK: vst1.64	{d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r{{[0-9]+}}]!
+        %A = load i64** %ptr                    ; current store address, read through %ptr
+        %tmp0 = bitcast i64* %A to i8*
+        %tmp1 = load <1 x i64>* %B
+        call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
+        %tmp2 = getelementptr i64* %A, i32 4    ; step past the four i64 values just stored
+        store i64* %tmp2, i64** %ptr            ; write the advanced address back; the expected vst1.64 carries "!" writeback
+        ret void
+}
+
 define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vst4Qi8:
 ;Check the alignment value.  Max for this instruction is 256 bits:
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
index 34c5c70..7dd6e7b 100644
--- a/test/CodeGen/ARM/vstlane.ll
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm -mattr=+neon %s -o - | FileCheck %s
 
 define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vst1lanei8:
diff --git a/test/CodeGen/ARM/vsub.ll b/test/CodeGen/ARM/vsub.ll
index 6b95b97..d1a094b 100644
--- a/test/CodeGen/ARM/vsub.ll
+++ b/test/CodeGen/ARM/vsub.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsubi8:
diff --git a/test/CodeGen/ARM/vtbl.ll b/test/CodeGen/ARM/vtbl.ll
index 21614b0..32258a3 100644
--- a/test/CodeGen/ARM/vtbl.ll
+++ b/test/CodeGen/ARM/vtbl.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
 %struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>, <8 x i8> }
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll
index 7d101bc..cdae7f8 100644
--- a/test/CodeGen/ARM/vtrn.ll
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vtrni8:
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll
index 2d193c1..832be6c 100644
--- a/test/CodeGen/ARM/vuzp.ll
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vuzpi8:
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll
index f71aef7..f74dc62 100644
--- a/test/CodeGen/ARM/vzip.ll
+++ b/test/CodeGen/ARM/vzip.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vzipi8:
diff --git a/test/CodeGen/ARM/warn-stack.ll b/test/CodeGen/ARM/warn-stack.ll
index 9538bbf..90a3e1f 100644
--- a/test/CodeGen/ARM/warn-stack.ll
+++ b/test/CodeGen/ARM/warn-stack.ll
@@ -12,7 +12,7 @@ entry:
   ret void
 }
 
-; CHECK: warning: Stack size limit exceeded (96) in warn.
+; CHECK: warning: stack size limit exceeded (96) in warn
 define void @warn() nounwind ssp {
 entry:
   %buffer = alloca [80 x i8], align 1
diff --git a/test/CodeGen/ARM/weak.ll b/test/CodeGen/ARM/weak.ll
index 5ac4b8c..375ce22 100644
--- a/test/CodeGen/ARM/weak.ll
+++ b/test/CodeGen/ARM/weak.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=arm | grep .weak.*f
-; RUN: llc < %s -march=arm | grep .weak.*h
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define weak i32 @f() {
 entry:
@@ -14,3 +13,6 @@ entry:
 
 declare extern_weak void @h()
 
+; CHECK: {{.}}weak{{.*}}f
+; CHECK: {{.}}weak{{.*}}h
+
diff --git a/test/CodeGen/ARM/weak2.ll b/test/CodeGen/ARM/weak2.ll
index cf327bb..82ab90e 100644
--- a/test/CodeGen/ARM/weak2.ll
+++ b/test/CodeGen/ARM/weak2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | grep .weak
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
 
 define i32 @f(i32 %a) {
 entry:
@@ -16,3 +16,6 @@ UnifiedReturnBlock:		; preds = %entry
 }
 
 declare extern_weak i32 @test_weak(...)
+
+; CHECK: {{.}}weak
+
diff --git a/test/CodeGen/ARM/zero-cycle-zero.ll b/test/CodeGen/ARM/zero-cycle-zero.ll
new file mode 100644
index 0000000..121a87f
--- /dev/null
+++ b/test/CodeGen/ARM/zero-cycle-zero.ll
@@ -0,0 +1,70 @@
+; RUN: llc -mtriple=armv8 -mcpu=cyclone < %s | FileCheck %s --check-prefix=CHECK-CYCLONE
+; RUN: llc -mtriple=armv8 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK-SWIFT
+
+declare arm_aapcs_vfpcc void @take_vec64(<2 x i32>) ; external callee taking one 64-bit vector
+
+define void @test_vec64() { ; passes an all-zero <2 x i32> to two consecutive calls
+; CHECK-CYCLONE-LABEL: test_vec64:
+; CHECK-SWIFT-LABEL: test_vec64:
+; Cyclone checks expect d0 to be zeroed with vmov.i32 before each bl; Swift checks expect a scratch d-register to be zeroed and then copied into d0 with vorr before each bl.
+  call arm_aapcs_vfpcc void @take_vec64(<2 x i32> <i32 0, i32 0>) ; first use of the zero vector
+  call arm_aapcs_vfpcc void @take_vec64(<2 x i32> <i32 0, i32 0>) ; same constant again, so the zero is needed twice
+; CHECK-CYCLONE-NOT: vmov.f64 d0,
+; CHECK-CYCLONE: vmov.i32 d0, #0
+; CHECK-CYCLONE: bl
+; CHECK-CYCLONE: vmov.i32 d0, #0
+; CHECK-CYCLONE: bl
+
+; CHECK-SWIFT: vmov.f64 [[ZEROREG:d[0-9]+]],
+; CHECK-SWIFT: vmov.i32 [[ZEROREG]], #0
+; CHECK-SWIFT: vorr d0, [[ZEROREG]], [[ZEROREG]]
+; CHECK-SWIFT: bl
+; CHECK-SWIFT: vorr d0, [[ZEROREG]], [[ZEROREG]]
+; CHECK-SWIFT: bl
+
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @take_vec128(<8 x i16>) ; external callee taking one 128-bit vector
+
+define void @test_vec128() { ; passes an all-zero <8 x i16> to two consecutive calls
+; CHECK-CYCLONE-LABEL: test_vec128:
+; CHECK-SWIFT-LABEL: test_vec128:
+; Cyclone checks expect q0 to be zeroed with vmov.i32 before each bl; Swift checks expect a scratch q-register to be zeroed and then copied into q0 with vorr before each bl. Neither CPU may emit a vmov.f64 into a d-register first.
+  call arm_aapcs_vfpcc void @take_vec128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>) ; first use of the zero vector
+  call arm_aapcs_vfpcc void @take_vec128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>) ; same constant again, so the zero is needed twice
+; CHECK-CYCLONE-NOT: vmov.f64 [[ZEROREG:d[0-9]+]],
+; CHECK-CYCLONE: vmov.i32 q0, #0
+; CHECK-CYCLONE: bl
+; CHECK-CYCLONE: vmov.i32 q0, #0
+; CHECK-CYCLONE: bl
+
+; CHECK-SWIFT-NOT: vmov.f64 [[ZEROREG:d[0-9]+]],
+; CHECK-SWIFT: vmov.i32 [[ZEROREG:q[0-9]+]], #0
+; CHECK-SWIFT: vorr q0, [[ZEROREG]], [[ZEROREG]]
+; CHECK-SWIFT: bl
+; CHECK-SWIFT: vorr q0, [[ZEROREG]], [[ZEROREG]]
+; CHECK-SWIFT: bl
+
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @take_i32(i32) ; convention must match the call sites below; a caller/callee mismatch is undefined behaviour
+
+define void @test_i32() { ; scalar counterpart of the vector tests: an integer zero passed to two consecutive calls
+; CHECK-CYCLONE-LABEL: test_i32:
+; CHECK-SWIFT-LABEL: test_i32:
+
+  call arm_aapcs_vfpcc void @take_i32(i32 0)
+  call arm_aapcs_vfpcc void @take_i32(i32 0)
+; CHECK-CYCLONE-NOT: vmov.f64 [[ZEROREG:d[0-9]+]],
+; CHECK-CYCLONE: mov r0, #0
+; CHECK-CYCLONE: bl
+; CHECK-CYCLONE: mov r0, #0
+; CHECK-CYCLONE: bl
+
+; It doesn't particularly matter what Swift does here; there isn't carefully
+; crafted behaviour that we might break in Cyclone.
+
+  ret void
+}
author	Stephen Hines <srhines@google.com>	2014-04-23 16:57:46 -0700
committer	Stephen Hines <srhines@google.com>	2014-04-24 15:53:16 -0700
commit	36b56886974eae4f9c5ebc96befd3e7bfe5de338 (patch)
tree	e6cfb69fbbd937f450eeb83bfb83b9da3b01275a /test/CodeGen/ARM
parent	69a8640022b04415ae9fac62f8ab090601d8f889 (diff)
download	external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.zip external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.tar.gz external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.tar.bz2