753 files changed, 44249 insertions, 2473 deletions
diff --git a/test/Analysis/BasicAA/intrinsics.ll b/test/Analysis/BasicAA/intrinsics.ll
index 59725cf..c1cf587 100644
--- a/test/Analysis/BasicAA/intrinsics.ll
+++ b/test/Analysis/BasicAA/intrinsics.ll
@@ -7,7 +7,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 
 ; CHECK:      define <8 x i16> @test0(i8* noalias %p, i8* noalias %q, <8 x i16> %y) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
+; CHECK-NEXT:   %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) [[ATTR:#[0-9]+]]
 ; CHECK-NEXT:   call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
 ; CHECK-NEXT:   %c = add <8 x i16> %a, %a
 define <8 x i16> @test0(i8* noalias %p, i8* noalias %q, <8 x i16> %y) {
@@ -22,7 +22,7 @@ entry:
 ; CHECK:      define <8 x i16> @test1(i8* %p, <8 x i16> %y) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %q = getelementptr i8* %p, i64 16
-; CHECK-NEXT:   %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
+; CHECK-NEXT:   %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) [[ATTR]]
 ; CHECK-NEXT:   call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
 ; CHECK-NEXT:   %c = add <8 x i16> %a, %a
 define <8 x i16> @test1(i8* %p, <8 x i16> %y) {
@@ -37,3 +37,6 @@ entry:
 
 declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly
 declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
+
+; CHECK: attributes #0 = { nounwind readonly }
+; CHECK: attributes [[ATTR]] = { nounwind }
diff --git a/test/Analysis/BasicAA/pure-const-dce.ll b/test/Analysis/BasicAA/pure-const-dce.ll
index 266e607..e489928 100644
--- a/test/Analysis/BasicAA/pure-const-dce.ll
+++ b/test/Analysis/BasicAA/pure-const-dce.ll
@@ -4,11 +4,11 @@
 
 ; CHECK:      @test
 ; CHECK:      entry
-; CHECK:      %tmp0 = call i32 @TestConst(i32 5) readnone
-; CHECK-NEXT: %tmp1 = call i32 @TestPure(i32 6) readonly
+; CHECK:      %tmp0 = call i32 @TestConst(i32 5) [[READNONE:#[0-9]+]]
+; CHECK-NEXT: %tmp1 = call i32 @TestPure(i32 6) [[READONLY:#[0-9]+]]
 ; CHECK-NEXT: %tmp2 = call i32 @TestNone(i32 7)
 ; CHECK-NEXT: store i32 1, i32* @g
-; CHECK-NEXT: %tmp5 = call i32 @TestPure(i32 6) readonly
+; CHECK-NEXT: %tmp5 = call i32 @TestPure(i32 6) [[READONLY]]
 ; CHECK-NEXT: %tmp7 = call i32 @TestNone(i32 7)
 ; CHECK-NEXT: %tmp8 = call i32 @TestNone(i32 7)
 ; CHECK-NEXT: %sum0 = add i32 %tmp0, %tmp1
@@ -49,3 +49,6 @@ declare i32 @TestConst(i32) readnone
 declare i32 @TestPure(i32) readonly
 
 declare i32 @TestNone(i32)
+
+; CHECK: attributes [[READNONE]] = { readnone }
+; CHECK: attributes [[READONLY]] = { readonly }
diff --git a/test/Analysis/CostModel/ARM/cast.ll b/test/Analysis/CostModel/ARM/cast.ll
new file mode 100644
index 0000000..464b6ec
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/cast.ll
@@ -0,0 +1,158 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios6.0.0"
+
+define i32 @casts() {
+
+    ; -- scalars --
+  ; CHECK: cost of 1 {{.*}} sext
+  %r0 = sext i1 undef to i8
+  ; CHECK: cost of 1 {{.*}} zext
+  %r1 = zext i1 undef to i8
+  ; CHECK: cost of 1 {{.*}} sext
+  %r2 = sext i1 undef to i16
+  ; CHECK: cost of 1 {{.*}} zext
+  %r3 = zext i1 undef to i16
+  ; CHECK: cost of 1 {{.*}} sext
+  %r4 = sext i1 undef to i32
+  ; CHECK: cost of 1 {{.*}} zext
+  %r5 = zext i1 undef to i32
+  ; CHECK: cost of 1 {{.*}} sext
+  %r6 = sext i1 undef to i64
+  ; CHECK: cost of 1 {{.*}} zext
+  %r7 = zext i1 undef to i64
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r8 = trunc i8 undef to i1
+  ; CHECK: cost of 1 {{.*}} sext
+  %r9 = sext i8 undef to i16
+  ; CHECK: cost of 1 {{.*}} zext
+  %r10 = zext i8 undef to i16
+  ; CHECK: cost of 1 {{.*}} sext
+  %r11 = sext i8 undef to i32
+  ; CHECK: cost of 1 {{.*}} zext
+  %r12 = zext i8 undef to i32
+  ; CHECK: cost of 1 {{.*}} sext
+  %r13 = sext i8 undef to i64
+  ; CHECK: cost of 1 {{.*}} zext
+  %r14 = zext i8 undef to i64
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r15 = trunc i16 undef to i1
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r16 = trunc i16 undef to i8
+  ; CHECK: cost of 1 {{.*}} sext
+  %r17 = sext i16 undef to i32
+  ; CHECK: cost of 1 {{.*}} zext
+  %r18 = zext i16 undef to i32
+  ; CHECK: cost of 2 {{.*}} sext
+  %r19 = sext i16 undef to i64
+  ; CHECK: cost of 1 {{.*}} zext
+  %r20 = zext i16 undef to i64
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r21 = trunc i32 undef to i1
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r22 = trunc i32 undef to i8
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r23 = trunc i32 undef to i16
+  ; CHECK: cost of 1 {{.*}} sext
+  %r24 = sext i32 undef to i64
+  ; CHECK: cost of 1 {{.*}} zext
+  %r25 = zext i32 undef to i64
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r26 = trunc i64 undef to i1
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r27 = trunc i64 undef to i8
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r28 = trunc i64 undef to i16
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r29 = trunc i64 undef to i32
+
+    ; -- floating point conversions --
+  ; Moves between scalar and NEON registers.
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r30 = fptoui float undef to i1
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r31 = fptosi float undef to i1
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r32 = fptoui float undef to i8
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r33 = fptosi float undef to i8
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r34 = fptoui float undef to i16
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r35 = fptosi float undef to i16
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r36 = fptoui float undef to i32
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r37 = fptosi float undef to i32
+  ; CHECK: cost of 10 {{.*}} fptoui
+  %r38 = fptoui float undef to i64
+  ; CHECK: cost of 10 {{.*}} fptosi
+  %r39 = fptosi float undef to i64
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r40 = fptoui double undef to i1
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r41 = fptosi double undef to i1
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r42 = fptoui double undef to i8
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r43 = fptosi double undef to i8
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r44 = fptoui double undef to i16
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r45 = fptosi double undef to i16
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r46 = fptoui double undef to i32
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r47 = fptosi double undef to i32
+  ; Function call
+  ; CHECK: cost of 10 {{.*}} fptoui
+  %r48 = fptoui double undef to i64
+  ; CHECK: cost of 10 {{.*}} fptosi
+  %r49 = fptosi double undef to i64
+
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r50 = sitofp i1 undef to float
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r51 = uitofp i1 undef to float
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r52 = sitofp i1 undef to double
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r53 = uitofp i1 undef to double
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r54 = sitofp i8 undef to float
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r55 = uitofp i8 undef to float
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r56 = sitofp i8 undef to double
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r57 = uitofp i8 undef to double
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r58 = sitofp i16 undef to float
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r59 = uitofp i16 undef to float
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r60 = sitofp i16 undef to double
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r61 = uitofp i16 undef to double
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r62 = sitofp i32 undef to float
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r63 = uitofp i32 undef to float
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r64 = sitofp i32 undef to double
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r65 = uitofp i32 undef to double
+  ; Function call
+  ; CHECK: cost of 10 {{.*}} sitofp
+  %r66 = sitofp i64 undef to float
+  ; CHECK: cost of 10 {{.*}} uitofp
+  %r67 = uitofp i64 undef to float
+  ; CHECK: cost of 10 {{.*}} sitofp
+  %r68 = sitofp i64 undef to double
+  ; CHECK: cost of 10 {{.*}} uitofp
+  %r69 = uitofp i64 undef to double
+
+  ;CHECK: cost of 0 {{.*}} ret
+  ret i32 undef
+}
+
diff --git a/test/Analysis/CostModel/ARM/gep.ll b/test/Analysis/CostModel/ARM/gep.ll
new file mode 100644
index 0000000..a63b87d
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/gep.ll
@@ -0,0 +1,43 @@
+; RUN: opt -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios6.0.0"
+
+define void @test_geps() {
+  ; Cost of scalar integer geps should be one. We can't always expect it to be
+  ; folded into the instruction addressing mode.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8*
+  %a0 = getelementptr inbounds i8* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16*
+  %a1 = getelementptr inbounds i16* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32*
+  %a2 = getelementptr inbounds i32* undef, i32 0
+
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64*
+  %a3 = getelementptr inbounds i64* undef, i32 0
+
+  ; Cost of scalar floating point geps should be one. We cannot fold the address
+  ; computation.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float*
+  %a4 = getelementptr inbounds float* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double*
+  %a5 = getelementptr inbounds double* undef, i32 0
+
+
+  ; Cost of vector geps should be one. We cannot fold the address computation.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>*
+  %a7 = getelementptr inbounds <4 x i8>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>*
+  %a8 = getelementptr inbounds <4 x i16>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>*
+  %a9 = getelementptr inbounds <4 x i32>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>*
+  %a10 = getelementptr inbounds <4 x i64>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>*
+  %a11 = getelementptr inbounds <4 x float>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>*
+  %a12 = getelementptr inbounds <4 x double>* undef, i32 0
+
+
+  ret void
+}
diff --git a/test/Analysis/CostModel/ARM/insertelement.ll b/test/Analysis/CostModel/ARM/insertelement.ll
new file mode 100644
index 0000000..f951b08
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/insertelement.ll
@@ -0,0 +1,46 @@
+; RUN: opt -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios6.0.0"
+
+; Multiple insert elements from loads into d subregisters are expensive on swift
+; due to renaming constraints.
+%T_i8v = type <8 x i8>
+%T_i8 = type i8
+; CHECK: insertelement_i8
+define void @insertelement_i8(%T_i8* %saddr,
+                           %T_i8v* %vaddr) {
+  %v0 = load %T_i8v* %vaddr
+  %v1 = load %T_i8* %saddr
+;CHECK: estimated cost of 3 for {{.*}} insertelement <8 x i8>
+  %v2 = insertelement %T_i8v %v0, %T_i8 %v1, i32 1
+  store %T_i8v %v2, %T_i8v* %vaddr
+  ret void
+}
+
+
+%T_i16v = type <4 x i16>
+%T_i16 = type i16
+; CHECK: insertelement_i16
+define void @insertelement_i16(%T_i16* %saddr,
+                           %T_i16v* %vaddr) {
+  %v0 = load %T_i16v* %vaddr
+  %v1 = load %T_i16* %saddr
+;CHECK: estimated cost of 3 for {{.*}} insertelement <4 x i16>
+  %v2 = insertelement %T_i16v %v0, %T_i16 %v1, i32 1
+  store %T_i16v %v2, %T_i16v* %vaddr
+  ret void
+}
+
+%T_i32v = type <2 x i32>
+%T_i32 = type i32
+; CHECK: insertelement_i32
+define void @insertelement_i32(%T_i32* %saddr,
+                           %T_i32v* %vaddr) {
+  %v0 = load %T_i32v* %vaddr
+  %v1 = load %T_i32* %saddr
+;CHECK: estimated cost of 3 for {{.*}} insertelement <2 x i32>
+  %v2 = insertelement %T_i32v %v0, %T_i32 %v1, i32 1
+  store %T_i32v %v2, %T_i32v* %vaddr
+  ret void
+}
diff --git a/test/Analysis/CostModel/ARM/lit.local.cfg b/test/Analysis/CostModel/ARM/lit.local.cfg
new file mode 100644
index 0000000..cb77b09
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/Analysis/CostModel/ARM/select.ll b/test/Analysis/CostModel/ARM/select.ll
new file mode 100644
index 0000000..96afccf
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/select.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios6.0.0"
+
+; CHECK: casts
+define void @casts() {
+    ; Scalar values
+  ; CHECK: cost of 1 {{.*}} select
+  %v1 = select i1 undef, i8 undef, i8 undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v2 = select i1 undef, i16 undef, i16 undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v3 = select i1 undef, i32 undef, i32 undef
+  ; CHECK: cost of 2 {{.*}} select
+  %v4 = select i1 undef, i64 undef, i64 undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v5 = select i1 undef, float undef, float undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v6 = select i1 undef, double undef, double undef
+
+    ; Vector values
+  ; CHECK: cost of 1 {{.*}} select
+  %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v8 = select <4 x i1>  undef, <4 x i8> undef, <4 x i8> undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v9 = select <8 x i1>  undef, <8 x i8> undef, <8 x i8> undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v10 = select <16 x i1>  undef, <16 x i8> undef, <16 x i8> undef
+
+  ; CHECK: cost of 1 {{.*}} select
+  %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v12 = select <4 x i1>  undef, <4 x i16> undef, <4 x i16> undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v13 = select <8 x i1>  undef, <8 x i16> undef, <8 x i16> undef
+
+  ; CHECK: cost of 1 {{.*}} select
+  %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v15 = select <4 x i1>  undef, <4 x i32> undef, <4 x i32> undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
+
+  ; CHECK: cost of 1 {{.*}} select
+  %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v18 = select <4 x i1>  undef, <4 x float> undef, <4 x float> undef
+
+  ; CHECK: cost of 1 {{.*}} select
+  %v19 = select <2 x i1>  undef, <2 x double> undef, <2 x double> undef
+
+  ret void
+}
diff --git a/test/Analysis/CostModel/ARM/shuffle.ll b/test/Analysis/CostModel/ARM/shuffle.ll
new file mode 100644
index 0000000..c92d668
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/shuffle.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios6.0.0"
+
+; CHECK: shuffle
+define void @shuffle() {
+
+
+  ;; Reverse shuffles should be lowered to vrev and possibly a vext (for
+  ;; quadwords)
+
+    ; Vector values
+  ; CHECK: cost of 1 {{.*}} shuffle
+  %v7 = shufflevector <2 x i8> undef, <2 x i8>undef, <2 x i32> <i32 1, i32 0>
+  ; CHECK: cost of 1 {{.*}} shuffle
+  %v8 = shufflevector <4 x i8> undef, <4 x i8>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ; CHECK: cost of 1 {{.*}} shuffle
+  %v9 = shufflevector <8 x i8> undef, <8 x i8>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ; CHECK: cost of 2 {{.*}} shuffle
+  %v10 = shufflevector <16 x i8> undef, <16 x i8>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+  ; CHECK: cost of 1 {{.*}} shuffle
+  %v11 = shufflevector <2 x i16> undef, <2 x i16>undef, <2 x i32> <i32 1, i32 0>
+  ; CHECK: cost of 1 {{.*}} shuffle
+  %v12 = shufflevector <4 x i16> undef, <4 x i16>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ; CHECK: cost of 2 {{.*}} shuffle
+  %v13 = shufflevector <8 x i16> undef, <8 x i16>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+  ; CHECK: cost of 1 {{.*}} shuffle
+  %v14 = shufflevector <2 x i32> undef, <2 x i32>undef, <2 x i32> <i32 1, i32 0>
+  ; CHECK: cost of 2 {{.*}} shuffle
+  %v15 = shufflevector <4 x i32> undef, <4 x i32>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+
+  ; CHECK: cost of 1 {{.*}} shuffle
+  %v16 = shufflevector <2 x float> undef, <2 x float>undef, <2 x i32> <i32 1, i32 0>
+  ; CHECK: cost of 2 {{.*}} shuffle
+  %v17 = shufflevector <4 x float> undef, <4 x float>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+
+  ret void
+}
diff --git a/test/Analysis/CostModel/PowerPC/insert_extract.ll b/test/Analysis/CostModel/PowerPC/insert_extract.ll
new file mode 100644
index 0000000..f51963d
--- /dev/null
+++ b/test/Analysis/CostModel/PowerPC/insert_extract.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @insert(i32 %arg) {
+  ; CHECK: cost of 13 {{.*}} insertelement
+  %x = insertelement <4 x i32> undef, i32 %arg, i32 0
+  ret i32 undef
+}
+
+define i32 @extract(<4 x i32> %arg) {
+  ; CHECK: cost of 13 {{.*}} extractelement
+  %x = extractelement <4 x i32> %arg, i32 0
+  ret i32 %x
+}
+
diff --git a/test/Analysis/CostModel/PowerPC/lit.local.cfg b/test/Analysis/CostModel/PowerPC/lit.local.cfg
new file mode 100644
index 0000000..4019eca
--- /dev/null
+++ b/test/Analysis/CostModel/PowerPC/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'PowerPC' in targets:
+    config.unsupported = True
+
diff --git a/test/Analysis/CostModel/PowerPC/load_store.ll b/test/Analysis/CostModel/PowerPC/load_store.ll
new file mode 100644
index 0000000..c77cce9
--- /dev/null
+++ b/test/Analysis/CostModel/PowerPC/load_store.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @stores(i32 %arg) {
+
+  ; CHECK: cost of 1 {{.*}} store
+  store i8 undef, i8* undef, align 4
+  ; CHECK: cost of 1 {{.*}} store
+  store i16 undef, i16* undef, align 4
+  ; CHECK: cost of 1 {{.*}} store
+  store i32 undef, i32* undef, align 4
+  ; CHECK: cost of 2 {{.*}} store
+  store i64 undef, i64* undef, align 4
+  ; CHECK: cost of 4 {{.*}} store
+  store i128 undef, i128* undef, align 4
+
+  ret i32 undef
+}
+define i32 @loads(i32 %arg) {
+  ; CHECK: cost of 1 {{.*}} load
+  load i8* undef, align 4
+  ; CHECK: cost of 1 {{.*}} load
+  load i16* undef, align 4
+  ; CHECK: cost of 1 {{.*}} load
+  load i32* undef, align 4
+  ; CHECK: cost of 2 {{.*}} load
+  load i64* undef, align 4
+  ; CHECK: cost of 4 {{.*}} load
+  load i128* undef, align 4
+
+  ret i32 undef
+}
+
diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll
index cedc682..bacc778 100644
--- a/test/Analysis/CostModel/X86/cast.ll
+++ b/test/Analysis/CostModel/X86/cast.ll
@@ -44,6 +44,10 @@ define i32 @zext_sext(<8 x i1> %in) {
   %B = zext <8 x i16> undef to <8 x i32>
   ;CHECK: cost of 1 {{.*}} sext
   %C = sext <4 x i32> undef to <4 x i64>
+  ;CHECK: cost of 8 {{.*}} sext
+  %C1 = sext <4 x i8> undef to <4 x i64>
+  ;CHECK: cost of 8 {{.*}} sext
+  %C2 = sext <4 x i16> undef to <4 x i64>
 
   ;CHECK: cost of 1 {{.*}} zext
   %D = zext <4 x i32> undef to <4 x i64>
@@ -59,7 +63,7 @@ define i32 @zext_sext(<8 x i1> %in) {
   ret i32 undef
 }
 
-define i32 @masks(<8 x i1> %in) {
+define i32 @masks8(<8 x i1> %in) {
   ;CHECK: cost of 6 {{.*}} zext
   %Z = zext <8 x i1> %in to <8 x i32>
   ;CHECK: cost of 9 {{.*}} sext
@@ -67,3 +71,9 @@ define i32 @masks(<8 x i1> %in) {
   ret i32 undef
 }
 
+define i32 @masks4(<4 x i1> %in) {
+  ;CHECK: cost of 8 {{.*}} sext
+  %S = sext <4 x i1> %in to <4 x i64>
+  ret i32 undef
+}
+
diff --git a/test/Analysis/CostModel/X86/gep.ll b/test/Analysis/CostModel/X86/gep.ll
new file mode 100644
index 0000000..877184a
--- /dev/null
+++ b/test/Analysis/CostModel/X86/gep.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+
+define void @test_geps() {
+  ; Cost of scalar geps should be zero. We expect them to be folded into
+  ; the instruction addressing mode.
+;CHECK:  cost of 0 for instruction: {{.*}} getelementptr inbounds i8*
+  %a0 = getelementptr inbounds i8* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16*
+  %a1 = getelementptr inbounds i16* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32*
+  %a2 = getelementptr inbounds i32* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64*
+  %a3 = getelementptr inbounds i64* undef, i32 0
+
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float*
+  %a4 = getelementptr inbounds float* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double*
+  %a5 = getelementptr inbounds double* undef, i32 0
+
+ ; Vector geps should also have zero cost.
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>*
+  %a7 = getelementptr inbounds <4 x i8>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>*
+  %a8 = getelementptr inbounds <4 x i16>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>*
+  %a9 = getelementptr inbounds <4 x i32>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>*
+  %a10 = getelementptr inbounds <4 x i64>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>*
+  %a11 = getelementptr inbounds <4 x float>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>*
+  %a12 = getelementptr inbounds <4 x double>* undef, i32 0
+
+
+  ret void
+}
diff --git a/test/Analysis/Profiling/lit.local.cfg b/test/Analysis/Profiling/lit.local.cfg
index 19eebc0..d507d3f 100644
--- a/test/Analysis/Profiling/lit.local.cfg
+++ b/test/Analysis/Profiling/lit.local.cfg
@@ -1 +1,13 @@
 config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+# Most profiling tests rely on a JIT being present to gather their data; AArch64
+# doesn't have any JIT at present so they will fail when run there.
+if root.host_arch in ['AArch64']:
+    config.unsupported = True
diff --git a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
index 1ac5927..c6cc26a 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
@@ -9,13 +9,13 @@
 ; invalid, as it's possible that this only happens after optimization on a
 ; code path which isn't ever executed.
 
-; CHECK: define void @test0_yes(i32* nocapture %p) nounwind readnone {
+; CHECK: define void @test0_yes(i32* nocapture %p) #0 {
 define void @test0_yes(i32* %p) nounwind {
   store i32 0, i32* %p, !tbaa !1
   ret void
 }
 
-; CHECK: define void @test0_no(i32* nocapture %p) nounwind {
+; CHECK: define void @test0_no(i32* nocapture %p) #1 {
 define void @test0_no(i32* %p) nounwind {
   store i32 0, i32* %p, !tbaa !2
   ret void
@@ -24,13 +24,13 @@ define void @test0_no(i32* %p) nounwind {
 ; Add the readonly attribute, since there's just a call to a function which 
 ; TBAA says doesn't modify any memory.
 
-; CHECK: define void @test1_yes(i32* nocapture %p) nounwind readonly {
+; CHECK: define void @test1_yes(i32* nocapture %p) #2 {
 define void @test1_yes(i32* %p) nounwind {
   call void @callee(i32* %p), !tbaa !1
   ret void
 }
 
-; CHECK: define void @test1_no(i32* %p) nounwind {
+; CHECK: define void @test1_no(i32* %p) #1 {
 define void @test1_no(i32* %p) nounwind {
   call void @callee(i32* %p), !tbaa !2
   ret void
@@ -43,13 +43,13 @@ define void @test1_no(i32* %p) nounwind {
 ; This is unusual, since the function is memcpy, but as above, this
 ; isn't necessarily invalid.
 
-; CHECK: define void @test2_yes(i8* nocapture %p, i8* nocapture %q, i64 %n) nounwind readnone {
+; CHECK: define void @test2_yes(i8* nocapture %p, i8* nocapture %q, i64 %n) #0 {
 define void @test2_yes(i8* %p, i8* %q, i64 %n) nounwind {
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i32 1, i1 false), !tbaa !1
   ret void
 }
 
-; CHECK: define void @test2_no(i8* nocapture %p, i8* nocapture %q, i64 %n) nounwind {
+; CHECK: define void @test2_no(i8* nocapture %p, i8* nocapture %q, i64 %n) #1 {
 define void @test2_no(i8* %p, i8* %q, i64 %n) nounwind {
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i32 1, i1 false), !tbaa !2
   ret void
@@ -57,13 +57,13 @@ define void @test2_no(i8* %p, i8* %q, i64 %n) nounwind {
 
 ; Similar to the others, va_arg only accesses memory through its operand.
 
-; CHECK: define i32 @test3_yes(i8* nocapture %p) nounwind readnone {
+; CHECK: define i32 @test3_yes(i8* nocapture %p) #0 {
 define i32 @test3_yes(i8* %p) nounwind {
   %t = va_arg i8* %p, i32, !tbaa !1
   ret i32 %t
 }
 
-; CHECK: define i32 @test3_no(i8* nocapture %p) nounwind {
+; CHECK: define i32 @test3_no(i8* nocapture %p) #1 {
 define i32 @test3_no(i8* %p) nounwind {
   %t = va_arg i8* %p, i32, !tbaa !2
   ret i32 %t
@@ -72,6 +72,10 @@ define i32 @test3_no(i8* %p) nounwind {
 declare void @callee(i32* %p) nounwind
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) nounwind
 
+; CHECK: attributes #0 = { nounwind readnone }
+; CHECK: attributes #1 = { nounwind }
+; CHECK: attributes #2 = { nounwind readonly }
+
 ; Root note.
 !0 = metadata !{ }
 
diff --git a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
index 8f080e2..6f1c22d 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
@@ -7,7 +7,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 
 ; CHECK:      define <8 x i16> @test0(i8* %p, i8* %q, <8 x i16> %y) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
+; CHECK-NEXT:   %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) [[NUW:#[0-9]+]]
 ; CHECK-NEXT:   call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
 ; CHECK-NEXT:   %c = add <8 x i16> %a, %a
 define <8 x i16> @test0(i8* %p, i8* %q, <8 x i16> %y) {
@@ -22,6 +22,9 @@ entry:
 declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly
 declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
 
+; CHECK: attributes #0 = { nounwind readonly }
+; CHECK: attributes [[NUW]] = { nounwind }
+
 !0 = metadata !{metadata !"tbaa root", null}
 !1 = metadata !{metadata !"A", metadata !0}
 !2 = metadata !{metadata !"B", metadata !0}
diff --git a/test/Assembler/2008-09-02-FunctionNotes.ll b/test/Assembler/2008-09-02-FunctionNotes.ll
index 761c91e..11a0411 100644
--- a/test/Assembler/2008-09-02-FunctionNotes.ll
+++ b/test/Assembler/2008-09-02-FunctionNotes.ll
@@ -1,14 +1,21 @@
 ; Test function attributes
-; RUN: llvm-as < %s | llvm-dis | grep inline | count 2
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
 
+; CHECK: define void @fn1() #0
 define void @fn1() alwaysinline {
   ret void
 }
 
+; CHECK: define void @fn2() #1
 define void @fn2() noinline {
   ret void
 }
 
+; CHECK: define void @fn3()
+; CHECK-NOT: define void @fn3() #{{.*}}
 define void @fn3() {
   ret void
 }
+
+; CHECK: attributes #0 = { alwaysinline }
+; CHECK: attributes #1 = { noinline }
diff --git a/test/Assembler/ConstantExprNoFold.ll b/test/Assembler/ConstantExprNoFold.ll
new file mode 100644
index 0000000..83e8909
--- /dev/null
+++ b/test/Assembler/ConstantExprNoFold.ll
@@ -0,0 +1,23 @@
+; This test checks to make sure that constant exprs don't fold in some simple
+; situations
+
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; Even give it a datalayout, to tempt folding as much as possible.
+target datalayout = "p:32:32"
+
+@A = global i64 0
+@B = global i64 0
+
+; Don't fold this. @A might really be allocated next to @B, in which case the
+; icmp should return true. It's not valid to *dereference* in @B from a pointer
+; based on @A, but icmp isn't a dereference.
+
+; CHECK: @C = global i1 icmp eq (i64* getelementptr inbounds (i64* @A, i64 1), i64* @B)
+@C = global i1 icmp eq (i64* getelementptr inbounds (i64* @A, i64 1), i64* @B)
+
+; Don't fold this completely away either. In theory this could be simplified
+; to only use a gep on one side of the icmp though.
+
+; CHECK: @D = global i1 icmp eq (i64* getelementptr inbounds (i64* @A, i64 1), i64* getelementptr inbounds (i64* @B, i64 2))
+@D = global i1 icmp eq (i64* getelementptr inbounds (i64* @A, i64 1), i64* getelementptr inbounds (i64* @B, i64 2))
diff --git a/test/Assembler/externally-initialized.ll b/test/Assembler/externally-initialized.ll
new file mode 100644
index 0000000..4be6e62
--- /dev/null
+++ b/test/Assembler/externally-initialized.ll
@@ -0,0 +1,5 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+
+; CHECK: @G = externally_initialized global i32 0
+
+@G = externally_initialized global i32 0
diff --git a/test/Assembler/unnamed-addr.ll b/test/Assembler/unnamed-addr.ll
index 3c94ca2..35b3b39 100644
--- a/test/Assembler/unnamed-addr.ll
+++ b/test/Assembler/unnamed-addr.ll
@@ -15,4 +15,6 @@ declare i32 @zed(%struct.foobar*, %struct.foobar*)
 
 ; CHECK: @bar.d = internal unnamed_addr constant %struct.foobar zeroinitializer, align 4
 ; CHECK: @foo.d = internal constant %struct.foobar zeroinitializer, align 4
-; CHECK: define i32 @main() unnamed_addr nounwind ssp {
+; CHECK: define i32 @main() unnamed_addr #0 {
+
+; CHECK: attributes #0 = { nounwind ssp }
diff --git a/test/Bindings/Ocaml/vmcore.ml b/test/Bindings/Ocaml/vmcore.ml
index 61be4b7..b49bab9 100644
--- a/test/Bindings/Ocaml/vmcore.ml
+++ b/test/Bindings/Ocaml/vmcore.ml
@@ -860,7 +860,8 @@ let test_builder () =
   group "function attribute";
   begin
       ignore (add_function_attr fn Attribute.UWTable);
-      (* RUN: grep "X7.*uwtable" < %t.ll
+      (* RUN: grep "X7.*#0" < %t.ll
+       * RUN: grep "attributes #0 = .*uwtable.*" < %t.ll
        *)
       insist ([Attribute.UWTable] = function_attr fn);
   end;
diff --git a/test/Bitcode/attributes.ll b/test/Bitcode/attributes.ll
index 502e967..6c46e94 100644
--- a/test/Bitcode/attributes.ll
+++ b/test/Bitcode/attributes.ll
@@ -14,7 +14,7 @@ define void @f2(i8 signext)
 }
 
 define void @f3() noreturn
-; CHECK: define void @f3() noreturn
+; CHECK: define void @f3() #0
 {
         ret void;
 }
@@ -32,7 +32,7 @@ define void @f5(i8* sret)
 }
 
 define void @f6() nounwind
-; CHECK: define void @f6() nounwind
+; CHECK: define void @f6() #1
 {
         ret void;
 }
@@ -56,43 +56,43 @@ define void @f9(i8* nest)
 }
 
 define void @f10() readnone
-; CHECK: define void @f10() readnone
+; CHECK: define void @f10() #2
 {
         ret void;
 }
 
 define void @f11() readonly
-; CHECK: define void @f11() readonly
+; CHECK: define void @f11() #3
 {
         ret void;
 }
 
 define void @f12() noinline
-; CHECK: define void @f12() noinline
+; CHECK: define void @f12() #4
 {
         ret void;
 }
 
 define void @f13() alwaysinline
-; CHECK: define void @f13() alwaysinline
+; CHECK: define void @f13() #5
 {
         ret void;
 }
 
 define void @f14() optsize
-; CHECK: define void @f14() optsize
+; CHECK: define void @f14() #6
 {
         ret void;
 }
 
 define void @f15() ssp
-; CHECK: define void @f15() ssp
+; CHECK: define void @f15() #7
 {
         ret void;
 }
 
 define void @f16() sspreq
-; CHECK: define void @f16() sspreq
+; CHECK: define void @f16() #8
 {
         ret void;
 }
@@ -110,55 +110,93 @@ define void @f18(i8* nocapture)
 }
 
 define void @f19() noredzone
-; CHECK: define void @f19() noredzone
+; CHECK: define void @f19() #9
 {
         ret void;
 }
 
 define void @f20() noimplicitfloat
-; CHECK: define void @f20() noimplicitfloat
+; CHECK: define void @f20() #10
 {
         ret void;
 }
 
 define void @f21() naked
-; CHECK: define void @f21() naked
+; CHECK: define void @f21() #11
 {
         ret void;
 }
 
 define void @f22() inlinehint
-; CHECK: define void @f22() inlinehint
+; CHECK: define void @f22() #12
 {
         ret void;
 }
 
 define void @f23() alignstack(4)
-; CHECK: define void @f23() alignstack(4)
+; CHECK: define void @f23() #13
 {
         ret void;
 }
 
 define void @f24() returns_twice
-; CHECK: define void @f24() returns_twice
+; CHECK: define void @f24() #14
 {
         ret void;
 }
 
 define void @f25() uwtable
-; CHECK: define void @f25() uwtable
+; CHECK: define void @f25() #15
 {
         ret void;
 }
 
 define void @f26() nonlazybind
-; CHECK: define void @f26() nonlazybind
+; CHECK: define void @f26() #16
 {
         ret void;
 }
 
-define void @f27() address_safety
-; CHECK: define void @f27() address_safety
+define void @f27() sanitize_address
+; CHECK: define void @f27() #17
 {
         ret void;
 }
+define void @f28() sanitize_thread
+; CHECK: define void @f28() #18
+{
+        ret void;
+}
+define void @f29() sanitize_memory
+; CHECK: define void @f29() #19
+{
+        ret void;
+}
+
+define void @f30() "cpu"="cortex-a8"
+; CHECK: define void @f30() #20
+{
+        ret void;
+}
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes #1 = { nounwind }
+; CHECK: attributes #2 = { readnone }
+; CHECK: attributes #3 = { readonly }
+; CHECK: attributes #4 = { noinline }
+; CHECK: attributes #5 = { alwaysinline }
+; CHECK: attributes #6 = { optsize }
+; CHECK: attributes #7 = { ssp }
+; CHECK: attributes #8 = { sspreq }
+; CHECK: attributes #9 = { noredzone }
+; CHECK: attributes #10 = { noimplicitfloat }
+; CHECK: attributes #11 = { naked }
+; CHECK: attributes #12 = { inlinehint }
+; CHECK: attributes #13 = { alignstack=4 }
+; CHECK: attributes #14 = { returns_twice }
+; CHECK: attributes #15 = { uwtable }
+; CHECK: attributes #16 = { nonlazybind }
+; CHECK: attributes #17 = { sanitize_address }
+; CHECK: attributes #18 = { sanitize_thread }
+; CHECK: attributes #19 = { sanitize_memory }
+; CHECK: attributes #20 = { "cpu"="cortex-a8" }
diff --git a/test/Bitcode/ptest-new.ll b/test/Bitcode/ptest-new.ll
index 276fb7a..735cc9c 100644
--- a/test/Bitcode/ptest-new.ll
+++ b/test/Bitcode/ptest-new.ll
@@ -13,10 +13,13 @@ entry:
  ret i32 %add2
 }
 
-; CHECK: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
-; CHECK: declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
-; CHECK: declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
+; CHECK: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) #1
+; CHECK: declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) #1
+; CHECK: declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) #1
 
 declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
 declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
 declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes #1 = { nounwind readnone }
diff --git a/test/Bitcode/ptest-old.ll b/test/Bitcode/ptest-old.ll
index fc6ed8e..fbe962f 100644
--- a/test/Bitcode/ptest-old.ll
+++ b/test/Bitcode/ptest-old.ll
@@ -13,10 +13,13 @@ entry:
  ret i32 %add2
 }
 
-; CHECK: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
-; CHECK: declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
-; CHECK: declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
+; CHECK: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) #1
+; CHECK: declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) #1
+; CHECK: declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) #1
 
 declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
 declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
 declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes #1 = { nounwind readnone }
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index e146ae1..3da7c18 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -24,6 +24,7 @@ set(LLVM_TEST_DEPENDS UnitTests
           llvm-nm
           llvm-objdump
           llvm-readobj
+          llvm-rtdyld
           macho-dump opt
           profile_rt-shared
           FileCheck count not
diff --git a/test/CodeGen/AArch64/adc.ll b/test/CodeGen/AArch64/adc.ll
new file mode 100644
index 0000000..7cb3732
--- /dev/null
+++ b/test/CodeGen/AArch64/adc.ll
@@ -0,0 +1,54 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+define i128 @test_simple(i128 %a, i128 %b, i128 %c) {
+; CHECK: test_simple:
+
+  %valadd = add i128 %a, %b
+; CHECK: adds [[ADDLO:x[0-9]+]], x0, x2
+; CHECK-NEXT: adcs [[ADDHI:x[0-9]+]], x1, x3
+
+  %valsub = sub i128 %valadd, %c
+; CHECK: subs x0, [[ADDLO]], x4
+; CHECK: sbcs x1, [[ADDHI]], x5
+
+  ret i128 %valsub
+; CHECK: ret
+}
+
+define i128 @test_imm(i128 %a) {
+; CHECK: test_imm:
+
+  %val = add i128 %a, 12
+; CHECK: adds x0, x0, #12
+; CHECK: adcs x1, x1, {{x[0-9]|xzr}}
+
+  ret i128 %val
+; CHECK: ret
+}
+
+define i128 @test_shifted(i128 %a, i128 %b) {
+; CHECK: test_shifted:
+
+  %rhs = shl i128 %b, 45
+
+  %val = add i128 %a, %rhs
+; CHECK: adds x0, x0, x2, lsl #45
+; CHECK: adcs x1, x1, {{x[0-9]}}
+
+  ret i128 %val
+; CHECK: ret
+}
+
+define i128 @test_extended(i128 %a, i16 %b) {
+; CHECK: test_extended:
+
+  %ext = sext i16 %b to i128
+  %rhs = shl i128 %ext, 3
+
+  %val = add i128 %a, %rhs
+; CHECK: adds x0, x0, w2, sxth #3
+; CHECK: adcs x1, x1, {{x[0-9]}}
+
+  ret i128 %val
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/addsub-shifted.ll b/test/CodeGen/AArch64/addsub-shifted.ll
new file mode 100644
index 0000000..f2c74f6
--- /dev/null
+++ b/test/CodeGen/AArch64/addsub-shifted.ll
@@ -0,0 +1,295 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_lsl_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
+; CHECK: test_lsl_arith:
+
+  %rhs1 = load volatile i32* @var32
+  %shift1 = shl i32 %rhs1, 18
+  %val1 = add i32 %lhs32, %shift1
+  store volatile i32 %val1, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #18
+
+  %rhs2 = load volatile i32* @var32
+  %shift2 = shl i32 %rhs2, 31
+  %val2 = add i32 %shift2, %lhs32
+  store volatile i32 %val2, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+
+  %rhs3 = load volatile i32* @var32
+  %shift3 = shl i32 %rhs3, 5
+  %val3 = sub i32 %lhs32, %shift3
+  store volatile i32 %val3, i32* @var32
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #5
+
+; Subtraction is not commutative!
+  %rhs4 = load volatile i32* @var32
+  %shift4 = shl i32 %rhs4, 19
+  %val4 = sub i32 %shift4, %lhs32
+  store volatile i32 %val4, i32* @var32
+; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #19
+
+  %lhs4a = load volatile i32* @var32
+  %shift4a = shl i32 %lhs4a, 15
+  %val4a = sub i32 0, %shift4a
+  store volatile i32 %val4a, i32* @var32
+; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, lsl #15
+
+  %rhs5 = load volatile i64* @var64
+  %shift5 = shl i64 %rhs5, 18
+  %val5 = add i64 %lhs64, %shift5
+  store volatile i64 %val5, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #18
+
+  %rhs6 = load volatile i64* @var64
+  %shift6 = shl i64 %rhs6, 31
+  %val6 = add i64 %shift6, %lhs64
+  store volatile i64 %val6, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #31
+
+  %rhs7 = load volatile i64* @var64
+  %shift7 = shl i64 %rhs7, 5
+  %val7 = sub i64 %lhs64, %shift7
+  store volatile i64 %val7, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #5
+
+; Subtraction is not commutative!
+  %rhs8 = load volatile i64* @var64
+  %shift8 = shl i64 %rhs8, 19
+  %val8 = sub i64 %shift8, %lhs64
+  store volatile i64 %val8, i64* @var64
+; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #19
+
+  %lhs8a = load volatile i64* @var64
+  %shift8a = shl i64 %lhs8a, 60
+  %val8a = sub i64 0, %shift8a
+  store volatile i64 %val8a, i64* @var64
+; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, lsl #60
+
+  ret void
+; CHECK: ret
+}
+
+define void @test_lsr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
+; CHECK: test_lsr_arith:
+
+  %shift1 = lshr i32 %rhs32, 18
+  %val1 = add i32 %lhs32, %shift1
+  store volatile i32 %val1, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #18
+
+  %shift2 = lshr i32 %rhs32, 31
+  %val2 = add i32 %shift2, %lhs32
+  store volatile i32 %val2, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #31
+
+  %shift3 = lshr i32 %rhs32, 5
+  %val3 = sub i32 %lhs32, %shift3
+  store volatile i32 %val3, i32* @var32
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #5
+
+; Subtraction is not commutative!
+  %shift4 = lshr i32 %rhs32, 19
+  %val4 = sub i32 %shift4, %lhs32
+  store volatile i32 %val4, i32* @var32
+; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #19
+
+  %shift4a = lshr i32 %lhs32, 15
+  %val4a = sub i32 0, %shift4a
+  store volatile i32 %val4a, i32* @var32
+; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, lsr #15
+
+  %shift5 = lshr i64 %rhs64, 18
+  %val5 = add i64 %lhs64, %shift5
+  store volatile i64 %val5, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #18
+
+  %shift6 = lshr i64 %rhs64, 31
+  %val6 = add i64 %shift6, %lhs64
+  store volatile i64 %val6, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #31
+
+  %shift7 = lshr i64 %rhs64, 5
+  %val7 = sub i64 %lhs64, %shift7
+  store volatile i64 %val7, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #5
+
+; Subtraction is not commutative!
+  %shift8 = lshr i64 %rhs64, 19
+  %val8 = sub i64 %shift8, %lhs64
+  store volatile i64 %val8, i64* @var64
+; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #19
+
+  %shift8a = lshr i64 %lhs64, 45
+  %val8a = sub i64 0, %shift8a
+  store volatile i64 %val8a, i64* @var64
+; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, lsr #45
+
+  ret void
+; CHECK: ret
+}
+
+define void @test_asr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
+; CHECK: test_asr_arith:
+
+  %shift1 = ashr i32 %rhs32, 18
+  %val1 = add i32 %lhs32, %shift1
+  store volatile i32 %val1, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #18
+
+  %shift2 = ashr i32 %rhs32, 31
+  %val2 = add i32 %shift2, %lhs32
+  store volatile i32 %val2, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #31
+
+  %shift3 = ashr i32 %rhs32, 5
+  %val3 = sub i32 %lhs32, %shift3
+  store volatile i32 %val3, i32* @var32
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #5
+
+; Subtraction is not commutative!
+  %shift4 = ashr i32 %rhs32, 19
+  %val4 = sub i32 %shift4, %lhs32
+  store volatile i32 %val4, i32* @var32
+; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #19
+
+  %shift4a = ashr i32 %lhs32, 15
+  %val4a = sub i32 0, %shift4a
+  store volatile i32 %val4a, i32* @var32
+; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, asr #15
+
+  %shift5 = ashr i64 %rhs64, 18
+  %val5 = add i64 %lhs64, %shift5
+  store volatile i64 %val5, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #18
+
+  %shift6 = ashr i64 %rhs64, 31
+  %val6 = add i64 %shift6, %lhs64
+  store volatile i64 %val6, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #31
+
+  %shift7 = ashr i64 %rhs64, 5
+  %val7 = sub i64 %lhs64, %shift7
+  store volatile i64 %val7, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #5
+
+; Subtraction is not commutative!
+  %shift8 = ashr i64 %rhs64, 19
+  %val8 = sub i64 %shift8, %lhs64
+  store volatile i64 %val8, i64* @var64
+; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #19
+
+  %shift8a = ashr i64 %lhs64, 45
+  %val8a = sub i64 0, %shift8a
+  store volatile i64 %val8a, i64* @var64
+; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, asr #45
+
+  ret void
+; CHECK: ret
+}
+
+define i32 @test_cmp(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
+; CHECK: test_cmp:
+
+  %shift1 = shl i32 %rhs32, 13
+  %tst1 = icmp uge i32 %lhs32, %shift1
+  br i1 %tst1, label %t2, label %end
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, lsl #13
+
+t2:
+  %shift2 = lshr i32 %rhs32, 20
+  %tst2 = icmp ne i32 %lhs32, %shift2
+  br i1 %tst2, label %t3, label %end
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, lsr #20
+
+t3:
+  %shift3 = ashr i32 %rhs32, 9
+  %tst3 = icmp ne i32 %lhs32, %shift3
+  br i1 %tst3, label %t4, label %end
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, asr #9
+
+t4:
+  %shift4 = shl i64 %rhs64, 43
+  %tst4 = icmp uge i64 %lhs64, %shift4
+  br i1 %tst4, label %t5, label %end
+; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, lsl #43
+
+t5:
+  %shift5 = lshr i64 %rhs64, 20
+  %tst5 = icmp ne i64 %lhs64, %shift5
+  br i1 %tst5, label %t6, label %end
+; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, lsr #20
+
+t6:
+  %shift6 = ashr i64 %rhs64, 59
+  %tst6 = icmp ne i64 %lhs64, %shift6
+  br i1 %tst6, label %t7, label %end
+; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, asr #59
+
+t7:
+  ret i32 1
+end:
+
+  ret i32 0
+; CHECK: ret
+}
+
+define i32 @test_cmn(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
+; CHECK: test_cmn:
+
+  %shift1 = shl i32 %rhs32, 13
+  %val1 = sub i32 0, %shift1
+  %tst1 = icmp uge i32 %lhs32, %val1
+  br i1 %tst1, label %t2, label %end
+  ; Important that this isn't lowered to a cmn instruction because if %rhs32 ==
+  ; 0 then the results will differ.
+; CHECK: sub [[RHS:w[0-9]+]], wzr, {{w[0-9]+}}, lsl #13
+; CHECK: cmp {{w[0-9]+}}, [[RHS]]
+
+t2:
+  %shift2 = lshr i32 %rhs32, 20
+  %val2 = sub i32 0, %shift2
+  %tst2 = icmp ne i32 %lhs32, %val2
+  br i1 %tst2, label %t3, label %end
+; CHECK: cmn {{w[0-9]+}}, {{w[0-9]+}}, lsr #20
+
+t3:
+  %shift3 = ashr i32 %rhs32, 9
+  %val3 = sub i32 0, %shift3
+  %tst3 = icmp eq i32 %lhs32, %val3
+  br i1 %tst3, label %t4, label %end
+; CHECK: cmn {{w[0-9]+}}, {{w[0-9]+}}, asr #9
+
+t4:
+  %shift4 = shl i64 %rhs64, 43
+  %val4 = sub i64 0, %shift4
+  %tst4 = icmp slt i64 %lhs64, %val4
+  br i1 %tst4, label %t5, label %end
+  ; Again, it's important that cmn isn't used here in case %rhs64 == 0.
+; CHECK: sub [[RHS:x[0-9]+]], xzr, {{x[0-9]+}}, lsl #43
+; CHECK: cmp {{x[0-9]+}}, [[RHS]]
+
+t5:
+  %shift5 = lshr i64 %rhs64, 20
+  %val5 = sub i64 0, %shift5
+  %tst5 = icmp ne i64 %lhs64, %val5
+  br i1 %tst5, label %t6, label %end
+; CHECK: cmn {{x[0-9]+}}, {{x[0-9]+}}, lsr #20
+
+t6:
+  %shift6 = ashr i64 %rhs64, 59
+  %val6 = sub i64 0, %shift6
+  %tst6 = icmp ne i64 %lhs64, %val6
+  br i1 %tst6, label %t7, label %end
+; CHECK: cmn {{x[0-9]+}}, {{x[0-9]+}}, asr #59
+
+t7:
+  ret i32 1
+end:
+
+  ret i32 0
+; CHECK: ret
+}
+
diff --git a/test/CodeGen/AArch64/addsub.ll b/test/CodeGen/AArch64/addsub.ll
new file mode 100644
index 0000000..5148807
--- /dev/null
+++ b/test/CodeGen/AArch64/addsub.ll
@@ -0,0 +1,127 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+; Note that this should be refactored (for efficiency if nothing else)
+; when the PCS is implemented so we don't have to worry about the
+; loads and stores.
+
+@var_i32 = global i32 42
+@var_i64 = global i64 0
+
+; Add pure 12-bit immediates:
+define void @add_small() {
+; CHECK: add_small:
+
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #4095
+  %val32 = load i32* @var_i32
+  %newval32 = add i32 %val32, 4095
+  store i32 %newval32, i32* @var_i32
+
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #52
+  %val64 = load i64* @var_i64
+  %newval64 = add i64 %val64, 52
+  store i64 %newval64, i64* @var_i64
+
+  ret void
+}
+
+; Add 12-bit immediates, shifted left by 12 bits
+define void @add_med() {
+; CHECK: add_med:
+
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #3567, lsl #12
+  %val32 = load i32* @var_i32
+  %newval32 = add i32 %val32, 14610432 ; =0xdef000
+  store i32 %newval32, i32* @var_i32
+
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #4095, lsl #12
+  %val64 = load i64* @var_i64
+  %newval64 = add i64 %val64, 16773120 ; =0xfff000
+  store i64 %newval64, i64* @var_i64
+
+  ret void
+}
+
+; Subtract 12-bit immediates
+define void @sub_small() {
+; CHECK: sub_small:
+
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #4095
+  %val32 = load i32* @var_i32
+  %newval32 = sub i32 %val32, 4095
+  store i32 %newval32, i32* @var_i32
+
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, #52
+  %val64 = load i64* @var_i64
+  %newval64 = sub i64 %val64, 52
+  store i64 %newval64, i64* @var_i64
+
+  ret void
+}
+
+; Subtract 12-bit immediates, shifted left by 12 bits
+define void @sub_med() {
+; CHECK: sub_med:
+
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #3567, lsl #12
+  %val32 = load i32* @var_i32
+  %newval32 = sub i32 %val32, 14610432 ; =0xdef000
+  store i32 %newval32, i32* @var_i32
+
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, #4095, lsl #12
+  %val64 = load i64* @var_i64
+  %newval64 = sub i64 %val64, 16773120 ; =0xfff000
+  store i64 %newval64, i64* @var_i64
+
+  ret void
+}
+
+define void @testing() {
+; CHECK: testing:
+  %val = load i32* @var_i32
+; Each conditional branch below targets %ret; bind its label once, then reuse it.
+; CHECK: cmp {{w[0-9]+}}, #4095
+; CHECK: b.ne [[RET:.LBB[0-9]+_[0-9]+]]
+  %cmp_pos_small = icmp ne i32 %val, 4095
+  br i1 %cmp_pos_small, label %ret, label %test2
+
+test2:
+; CHECK: cmp {{w[0-9]+}}, #3567, lsl #12
+; CHECK: b.lo [[RET]]
+  %newval2 = add i32 %val, 1
+  store i32 %newval2, i32* @var_i32
+  %cmp_pos_big = icmp ult i32 %val, 14610432
+  br i1 %cmp_pos_big, label %ret, label %test3
+
+test3:
+; CHECK: cmp {{w[0-9]+}}, #123
+; CHECK: b.lt [[RET]]
+  %newval3 = add i32 %val, 2
+  store i32 %newval3, i32* @var_i32
+  %cmp_pos_slt = icmp slt i32 %val, 123
+  br i1 %cmp_pos_slt, label %ret, label %test4
+
+test4:
+; CHECK: cmp {{w[0-9]+}}, #321
+; CHECK: b.gt [[RET]]
+  %newval4 = add i32 %val, 3
+  store i32 %newval4, i32* @var_i32
+  %cmp_pos_sgt = icmp sgt i32 %val, 321
+  br i1 %cmp_pos_sgt, label %ret, label %test5
+
+test5:
+; CHECK: cmn {{w[0-9]+}}, #444
+; CHECK: b.gt [[RET]]
+  %newval5 = add i32 %val, 4
+  store i32 %newval5, i32* @var_i32
+  %cmp_neg_sgt = icmp sgt i32 %val, -444
+  br i1 %cmp_neg_sgt, label %ret, label %test6
+
+test6:
+  %newval6 = add i32 %val, 5
+  store i32 %newval6, i32* @var_i32
+  ret void
+
+ret:
+  ret void
+}
+; TODO: adds/subs
diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll
new file mode 100644
index 0000000..2dd1662
--- /dev/null
+++ b/test/CodeGen/AArch64/addsub_ext.ll
@@ -0,0 +1,189 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var8 = global i8 0
+@var16 = global i16 0
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @addsub_i8rhs() {
+; CHECK: addsub_i8rhs:
+    %val8_tmp = load i8* @var8
+    %lhs32 = load i32* @var32
+    %lhs64 = load i64* @var64
+
+    ; Need this to prevent extension upon load and give a vanilla i8 operand.
+    %val8 = add i8 %val8_tmp, 123
+
+
+; Zero-extending to 32-bits
+    %rhs32_zext = zext i8 %val8 to i32
+    %res32_zext = add i32 %lhs32, %rhs32_zext
+    store volatile i32 %res32_zext, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb
+
+   %rhs32_zext_shift = shl i32 %rhs32_zext, 3
+   %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
+   store volatile i32 %res32_zext_shift, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3
+
+
+; Zero-extending to 64-bits
+    %rhs64_zext = zext i8 %val8 to i64
+    %res64_zext = add i64 %lhs64, %rhs64_zext
+    store volatile i64 %res64_zext, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb
+
+   %rhs64_zext_shift = shl i64 %rhs64_zext, 1
+   %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
+   store volatile i64 %res64_zext_shift, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1
+
+; Sign-extending to 32-bits
+    %rhs32_sext = sext i8 %val8 to i32
+    %res32_sext = add i32 %lhs32, %rhs32_sext
+    store volatile i32 %res32_sext, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb
+
+   %rhs32_sext_shift = shl i32 %rhs32_sext, 1
+   %res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift
+   store volatile i32 %res32_sext_shift, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb #1
+
+; Sign-extending to 64-bits
+    %rhs64_sext = sext i8 %val8 to i64
+    %res64_sext = add i64 %lhs64, %rhs64_sext
+    store volatile i64 %res64_sext, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb
+
+   %rhs64_sext_shift = shl i64 %rhs64_sext, 4
+   %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
+   store volatile i64 %res64_sext_shift, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb #4
+
+
+; CMP variants
+    %tst = icmp slt i32 %lhs32, %rhs32_zext
+    br i1 %tst, label %end, label %test2
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxtb
+
+test2:
+    %cmp_sext = sext i8 %val8 to i64
+    %tst2 = icmp eq i64 %lhs64, %cmp_sext
+    br i1 %tst2, label %other, label %end
+; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxtb
+
+other:
+    store volatile i32 %lhs32, i32* @var32
+    ret void
+
+end:
+    ret void
+}
+
+define void @addsub_i16rhs() {
+; CHECK: addsub_i16rhs:
+    %val16_tmp = load i16* @var16
+    %lhs32 = load i32* @var32
+    %lhs64 = load i64* @var64
+
+    ; Need this to prevent extension upon load and give a vanilla i16 operand.
+    %val16 = add i16 %val16_tmp, 123
+
+
+; Zero-extending to 32-bits
+    %rhs32_zext = zext i16 %val16 to i32
+    %res32_zext = add i32 %lhs32, %rhs32_zext
+    store volatile i32 %res32_zext, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth
+
+   %rhs32_zext_shift = shl i32 %rhs32_zext, 3
+   %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
+   store volatile i32 %res32_zext_shift, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3
+
+
+; Zero-extending to 64-bits
+    %rhs64_zext = zext i16 %val16 to i64
+    %res64_zext = add i64 %lhs64, %rhs64_zext
+    store volatile i64 %res64_zext, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth
+
+   %rhs64_zext_shift = shl i64 %rhs64_zext, 1
+   %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
+   store volatile i64 %res64_zext_shift, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
+
+; Sign-extending to 32-bits
+    %rhs32_sext = sext i16 %val16 to i32
+    %res32_sext = add i32 %lhs32, %rhs32_sext
+    store volatile i32 %res32_sext, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth
+
+   %rhs32_sext_shift = shl i32 %rhs32_sext, 1
+   %res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift
+   store volatile i32 %res32_sext_shift, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth #1
+
+; Sign-extending to 64-bits
+    %rhs64_sext = sext i16 %val16 to i64
+    %res64_sext = add i64 %lhs64, %rhs64_sext
+    store volatile i64 %res64_sext, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth
+
+   %rhs64_sext_shift = shl i64 %rhs64_sext, 4
+   %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
+   store volatile i64 %res64_sext_shift, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth #4
+
+
+; CMP variants
+    %tst = icmp slt i32 %lhs32, %rhs32_zext
+    br i1 %tst, label %end, label %test2
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxth
+
+test2:
+    %cmp_sext = sext i16 %val16 to i64
+    %tst2 = icmp eq i64 %lhs64, %cmp_sext
+    br i1 %tst2, label %other, label %end
+; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxth
+
+other:
+    store volatile i32 %lhs32, i32* @var32
+    ret void
+
+end:
+    ret void
+}
+
+; N.b. we could probably check more here ("add w2, w3, w1, uxtw" for
+; example), but the remaining instructions are probably not idiomatic
+; in the face of "add/sub (shifted register)" so I don't intend to.
+define void @addsub_i32rhs() {
+; CHECK: addsub_i32rhs:
+    %val32_tmp = load i32* @var32
+    %lhs64 = load i64* @var64
+
+    %val32 = add i32 %val32_tmp, 123
+
+    %rhs64_zext = zext i32 %val32 to i64
+    %res64_zext = add i64 %lhs64, %rhs64_zext
+    store volatile i64 %res64_zext, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
+
+    %rhs64_zext_shift = shl i64 %rhs64_zext, 2
+    %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
+    store volatile i64 %res64_zext_shift, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
+
+    %rhs64_sext = sext i32 %val32 to i64
+    %res64_sext = add i64 %lhs64, %rhs64_sext
+    store volatile i64 %res64_sext, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw
+
+    %rhs64_sext_shift = shl i64 %rhs64_sext, 2
+    %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
+    store volatile i64 %res64_sext_shift, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2
+
+    ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/adrp-relocation.ll b/test/CodeGen/AArch64/adrp-relocation.ll
new file mode 100644
index 0000000..c33b442
--- /dev/null
+++ b/test/CodeGen/AArch64/adrp-relocation.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -filetype=obj < %s | elf-dump | FileCheck %s
+
+define i64 @testfn() nounwind {
+entry:
+  ret i64 0
+}
+
+define i64 @foo() nounwind {
+entry:
+  %bar = alloca i64 ()*, align 8
+  store i64 ()* @testfn, i64 ()** %bar, align 8
+  %call = call i64 @testfn()
+  ret i64 %call
+}
+
+; The above should produce an ADRP/ADD pair to calculate the address of
+; testfn. The important point is that LLVM shouldn't think it can deal with the
+; relocation on the ADRP itself (even though it knows everything about the
+; relative offsets of testfn and foo) because its value depends on where this
+; object file's .text section gets relocated in memory.
+
+; CHECK: .rela.text
+
+; CHECK: # Relocation 0
+; CHECK-NEXT: (('r_offset', 0x0000000000000010)
+; CHECK-NEXT:  ('r_sym', 0x00000007)
+; CHECK-NEXT:  ('r_type', 0x00000113)
+; CHECK-NEXT:  ('r_addend', 0x0000000000000000)
+; CHECK-NEXT: ),
+; CHECK-NEXT:  Relocation 1
+; CHECK-NEXT: (('r_offset', 0x0000000000000014)
+; CHECK-NEXT:  ('r_sym', 0x00000007)
+; CHECK-NEXT:  ('r_type', 0x00000115)
+; CHECK-NEXT:  ('r_addend', 0x0000000000000000)
+; CHECK-NEXT: ),
diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll
new file mode 100644
index 0000000..6421769
--- /dev/null
+++ b/test/CodeGen/AArch64/alloca.ll
@@ -0,0 +1,134 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+declare void @use_addr(i8*)
+
+define void @test_simple_alloca(i64 %n) {
+; CHECK: test_simple_alloca:
+
+  %buf = alloca i8, i64 %n
+  ; Make sure we align the stack change to 16 bytes:
+; CHECK: add [[SPDELTA:x[0-9]+]], x0, #15
+; CHECK: and x0, [[SPDELTA]], #0xfffffffffffffff0
+
+  ; Make sure we change SP. It would be surprising if anything but x0 were used
+  ; for the final sp, but it could be if it was then moved into x0.
+; CHECK: mov [[TMP:x[0-9]+]], sp
+; CHECK: sub x0, [[TMP]], [[SPDELTA]]
+; CHECK: mov sp, x0
+
+  call void @use_addr(i8* %buf)
+; CHECK: bl use_addr
+
+  ret void
+  ; Make sure epilogue restores sp from fp
+; CHECK: sub sp, x29, #16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: add sp, sp, #32
+; CHECK: ret
+}
+
+declare void @use_addr_loc(i8*, i64*)
+
+define i64 @test_alloca_with_local(i64 %n) {
+; CHECK: test_alloca_with_local:
+; CHECK: sub sp, sp, #32
+; CHECK: stp x29, x30, [sp, #16]
+
+  %loc = alloca i64
+  %buf = alloca i8, i64 %n
+  ; Make sure we align the stack change to 16 bytes:
+; CHECK: add [[SPDELTA:x[0-9]+]], x0, #15
+; CHECK: and x0, [[SPDELTA]], #0xfffffffffffffff0
+
+  ; Make sure we change SP. It would be surprising if anything but x0 were used
+  ; for the final sp, but it could be if it was then moved into x0.
+; CHECK: mov [[TMP:x[0-9]+]], sp
+; CHECK: sub x0, [[TMP]], [[SPDELTA]]
+; CHECK: mov sp, x0
+
+  ; Obviously suboptimal code here, but it is needed to get &local in x1
+; CHECK: sub [[TMP:x[0-9]+]], x29, [[LOC_FROM_FP:#[0-9]+]]
+; CHECK: add x1, [[TMP]], #0
+
+  call void @use_addr_loc(i8* %buf, i64* %loc)
+; CHECK: bl use_addr_loc
+
+  %val = load i64* %loc
+; CHECK: sub x[[TMP:[0-9]+]], x29, [[LOC_FROM_FP]]
+; CHECK: ldr x0, [x[[TMP]]]
+
+  ret i64 %val
+  ; Make sure epilogue restores sp from fp
+; CHECK: sub sp, x29, #16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: add sp, sp, #32
+; CHECK: ret
+}
+
+define void @test_variadic_alloca(i64 %n, ...) {
+; CHECK: test_variadic_alloca:
+
+; CHECK: sub     sp, sp, #208
+; CHECK: stp     x29, x30, [sp, #192]
+; CHECK: add     x29, sp, #192
+; CHECK: sub     x9, x29, #192
+; CHECK: add     x8, x9, #0
+; CHECK: str     q7, [x8, #112]
+; [...]
+; CHECK: str     q1, [x8, #16]
+
+  %addr = alloca i8, i64 %n
+
+  call void @use_addr(i8* %addr)
+; CHECK: bl use_addr
+
+  ret void
+; CHECK: sub sp, x29, #192
+; CHECK: ldp x29, x30, [sp, #192]
+; CHECK: add sp, sp, #208
+}
+
+define void @test_alloca_large_frame(i64 %n) {
+; CHECK: test_alloca_large_frame:
+
+; CHECK: sub sp, sp, #496
+; CHECK: stp x29, x30, [sp, #480]
+; CHECK: add x29, sp, #480
+; CHECK: sub sp, sp, #48
+; CHECK: sub sp, sp, #1953, lsl #12
+
+  %addr1 = alloca i8, i64 %n
+  %addr2 = alloca i64, i64 1000000
+
+  call void @use_addr_loc(i8* %addr1, i64* %addr2)
+
+  ret void
+; CHECK: sub sp, x29, #480
+; CHECK: ldp x29, x30, [sp, #480]
+; CHECK: add sp, sp, #496
+}
+
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
+
+define void @test_scoped_alloca(i64 %n) {
+; CHECK: test_scoped_alloca:
+; CHECK: sub sp, sp, #32
+
+  %sp = call i8* @llvm.stacksave()
+; CHECK: mov [[SAVED_SP:x[0-9]+]], sp
+
+  %addr = alloca i8, i64 %n
+; CHECK: and [[SPDELTA:x[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0
+; CHECK: mov [[OLDSP:x[0-9]+]], sp
+; CHECK: sub [[NEWSP:x[0-9]+]], [[OLDSP]], [[SPDELTA]]
+; CHECK: mov sp, [[NEWSP]]
+
+  call void @use_addr(i8* %addr)
+; CHECK: bl use_addr
+
+  call void @llvm.stackrestore(i8* %sp)
+; CHECK: mov sp, [[SAVED_SP]]
+
+  ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/analyze-branch.ll b/test/CodeGen/AArch64/analyze-branch.ll
new file mode 100644
index 0000000..e10bbb0
--- /dev/null
+++ b/test/CodeGen/AArch64/analyze-branch.ll
@@ -0,0 +1,231 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; This test checks that LLVM can do basic stripping and reapplying of branches
+; to basic blocks.
+
+declare void @test_true()
+declare void @test_false()
+
+; !0 corresponds to a branch being taken, !1 to not being taken.
+!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!1 = metadata !{metadata !"branch_weights", i32 4, i32 64}
+
+define void @test_Bcc_fallthrough_taken(i32 %in) nounwind {
+; CHECK: test_Bcc_fallthrough_taken:
+  %tst = icmp eq i32 %in, 42
+  br i1 %tst, label %true, label %false, !prof !0
+
+; CHECK: cmp {{w[0-9]+}}, #42
+
+; CHECK: b.ne [[FALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_true
+
+; CHECK: [[FALSE]]:
+; CHECK: bl test_false
+
+true:
+  call void @test_true()
+  ret void
+
+false:
+  call void @test_false()
+  ret void
+}
+
+define void @test_Bcc_fallthrough_nottaken(i32 %in) nounwind {
+; CHECK: test_Bcc_fallthrough_nottaken:
+  %tst = icmp eq i32 %in, 42
+  br i1 %tst, label %true, label %false, !prof !1
+
+; CHECK: cmp {{w[0-9]+}}, #42
+
+; CHECK: b.eq [[TRUE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_false
+
+; CHECK: [[TRUE]]:
+; CHECK: bl test_true
+
+true:
+  call void @test_true()
+  ret void
+
+false:
+  call void @test_false()
+  ret void
+}
+
+define void @test_CBZ_fallthrough_taken(i32 %in) nounwind {
+; CHECK: test_CBZ_fallthrough_taken:
+  %tst = icmp eq i32 %in, 0
+  br i1 %tst, label %true, label %false, !prof !0
+
+; CHECK: cbnz {{w[0-9]+}}, [[FALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_true
+
+; CHECK: [[FALSE]]:
+; CHECK: bl test_false
+
+true:
+  call void @test_true()
+  ret void
+
+false:
+  call void @test_false()
+  ret void
+}
+
+define void @test_CBZ_fallthrough_nottaken(i64 %in) nounwind {
+; CHECK: test_CBZ_fallthrough_nottaken:
+  %tst = icmp eq i64 %in, 0
+  br i1 %tst, label %true, label %false, !prof !1
+
+; CHECK: cbz {{x[0-9]+}}, [[TRUE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_false
+
+; CHECK: [[TRUE]]:
+; CHECK: bl test_true
+
+true:
+  call void @test_true()
+  ret void
+
+false:
+  call void @test_false()
+  ret void
+}
+
+define void @test_CBNZ_fallthrough_taken(i32 %in) nounwind {
+; CHECK: test_CBNZ_fallthrough_taken:
+  %tst = icmp ne i32 %in, 0
+  br i1 %tst, label %true, label %false, !prof !0
+
+; CHECK: cbz {{w[0-9]+}}, [[FALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_true
+
+; CHECK: [[FALSE]]:
+; CHECK: bl test_false
+
+true:
+  call void @test_true()
+  ret void
+
+false:
+  call void @test_false()
+  ret void
+}
+
+define void @test_CBNZ_fallthrough_nottaken(i64 %in) nounwind {
+; CHECK: test_CBNZ_fallthrough_nottaken:
+  %tst = icmp ne i64 %in, 0
+  br i1 %tst, label %true, label %false, !prof !1
+
+; CHECK: cbnz {{x[0-9]+}}, [[TRUE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_false
+
+; CHECK: [[TRUE]]:
+; CHECK: bl test_true
+
+true:
+  call void @test_true()
+  ret void
+
+false:
+  call void @test_false()
+  ret void
+}
+
+define void @test_TBZ_fallthrough_taken(i32 %in) nounwind {
+; CHECK: test_TBZ_fallthrough_taken:
+  %bit = and i32 %in, 32768
+  %tst = icmp eq i32 %bit, 0
+  br i1 %tst, label %true, label %false, !prof !0
+
+; CHECK: tbnz {{w[0-9]+}}, #15, [[FALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_true
+
+; CHECK: [[FALSE]]:
+; CHECK: bl test_false
+
+true:
+  call void @test_true()
+  ret void
+
+false:
+  call void @test_false()
+  ret void
+}
+
+define void @test_TBZ_fallthrough_nottaken(i64 %in) nounwind {
+; CHECK: test_TBZ_fallthrough_nottaken:
+  %bit = and i64 %in, 32768
+  %tst = icmp eq i64 %bit, 0
+  br i1 %tst, label %true, label %false, !prof !1
+
+; CHECK: tbz {{x[0-9]+}}, #15, [[TRUE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_false
+
+; CHECK: [[TRUE]]:
+; CHECK: bl test_true
+
+true:
+  call void @test_true()
+  ret void
+
+false:
+  call void @test_false()
+  ret void
+}
+
+
+define void @test_TBNZ_fallthrough_taken(i32 %in) nounwind {
+; CHECK: test_TBNZ_fallthrough_taken:
+  %bit = and i32 %in, 32768
+  %tst = icmp ne i32 %bit, 0
+  br i1 %tst, label %true, label %false, !prof !0
+
+; CHECK: tbz {{w[0-9]+}}, #15, [[FALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_true
+
+; CHECK: [[FALSE]]:
+; CHECK: bl test_false
+
+true:
+  call void @test_true()
+  ret void
+
+false:
+  call void @test_false()
+  ret void
+}
+
+define void @test_TBNZ_fallthrough_nottaken(i64 %in) nounwind {
+; CHECK: test_TBNZ_fallthrough_nottaken:
+  %bit = and i64 %in, 32768
+  %tst = icmp ne i64 %bit, 0
+  br i1 %tst, label %true, label %false, !prof !1
+
+; CHECK: tbnz {{x[0-9]+}}, #15, [[TRUE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_false
+
+; CHECK: [[TRUE]]:
+; CHECK: bl test_true
+
+true:
+  call void @test_true()
+  ret void
+
+false:
+  call void @test_false()
+  ret void
+}
+
diff --git a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
new file mode 100644
index 0000000..3c03e47
--- /dev/null
+++ b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+define i32 @foo(i32* %var, i1 %cond) {
+; CHECK: foo:
+  br i1 %cond, label %atomic_ver, label %simple_ver
+simple_ver:
+  %oldval = load i32* %var
+  %newval = add nsw i32 %oldval, -1
+  store i32 %newval, i32* %var
+  br label %somewhere
+atomic_ver:
+  %val = atomicrmw add i32* %var, i32 -1 seq_cst
+  br label %somewhere
+; CHECK: dmb
+; CHECK: ldxr
+; CHECK: dmb
+  ; The key point here is that the second dmb isn't immediately followed by the
+  ; simple_ver basic block, which LLVM attempted to do when DMB had been marked
+  ; with isBarrier. For now, look for something that looks like "somewhere".
+; CHECK-NEXT: mov
+somewhere:
+  %combined = phi i32 [ %val, %atomic_ver ], [ %newval, %simple_ver]
+  ret i32 %combined
+}
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll
new file mode 100644
index 0000000..bcb1a6f
--- /dev/null
+++ b/test/CodeGen/AArch64/atomic-ops.ll
@@ -0,0 +1,1099 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+@var8 = global i8 0
+@var16 = global i16 0
+@var32 = global i32 0
+@var64 = global i64 0
+
+define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_add_i8:
+   %old = atomicrmw add i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_add_i16:
+   %old = atomicrmw add i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_add_i32:
+   %old = atomicrmw add i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_add_i64:
+   %old = atomicrmw add i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; x0 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: add [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_sub_i8:
+   %old = atomicrmw sub i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_sub_i16:
+   %old = atomicrmw sub i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_sub_i32:
+   %old = atomicrmw sub i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_sub_i64:
+   %old = atomicrmw sub i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; x0 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: sub [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_and_i8:
+   %old = atomicrmw and i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_and_i16:
+   %old = atomicrmw and i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_and_i32:
+   %old = atomicrmw and i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_and_i64:
+   %old = atomicrmw and i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; x0 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: and [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_or_i8:
+   %old = atomicrmw or i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_or_i16:
+   %old = atomicrmw or i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_or_i32:
+   %old = atomicrmw or i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_or_i64:
+   %old = atomicrmw or i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; x0 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: orr [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_xor_i8:
+   %old = atomicrmw xor i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_xor_i16:
+   %old = atomicrmw xor i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_xor_i32:
+   %old = atomicrmw xor i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_xor_i64:
+   %old = atomicrmw xor i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; x0 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: eor [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_xchg_i8:
+   %old = atomicrmw xchg i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_xchg_i16:
+   %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_xchg_i32:
+   %old = atomicrmw xchg i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_xchg_i64:
+   %old = atomicrmw xchg i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; x0 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], x0, [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i64 %old
+}
+
+
+define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_min_i8:
+   %old = atomicrmw min i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_min_i16:
+   %old = atomicrmw min i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], sxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_min_i32:
+   %old = atomicrmw min i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: cmp w0, w[[OLD]]
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_min_i64:
+   %old = atomicrmw min i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; x0 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: cmp x0, x[[OLD]]
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_max_i8:
+   %old = atomicrmw max i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_max_i16:
+   %old = atomicrmw max i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], sxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_max_i32:
+   %old = atomicrmw max i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: cmp w0, w[[OLD]]
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_max_i64:
+   %old = atomicrmw max i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; x0 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: cmp x0, x[[OLD]]
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lt
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_umin_i8:
+   %old = atomicrmw umin i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], uxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_umin_i16:
+   %old = atomicrmw umin i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], uxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_umin_i32:
+   %old = atomicrmw umin i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: cmp w0, w[[OLD]]
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_umin_i64:
+   %old = atomicrmw umin i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; x0 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: cmp x0, x[[OLD]]
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_umax_i8:
+   %old = atomicrmw umax i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], uxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_umax_i16:
+   %old = atomicrmw umax i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], uxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_umax_i32:
+   %old = atomicrmw umax i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: cmp w0, w[[OLD]]
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_umax_i64:
+   %old = atomicrmw umax i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; x0 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: cmp x0, x[[OLD]]
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lo
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
+; CHECK: test_atomic_cmpxchg_i8:
+   %old = cmpxchg i8* @var8, i8 %wanted, i8 %new seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
+  ; As above, w1 is a reasonable guess.
+; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne [[STARTAGAIN]]
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
+; CHECK: test_atomic_cmpxchg_i16:
+   %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
+  ; As above, w1 is a reasonable guess.
+; CHECK: stxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne [[STARTAGAIN]]
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
+; CHECK: test_atomic_cmpxchg_i32:
+   %old = cmpxchg i32* @var32, i32 %wanted, i32 %new seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; w0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
+  ; As above, w1 is a reasonable guess.
+; CHECK: stxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne [[STARTAGAIN]]
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
+; CHECK: test_atomic_cmpxchg_i64:
+   %old = cmpxchg i64* @var64, i64 %wanted, i64 %new seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+  ; x0 below is a reasonable guess but could change: %wanted certainly comes
+  ;  into the function there.
+; CHECK-NEXT: cmp x[[OLD]], x0
+; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
+  ; As above, x1 (where %new arrives) is a reasonable guess.
+; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: b.ne [[STARTAGAIN]]
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_monotonic_i8() nounwind {
+; CHECK: test_atomic_load_monotonic_i8:
+  %val = load atomic i8* @var8 monotonic, align 1
+; CHECK-NOT: dmb
+; CHECK: adrp x[[HIADDR:[0-9]+]], var8
+; CHECK: ldrb w0, [x[[HIADDR]], #:lo12:var8]
+; CHECK-NOT: dmb
+
+  ret i8 %val
+}
+
+define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind {
+; CHECK: test_atomic_load_monotonic_regoff_i8:
+  %addr_int = add i64 %base, %off
+  %addr = inttoptr i64 %addr_int to i8*
+
+  %val = load atomic i8* %addr monotonic, align 1
+; CHECK-NOT: dmb
+; CHECK: ldrb w0, [x0, x1]
+; CHECK-NOT: dmb
+
+  ret i8 %val
+}
+
+define i8 @test_atomic_load_acquire_i8() nounwind {
+; CHECK: test_atomic_load_acquire_i8:
+  %val = load atomic i8* @var8 acquire, align 1
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: ldarb w0, [x[[ADDR]]]
+  ret i8 %val
+}
+
+define i8 @test_atomic_load_seq_cst_i8() nounwind {
+; CHECK: test_atomic_load_seq_cst_i8:
+  %val = load atomic i8* @var8 seq_cst, align 1
+; CHECK: adrp x[[HIADDR:[0-9]+]], var8
+; CHECK: ldrb w0, [x[[HIADDR]], #:lo12:var8]
+; CHECK: dmb ish
+  ret i8 %val
+}
+
+define i16 @test_atomic_load_monotonic_i16() nounwind {
+; CHECK: test_atomic_load_monotonic_i16:
+  %val = load atomic i16* @var16 monotonic, align 2
+; CHECK-NOT: dmb
+; CHECK: adrp x[[HIADDR:[0-9]+]], var16
+; CHECK: ldrh w0, [x[[HIADDR]], #:lo12:var16]
+; CHECK-NOT: dmb
+
+  ret i16 %val
+}
+
+define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind {
+; CHECK: test_atomic_load_monotonic_regoff_i32:
+  %addr_int = add i64 %base, %off
+  %addr = inttoptr i64 %addr_int to i32*
+
+  %val = load atomic i32* %addr monotonic, align 4
+; CHECK-NOT: dmb
+; CHECK: ldr w0, [x0, x1]
+; CHECK-NOT: dmb
+
+  ret i32 %val
+}
+
+define i64 @test_atomic_load_seq_cst_i64() nounwind {
+; CHECK: test_atomic_load_seq_cst_i64:
+  %val = load atomic i64* @var64 seq_cst, align 8
+; CHECK: adrp x[[HIADDR:[0-9]+]], var64
+; CHECK: ldr x0, [x[[HIADDR]], #:lo12:var64]
+; CHECK: dmb ish
+  ret i64 %val
+}
+
+define void @test_atomic_store_monotonic_i8(i8 %val) nounwind {
+; CHECK: test_atomic_store_monotonic_i8:
+  store atomic i8 %val, i8* @var8 monotonic, align 1
+; CHECK: adrp x[[HIADDR:[0-9]+]], var8
+; CHECK: strb w0, [x[[HIADDR]], #:lo12:var8]
+
+  ret void
+}
+
+define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val) nounwind {
+; CHECK: test_atomic_store_monotonic_regoff_i8:
+
+  %addr_int = add i64 %base, %off
+  %addr = inttoptr i64 %addr_int to i8*
+
+  store atomic i8 %val, i8* %addr monotonic, align 1
+; CHECK: strb w2, [x0, x1]
+
+  ret void
+}
+define void @test_atomic_store_release_i8(i8 %val) nounwind {
+; CHECK: test_atomic_store_release_i8:
+  store atomic i8 %val, i8* @var8 release, align 1
+; CHECK: adrp [[HIADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8
+; CHECK: stlrb w0, [x[[ADDR]]]
+
+  ret void
+}
+
+define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind {
+; CHECK: test_atomic_store_seq_cst_i8:
+  store atomic i8 %val, i8* @var8 seq_cst, align 1
+; CHECK: adrp [[HIADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8
+; CHECK: stlrb w0, [x[[ADDR]]]
+; CHECK: dmb ish
+
+  ret void
+}
+
+define void @test_atomic_store_monotonic_i16(i16 %val) nounwind {
+; CHECK: test_atomic_store_monotonic_i16:
+  store atomic i16 %val, i16* @var16 monotonic, align 2
+; CHECK: adrp x[[HIADDR:[0-9]+]], var16
+; CHECK: strh w0, [x[[HIADDR]], #:lo12:var16]
+
+  ret void
+}
+
+define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %val) nounwind {
+; CHECK: test_atomic_store_monotonic_regoff_i32:
+
+  %addr_int = add i64 %base, %off
+  %addr = inttoptr i64 %addr_int to i32*
+
+  store atomic i32 %val, i32* %addr monotonic, align 4
+; CHECK: str w2, [x0, x1]
+
+  ret void
+}
+
+define void @test_atomic_store_release_i64(i64 %val) nounwind {
+; CHECK: test_atomic_store_release_i64:
+  store atomic i64 %val, i64* @var64 release, align 8
+; CHECK: adrp [[HIADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var64
+; CHECK: stlr x0, [x[[ADDR]]]
+
+  ret void
+}
diff --git a/test/CodeGen/AArch64/basic-pic.ll b/test/CodeGen/AArch64/basic-pic.ll
new file mode 100644
index 0000000..da94041
--- /dev/null
+++ b/test/CodeGen/AArch64/basic-pic.ll
@@ -0,0 +1,70 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -filetype=obj %s -o -| llvm-objdump -r - | FileCheck --check-prefix=CHECK-ELF %s
+
+@var = global i32 0
+
+; CHECK-ELF: RELOCATION RECORDS FOR [.text]
+
+define i32 @get_globalvar() {
+; CHECK: get_globalvar:
+
+  %val = load i32* @var
+; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
+; CHECK: ldr x[[GOTLOC:[0-9]+]], [x[[GOTHI]], #:got_lo12:var]
+; CHECK: ldr w0, [x[[GOTLOC]]]
+
+; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var
+; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var
+  ret i32 %val
+}
+
+define i32* @get_globalvaraddr() {
+; CHECK: get_globalvaraddr:
+
+  %val = load i32* @var
+; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
+; CHECK: ldr x0, [x[[GOTHI]], #:got_lo12:var]
+
+; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var
+; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var
+  ret i32* @var
+}
+
+@hiddenvar = hidden global i32 0
+
+define i32 @get_hiddenvar() {
+; CHECK: get_hiddenvar:
+
+  %val = load i32* @hiddenvar
+; CHECK: adrp x[[HI:[0-9]+]], hiddenvar
+; CHECK: ldr w0, [x[[HI]], #:lo12:hiddenvar]
+
+; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar
+; CHECK-ELF: R_AARCH64_LDST32_ABS_LO12_NC hiddenvar
+  ret i32 %val
+}
+
+define i32* @get_hiddenvaraddr() {
+; CHECK: get_hiddenvaraddr:
+
+  %val = load i32* @hiddenvar
+; CHECK: adrp [[HI:x[0-9]+]], hiddenvar
+; CHECK: add x0, [[HI]], #:lo12:hiddenvar
+
+; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar
+; CHECK-ELF: R_AARCH64_ADD_ABS_LO12_NC hiddenvar
+  ret i32* @hiddenvar
+}
+
+define void()* @get_func() {
+; CHECK: get_func:
+
+  ret void()* bitcast(void()*()* @get_func to void()*)
+; CHECK: adrp x[[GOTHI:[0-9]+]], :got:get_func
+; CHECK: ldr x0, [x[[GOTHI]], #:got_lo12:get_func]
+
+  ; Particularly important that the ADRP gets a relocation, LLVM tends to think
+  ; it can relax it because it knows where get_func is. It can't!
+; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE get_func
+; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC get_func
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/bitfield-insert-0.ll b/test/CodeGen/AArch64/bitfield-insert-0.ll
new file mode 100644
index 0000000..d1191f6
--- /dev/null
+++ b/test/CodeGen/AArch64/bitfield-insert-0.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -disassemble - | FileCheck %s
+
+; The encoding of lsb -> immr in the CGed bitfield instructions was wrong at one
+; point, in the edge case where lsb = 0. Just make sure.
+
+define void @test_bfi0(i32* %existing, i32* %new) {
+; CHECK: bfxil {{w[0-9]+}}, {{w[0-9]+}}, #0, #18
+
+  %oldval = load volatile i32* %existing
+  %oldval_keep = and i32 %oldval, 4294705152 ; 0xfffc_0000
+
+  %newval = load volatile i32* %new
+  %newval_masked = and i32 %newval, 262143 ; = 0x0003_ffff
+
+  %combined = or i32 %newval_masked, %oldval_keep
+  store volatile i32 %combined, i32* %existing
+
+  ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/bitfield-insert.ll b/test/CodeGen/AArch64/bitfield-insert.ll
new file mode 100644
index 0000000..3e871b9
--- /dev/null
+++ b/test/CodeGen/AArch64/bitfield-insert.ll
@@ -0,0 +1,193 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; First, a simple example from Clang. The registers could plausibly be
+; different, but probably won't be.
+
+%struct.foo = type { i8, [2 x i8], i8 }
+
+define [1 x i64] @from_clang([1 x i64] %f.coerce, i32 %n) nounwind readnone {
+; CHECK: from_clang:
+; CHECK: bfi w0, w1, #3, #4
+; CHECK-NEXT: ret
+
+entry:
+  %f.coerce.fca.0.extract = extractvalue [1 x i64] %f.coerce, 0
+  %tmp.sroa.0.0.extract.trunc = trunc i64 %f.coerce.fca.0.extract to i32
+  %bf.value = shl i32 %n, 3
+  %0 = and i32 %bf.value, 120
+  %f.sroa.0.0.insert.ext.masked = and i32 %tmp.sroa.0.0.extract.trunc, 135
+  %1 = or i32 %f.sroa.0.0.insert.ext.masked, %0
+  %f.sroa.0.0.extract.trunc = zext i32 %1 to i64
+  %tmp1.sroa.1.1.insert.insert = and i64 %f.coerce.fca.0.extract, 4294967040
+  %tmp1.sroa.0.0.insert.insert = or i64 %f.sroa.0.0.extract.trunc, %tmp1.sroa.1.1.insert.insert
+  %.fca.0.insert = insertvalue [1 x i64] undef, i64 %tmp1.sroa.0.0.insert.insert, 0
+  ret [1 x i64] %.fca.0.insert
+}
+
+define void @test_whole32(i32* %existing, i32* %new) {
+; CHECK: test_whole32:
+; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #26, #5
+
+  %oldval = load volatile i32* %existing
+  %oldval_keep = and i32 %oldval, 2214592511 ; =0x83ffffff
+
+  %newval = load volatile i32* %new
+  %newval_shifted = shl i32 %newval, 26
+  %newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000
+
+  %combined = or i32 %oldval_keep, %newval_masked
+  store volatile i32 %combined, i32* %existing
+
+  ret void
+}
+
+define void @test_whole64(i64* %existing, i64* %new) {
+; CHECK: test_whole64:
+; CHECK: bfi {{x[0-9]+}}, {{x[0-9]+}}, #26, #14
+; CHECK-NOT: and
+; CHECK: ret
+
+  %oldval = load volatile i64* %existing
+  %oldval_keep = and i64 %oldval, 18446742974265032703 ; = 0xffffff0003ffffffL
+
+  %newval = load volatile i64* %new
+  %newval_shifted = shl i64 %newval, 26
+  %newval_masked = and i64 %newval_shifted, 1099444518912 ; = 0xfffc000000
+
+  %combined = or i64 %oldval_keep, %newval_masked
+  store volatile i64 %combined, i64* %existing
+
+  ret void
+}
+
+define void @test_whole32_from64(i64* %existing, i64* %new) {
+; CHECK: test_whole32_from64:
+; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #{{0|16}}, #16
+; CHECK-NOT: and
+; CHECK: ret
+
+  %oldval = load volatile i64* %existing
+  %oldval_keep = and i64 %oldval, 4294901760 ; = 0xffff0000
+
+  %newval = load volatile i64* %new
+  %newval_masked = and i64 %newval, 65535 ; = 0xffff
+
+  %combined = or i64 %oldval_keep, %newval_masked
+  store volatile i64 %combined, i64* %existing
+
+  ret void
+}
+
+define void @test_32bit_masked(i32 *%existing, i32 *%new) {
+; CHECK: test_32bit_masked:
+; CHECK: bfi [[INSERT:w[0-9]+]], {{w[0-9]+}}, #3, #4
+; CHECK: and {{w[0-9]+}}, [[INSERT]], #0xff
+
+  %oldval = load volatile i32* %existing
+  %oldval_keep = and i32 %oldval, 135 ; = 0x87
+
+  %newval = load volatile i32* %new
+  %newval_shifted = shl i32 %newval, 3
+  %newval_masked = and i32 %newval_shifted, 120 ; = 0x78
+
+  %combined = or i32 %oldval_keep, %newval_masked
+  store volatile i32 %combined, i32* %existing
+
+  ret void
+}
+
+define void @test_64bit_masked(i64 *%existing, i64 *%new) {
+; CHECK: test_64bit_masked:
+; CHECK: bfi [[INSERT:x[0-9]+]], {{x[0-9]+}}, #40, #8
+; CHECK: and {{x[0-9]+}}, [[INSERT]], #0xffff00000000
+
+  %oldval = load volatile i64* %existing
+  %oldval_keep = and i64 %oldval, 1095216660480 ; = 0xff_0000_0000
+
+  %newval = load volatile i64* %new
+  %newval_shifted = shl i64 %newval, 40
+  %newval_masked = and i64 %newval_shifted, 280375465082880 ; = 0xff00_0000_0000
+
+  %combined = or i64 %newval_masked, %oldval_keep
+  store volatile i64 %combined, i64* %existing
+
+  ret void
+}
+
+; Mask is too complicated for literal ANDwwi, make sure other avenues are tried.
+define void @test_32bit_complexmask(i32 *%existing, i32 *%new) {
+; CHECK: test_32bit_complexmask:
+; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #4
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+
+  %oldval = load volatile i32* %existing
+  %oldval_keep = and i32 %oldval, 647 ; = 0x287
+
+  %newval = load volatile i32* %new
+  %newval_shifted = shl i32 %newval, 3
+  %newval_masked = and i32 %newval_shifted, 120 ; = 0x78
+
+  %combined = or i32 %oldval_keep, %newval_masked
+  store volatile i32 %combined, i32* %existing
+
+  ret void
+}
+
+; Neither mask is a contiguous set of 1s, so BFI can't be used.
+define void @test_32bit_badmask(i32 *%existing, i32 *%new) {
+; CHECK: test_32bit_badmask:
+; CHECK-NOT: bfi
+; CHECK: ret
+
+  %oldval = load volatile i32* %existing
+  %oldval_keep = and i32 %oldval, 135 ; = 0x87
+
+  %newval = load volatile i32* %new
+  %newval_shifted = shl i32 %newval, 3
+  %newval_masked = and i32 %newval_shifted, 632 ; = 0x278
+
+  %combined = or i32 %oldval_keep, %newval_masked
+  store volatile i32 %combined, i32* %existing
+
+  ret void
+}
+
+; Ditto
+define void @test_64bit_badmask(i64 *%existing, i64 *%new) {
+; CHECK: test_64bit_badmask:
+; CHECK-NOT: bfi
+; CHECK: ret
+
+  %oldval = load volatile i64* %existing
+  %oldval_keep = and i64 %oldval, 135 ; = 0x87
+
+  %newval = load volatile i64* %new
+  %newval_shifted = shl i64 %newval, 3
+  %newval_masked = and i64 %newval_shifted, 664 ; = 0x298
+
+  %combined = or i64 %oldval_keep, %newval_masked
+  store volatile i64 %combined, i64* %existing
+
+  ret void
+}
+
+; Bitfield insert where there's a left-over shr needed at the beginning
+; (e.g. result of str.bf1 = str.bf2)
+define void @test_32bit_with_shr(i32* %existing, i32* %new) {
+; CHECK: test_32bit_with_shr:
+
+  %oldval = load volatile i32* %existing
+  %oldval_keep = and i32 %oldval, 2214592511 ; =0x83ffffff
+
+  %newval = load i32* %new
+  %newval_shifted = shl i32 %newval, 12
+  %newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000
+
+  %combined = or i32 %oldval_keep, %newval_masked
+  store volatile i32 %combined, i32* %existing
+; CHECK: lsr [[BIT:w[0-9]+]], {{w[0-9]+}}, #14
+; CHECK: bfi {{w[0-9]+}}, [[BIT]], #26, #5
+
+  ret void
+}
+
diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll
new file mode 100644
index 0000000..36d337e
--- /dev/null
+++ b/test/CodeGen/AArch64/bitfield.ll
@@ -0,0 +1,218 @@
+
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_extendb(i8 %var) {
+; CHECK: test_extendb:
+
+  %sxt32 = sext i8 %var to i32
+  store volatile i32 %sxt32, i32* @var32
+; CHECK: sxtb {{w[0-9]+}}, {{w[0-9]+}}
+
+  %sxt64 = sext i8 %var to i64
+  store volatile i64 %sxt64, i64* @var64
+; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}}
+
+; N.b. this doesn't actually produce a bitfield instruction at the
+; moment, but it's still a good test to have and the semantics are
+; correct.
+  %uxt32 = zext i8 %var to i32
+  store volatile i32 %uxt32, i32* @var32
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff
+
+  %uxt64 = zext i8 %var to i64
+  store volatile i64 %uxt64, i64* @var64
+; CHECK: uxtb {{x[0-9]+}}, {{w[0-9]+}}
+  ret void
+}
+
+define void @test_extendh(i16 %var) {
+; CHECK: test_extendh:
+
+  %sxt32 = sext i16 %var to i32
+  store volatile i32 %sxt32, i32* @var32
+; CHECK: sxth {{w[0-9]+}}, {{w[0-9]+}}
+
+  %sxt64 = sext i16 %var to i64
+  store volatile i64 %sxt64, i64* @var64
+; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}}
+
+; N.b. this doesn't actually produce a bitfield instruction at the
+; moment, but it's still a good test to have and the semantics are
+; correct.
+  %uxt32 = zext i16 %var to i32
+  store volatile i32 %uxt32, i32* @var32
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff
+
+  %uxt64 = zext i16 %var to i64
+  store volatile i64 %uxt64, i64* @var64
+; CHECK: uxth {{x[0-9]+}}, {{w[0-9]+}}
+  ret void
+}
+
+define void @test_extendw(i32 %var) {
+; CHECK: test_extendw:
+
+  %sxt64 = sext i32 %var to i64
+  store volatile i64 %sxt64, i64* @var64
+; CHECK: sxtw {{x[0-9]+}}, {{w[0-9]+}}
+
+  %uxt64 = zext i32 %var to i64
+  store volatile i64 %uxt64, i64* @var64
+; CHECK: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #0, #32
+  ret void
+}
+
+define void @test_shifts(i32 %val32, i64 %val64) {
+; CHECK: test_shifts:
+
+  %shift1 = ashr i32 %val32, 31
+  store volatile i32 %shift1, i32* @var32
+; CHECK: asr {{w[0-9]+}}, {{w[0-9]+}}, #31
+
+  %shift2 = lshr i32 %val32, 8
+  store volatile i32 %shift2, i32* @var32
+; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, #8
+
+  %shift3 = shl i32 %val32, 1
+  store volatile i32 %shift3, i32* @var32
+; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, #1
+
+  %shift4 = ashr i64 %val64, 31
+  store volatile i64 %shift4, i64* @var64
+; CHECK: asr {{x[0-9]+}}, {{x[0-9]+}}, #31
+
+  %shift5 = lshr i64 %val64, 8
+  store volatile i64 %shift5, i64* @var64
+; CHECK: lsr {{x[0-9]+}}, {{x[0-9]+}}, #8
+
+  %shift6 = shl i64 %val64, 63
+  store volatile i64 %shift6, i64* @var64
+; CHECK: lsl {{x[0-9]+}}, {{x[0-9]+}}, #63
+
+  %shift7 = ashr i64 %val64, 63
+  store volatile i64 %shift7, i64* @var64
+; CHECK: asr {{x[0-9]+}}, {{x[0-9]+}}, #63
+
+  %shift8 = lshr i64 %val64, 63
+  store volatile i64 %shift8, i64* @var64
+; CHECK: lsr {{x[0-9]+}}, {{x[0-9]+}}, #63
+
+  %shift9 = lshr i32 %val32, 31
+  store volatile i32 %shift9, i32* @var32
+; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, #31
+
+  %shift10 = shl i32 %val32, 31
+  store volatile i32 %shift10, i32* @var32
+; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, #31
+
+  ret void
+}
+
+; LLVM can produce in-register extensions taking place entirely with
+; 64-bit registers too.
+define void @test_sext_inreg_64(i64 %in) {
+; CHECK: test_sext_inreg_64:
+
+; i1 doesn't have an official alias, but crops up and is handled by
+; the bitfield ops.
+  %trunc_i1 = trunc i64 %in to i1
+  %sext_i1 = sext i1 %trunc_i1 to i64
+  store volatile i64 %sext_i1, i64* @var64
+; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1
+
+  %trunc_i8 = trunc i64 %in to i8
+  %sext_i8 = sext i8 %trunc_i8 to i64
+  store volatile i64 %sext_i8, i64* @var64
+; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}}
+
+  %trunc_i16 = trunc i64 %in to i16
+  %sext_i16 = sext i16 %trunc_i16 to i64
+  store volatile i64 %sext_i16, i64* @var64
+; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}}
+
+  %trunc_i32 = trunc i64 %in to i32
+  %sext_i32 = sext i32 %trunc_i32 to i64
+  store volatile i64 %sext_i32, i64* @var64
+; CHECK: sxtw {{x[0-9]+}}, {{w[0-9]+}}
+  ret void
+}
+
+; These instructions don't actually select to official bitfield
+; operations, but it's important that we select them somehow:
+define void @test_zext_inreg_64(i64 %in) {
+; CHECK: test_zext_inreg_64:
+
+  %trunc_i8 = trunc i64 %in to i8
+  %zext_i8 = zext i8 %trunc_i8 to i64
+  store volatile i64 %zext_i8, i64* @var64
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff
+
+  %trunc_i16 = trunc i64 %in to i16
+  %zext_i16 = zext i16 %trunc_i16 to i64
+  store volatile i64 %zext_i16, i64* @var64
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff
+
+  %trunc_i32 = trunc i64 %in to i32
+  %zext_i32 = zext i32 %trunc_i32 to i64
+  store volatile i64 %zext_i32, i64* @var64
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffffffff
+
+  ret void
+}
+
+define i64 @test_sext_inreg_from_32(i32 %in) {
+; CHECK: test_sext_inreg_from_32:
+
+  %small = trunc i32 %in to i1
+  %ext = sext i1 %small to i64
+
+  ; Different registers are of course, possible, though suboptimal. This is
+  ; making sure that a 64-bit "(sext_inreg (anyext GPR32), i1)" uses the 64-bit
+  ; sbfx rather than just 32-bits.
+; CHECK: sbfx x0, x0, #0, #1
+  ret i64 %ext
+}
+
+
+define i32 @test_ubfx32(i32* %addr) {
+; CHECK: test_ubfx32:
+; CHECK: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #23, #3
+
+   %fields = load i32* %addr
+   %shifted = lshr i32 %fields, 23
+   %masked = and i32 %shifted, 7
+   ret i32 %masked
+}
+
+define i64 @test_ubfx64(i64* %addr) {
+; CHECK: test_ubfx64:
+; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #25, #10
+
+   %fields = load i64* %addr
+   %shifted = lshr i64 %fields, 25
+   %masked = and i64 %shifted, 1023
+   ret i64 %masked
+}
+
+define i32 @test_sbfx32(i32* %addr) {
+; CHECK: test_sbfx32:
+; CHECK: sbfx {{w[0-9]+}}, {{w[0-9]+}}, #6, #3
+
+   %fields = load i32* %addr
+   %shifted = shl i32 %fields, 23
+   %extended = ashr i32 %shifted, 29
+   ret i32 %extended
+}
+
+define i64 @test_sbfx64(i64* %addr) {
+; CHECK: test_sbfx64:
+; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #63
+
+   %fields = load i64* %addr
+   %shifted = shl i64 %fields, 1
+   %extended = ashr i64 %shifted, 1
+   ret i64 %extended
+}
diff --git a/test/CodeGen/AArch64/blockaddress.ll b/test/CodeGen/AArch64/blockaddress.ll
new file mode 100644
index 0000000..3d0a5cf
--- /dev/null
+++ b/test/CodeGen/AArch64/blockaddress.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+@addr = global i8* null
+
+define void @test_blockaddress() {
+; CHECK: test_blockaddress:
+  store volatile i8* blockaddress(@test_blockaddress, %block), i8** @addr
+  %val = load volatile i8** @addr
+  indirectbr i8* %val, [label %block]
+; CHECK: adrp [[DEST_HI:x[0-9]+]], [[DEST_LBL:.Ltmp[0-9]+]]
+; CHECK: add [[DEST:x[0-9]+]], [[DEST_HI]], #:lo12:[[DEST_LBL]]
+; CHECK: str [[DEST]],
+; CHECK: ldr [[NEWDEST:x[0-9]+]]
+; CHECK: br [[NEWDEST]]
+
+block:
+  ret void
+}
diff --git a/test/CodeGen/AArch64/bool-loads.ll b/test/CodeGen/AArch64/bool-loads.ll
new file mode 100644
index 0000000..5c7640b
--- /dev/null
+++ b/test/CodeGen/AArch64/bool-loads.ll
@@ -0,0 +1,55 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+@var = global i1 0
+
+define i32 @test_sextloadi32() {
+; CHECK: test_sextloadi32:
+
+  %val = load i1* @var
+  %ret = sext i1 %val to i32
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]
+; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1
+
+  ret i32 %ret
+; CHECK: ret
+}
+
+define i64 @test_sextloadi64() {
+; CHECK: test_sextloadi64:
+
+  %val = load i1* @var
+  %ret = sext i1 %val to i64
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]
+; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1
+
+  ret i64 %ret
+; CHECK: ret
+}
+
+define i32 @test_zextloadi32() {
+; CHECK: test_zextloadi32:
+
+; It's not actually necessary that "ret" is next, but as far as LLVM
+; is concerned only 0 or 1 should be loadable so no extension is
+; necessary.
+  %val = load i1* @var
+  %ret = zext i1 %val to i32
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]
+
+  ret i32 %ret
+; CHECK-NEXT: ret
+}
+
+define i64 @test_zextloadi64() {
+; CHECK: test_zextloadi64:
+
+; It's not actually necessary that "ret" is next, but as far as LLVM
+; is concerned only 0 or 1 should be loadable so no extension is
+; necessary.
+  %val = load i1* @var
+  %ret = zext i1 %val to i64
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]
+
+  ret i64 %ret
+; CHECK-NEXT: ret
+}
diff --git a/test/CodeGen/AArch64/breg.ll b/test/CodeGen/AArch64/breg.ll
new file mode 100644
index 0000000..38ed473
--- /dev/null
+++ b/test/CodeGen/AArch64/breg.ll
@@ -0,0 +1,17 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@stored_label = global i8* null
+
+define void @foo() {
+; CHECK: foo:
+  %lab = load i8** @stored_label
+  indirectbr i8* %lab, [label  %otherlab, label %retlab]
+; CHECK: adrp {{x[0-9]+}}, stored_label
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:stored_label]
+; CHECK: br {{x[0-9]+}}
+
+otherlab:
+  ret void
+retlab:
+  ret void
+}
diff --git a/test/CodeGen/AArch64/callee-save.ll b/test/CodeGen/AArch64/callee-save.ll
new file mode 100644
index 0000000..9dddf74
--- /dev/null
+++ b/test/CodeGen/AArch64/callee-save.ll
@@ -0,0 +1,86 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var = global float 0.0
+
+define void @foo() {
+; CHECK: foo:
+
+; CHECK: stp d14, d15, [sp
+; CHECK: stp d12, d13, [sp
+; CHECK: stp d10, d11, [sp
+; CHECK: stp d8, d9, [sp
+
+  ; Create lots of live variables to exhaust the supply of
+  ; caller-saved registers
+  %val1 = load volatile float* @var
+  %val2 = load volatile float* @var
+  %val3 = load volatile float* @var
+  %val4 = load volatile float* @var
+  %val5 = load volatile float* @var
+  %val6 = load volatile float* @var
+  %val7 = load volatile float* @var
+  %val8 = load volatile float* @var
+  %val9 = load volatile float* @var
+  %val10 = load volatile float* @var
+  %val11 = load volatile float* @var
+  %val12 = load volatile float* @var
+  %val13 = load volatile float* @var
+  %val14 = load volatile float* @var
+  %val15 = load volatile float* @var
+  %val16 = load volatile float* @var
+  %val17 = load volatile float* @var
+  %val18 = load volatile float* @var
+  %val19 = load volatile float* @var
+  %val20 = load volatile float* @var
+  %val21 = load volatile float* @var
+  %val22 = load volatile float* @var
+  %val23 = load volatile float* @var
+  %val24 = load volatile float* @var
+  %val25 = load volatile float* @var
+  %val26 = load volatile float* @var
+  %val27 = load volatile float* @var
+  %val28 = load volatile float* @var
+  %val29 = load volatile float* @var
+  %val30 = load volatile float* @var
+  %val31 = load volatile float* @var
+  %val32 = load volatile float* @var
+
+  store volatile float %val1, float* @var
+  store volatile float %val2, float* @var
+  store volatile float %val3, float* @var
+  store volatile float %val4, float* @var
+  store volatile float %val5, float* @var
+  store volatile float %val6, float* @var
+  store volatile float %val7, float* @var
+  store volatile float %val8, float* @var
+  store volatile float %val9, float* @var
+  store volatile float %val10, float* @var
+  store volatile float %val11, float* @var
+  store volatile float %val12, float* @var
+  store volatile float %val13, float* @var
+  store volatile float %val14, float* @var
+  store volatile float %val15, float* @var
+  store volatile float %val16, float* @var
+  store volatile float %val17, float* @var
+  store volatile float %val18, float* @var
+  store volatile float %val19, float* @var
+  store volatile float %val20, float* @var
+  store volatile float %val21, float* @var
+  store volatile float %val22, float* @var
+  store volatile float %val23, float* @var
+  store volatile float %val24, float* @var
+  store volatile float %val25, float* @var
+  store volatile float %val26, float* @var
+  store volatile float %val27, float* @var
+  store volatile float %val28, float* @var
+  store volatile float %val29, float* @var
+  store volatile float %val30, float* @var
+  store volatile float %val31, float* @var
+  store volatile float %val32, float* @var
+
+; CHECK: ldp     d8, d9, [sp
+; CHECK: ldp     d10, d11, [sp
+; CHECK: ldp     d12, d13, [sp
+; CHECK: ldp     d14, d15, [sp
+  ret void
+}
diff --git a/test/CodeGen/AArch64/compare-branch.ll b/test/CodeGen/AArch64/compare-branch.ll
new file mode 100644
index 0000000..4213110
--- /dev/null
+++ b/test/CodeGen/AArch64/compare-branch.ll
@@ -0,0 +1,38 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @foo() {
+; CHECK: foo:
+
+  %val1 = load volatile i32* @var32
+  %tst1 = icmp eq i32 %val1, 0
+  br i1 %tst1, label %end, label %test2
+; CHECK: cbz {{w[0-9]+}}, .LBB
+
+test2:
+  %val2 = load volatile i32* @var32
+  %tst2 = icmp ne i32 %val2, 0
+  br i1 %tst2, label %end, label %test3
+; CHECK: cbnz {{w[0-9]+}}, .LBB
+
+test3:
+  %val3 = load volatile i64* @var64
+  %tst3 = icmp eq i64 %val3, 0
+  br i1 %tst3, label %end, label %test4
+; CHECK: cbz {{x[0-9]+}}, .LBB
+
+test4:
+  %val4 = load volatile i64* @var64
+  %tst4 = icmp ne i64 %val4, 0
+  br i1 %tst4, label %end, label %test5
+; CHECK: cbnz {{x[0-9]+}}, .LBB
+
+test5:
+  store volatile i64 %val4, i64* @var64
+  ret void
+
+end:
+  ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/cond-sel.ll b/test/CodeGen/AArch64/cond-sel.ll
new file mode 100644
index 0000000..3051cf5
--- /dev/null
+++ b/test/CodeGen/AArch64/cond-sel.ll
@@ -0,0 +1,213 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+; CHECK: test_csel:
+
+  %tst1 = icmp ugt i32 %lhs32, %rhs32
+  %val1 = select i1 %tst1, i32 42, i32 52
+  store i32 %val1, i32* @var32
+; CHECK: movz [[W52:w[0-9]+]], #52
+; CHECK: movz [[W42:w[0-9]+]], #42
+; CHECK: csel {{w[0-9]+}}, [[W42]], [[W52]], hi
+
+  %rhs64 = sext i32 %rhs32 to i64
+  %tst2 = icmp sle i64 %lhs64, %rhs64
+  %val2 = select i1 %tst2, i64 %lhs64, i64 %rhs64
+  store i64 %val2, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], [[RHS:w[0-9]+]], sxtw
+; CHECK: sxtw [[EXT_RHS:x[0-9]+]], [[RHS]]
+; CHECK: csel {{x[0-9]+}}, [[LHS]], [[EXT_RHS]], le
+
+  ret void
+; CHECK: ret
+}
+
+define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %rhs64) {
+; CHECK: test_floatcsel:
+
+  %tst1 = fcmp one float %lhs32, %rhs32
+; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}}
+  %val1 = select i1 %tst1, i32 42, i32 52
+  store i32 %val1, i32* @var32
+; CHECK: movz [[W52:w[0-9]+]], #52
+; CHECK: movz [[W42:w[0-9]+]], #42
+; CHECK: csel [[MAYBETRUE:w[0-9]+]], [[W42]], [[W52]], mi
+; CHECK: csel {{w[0-9]+}}, [[W42]], [[MAYBETRUE]], gt
+
+
+  %tst2 = fcmp ueq double %lhs64, %rhs64
+; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
+  %val2 = select i1 %tst2, i64 9, i64 15
+  store i64 %val2, i64* @var64
+; CHECK: movz [[CONST15:x[0-9]+]], #15
+; CHECK: movz [[CONST9:x[0-9]+]], #9
+; CHECK: csel [[MAYBETRUE:x[0-9]+]], [[CONST9]], [[CONST15]], eq
+; CHECK: csel {{x[0-9]+}}, [[CONST9]], [[MAYBETRUE]], vs
+
+  ret void
+; CHECK: ret
+}
+
+
+define void @test_csinc(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+; CHECK: test_csinc:
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+  %tst1 = icmp ugt i32 %lhs32, %rhs32
+  %inc1 = add i32 %rhs32, 1
+  %val1 = select i1 %tst1, i32 %inc1, i32 %lhs32
+  store volatile i32 %val1, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]]
+; CHECK: csinc {{w[0-9]+}}, [[LHS]], [[RHS]], ls
+
+  %rhs2 = add i32 %rhs32, 42
+  %tst2 = icmp sle i32 %lhs32, %rhs2
+  %inc2 = add i32 %rhs32, 1
+  %val2 = select i1 %tst2, i32 %lhs32, i32 %inc2
+  store volatile i32 %val2, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], {{w[0-9]+}}
+; CHECK: csinc {{w[0-9]+}}, [[LHS]], {{w[0-9]+}}, le
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+  %rhs3 = sext i32 %rhs32 to i64
+  %tst3 = icmp ugt i64 %lhs64, %rhs3
+  %inc3 = add i64 %rhs3, 1
+  %val3 = select i1 %tst3, i64 %inc3, i64 %lhs64
+  store volatile i64 %val3, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}}
+; CHECK: csinc {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, ls
+
+  %rhs4 = zext i32 %rhs32 to i64
+  %tst4 = icmp sle i64 %lhs64, %rhs4
+  %inc4 = add i64 %rhs4, 1
+  %val4 = select i1 %tst4, i64 %lhs64, i64 %inc4
+  store volatile i64 %val4, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}}
+; CHECK: csinc {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, le
+
+  ret void
+; CHECK: ret
+}
+
+define void @test_csinv(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+; CHECK: test_csinv:
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+  %tst1 = icmp ugt i32 %lhs32, %rhs32
+  %inc1 = xor i32 -1, %rhs32
+  %val1 = select i1 %tst1, i32 %inc1, i32 %lhs32
+  store volatile i32 %val1, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]]
+; CHECK: csinv {{w[0-9]+}}, [[LHS]], [[RHS]], ls
+
+  %rhs2 = add i32 %rhs32, 42
+  %tst2 = icmp sle i32 %lhs32, %rhs2
+  %inc2 = xor i32 -1, %rhs32
+  %val2 = select i1 %tst2, i32 %lhs32, i32 %inc2
+  store volatile i32 %val2, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], {{w[0-9]+}}
+; CHECK: csinv {{w[0-9]+}}, [[LHS]], {{w[0-9]+}}, le
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+  %rhs3 = sext i32 %rhs32 to i64
+  %tst3 = icmp ugt i64 %lhs64, %rhs3
+  %inc3 = xor i64 -1, %rhs3
+  %val3 = select i1 %tst3, i64 %inc3, i64 %lhs64
+  store volatile i64 %val3, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}}
+; CHECK: csinv {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, ls
+
+  %rhs4 = zext i32 %rhs32 to i64
+  %tst4 = icmp sle i64 %lhs64, %rhs4
+  %inc4 = xor i64 -1, %rhs4
+  %val4 = select i1 %tst4, i64 %lhs64, i64 %inc4
+  store volatile i64 %val4, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}}
+; CHECK: csinv {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, le
+
+  ret void
+; CHECK: ret
+}
+
+define void @test_csneg(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+; CHECK: test_csneg:
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+  %tst1 = icmp ugt i32 %lhs32, %rhs32
+  %inc1 = sub i32 0, %rhs32
+  %val1 = select i1 %tst1, i32 %inc1, i32 %lhs32
+  store volatile i32 %val1, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]]
+; CHECK: csneg {{w[0-9]+}}, [[LHS]], [[RHS]], ls
+
+  %rhs2 = add i32 %rhs32, 42
+  %tst2 = icmp sle i32 %lhs32, %rhs2
+  %inc2 = sub i32 0, %rhs32
+  %val2 = select i1 %tst2, i32 %lhs32, i32 %inc2
+  store volatile i32 %val2, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], {{w[0-9]+}}
+; CHECK: csneg {{w[0-9]+}}, [[LHS]], {{w[0-9]+}}, le
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+  %rhs3 = sext i32 %rhs32 to i64
+  %tst3 = icmp ugt i64 %lhs64, %rhs3
+  %inc3 = sub i64 0, %rhs3
+  %val3 = select i1 %tst3, i64 %inc3, i64 %lhs64
+  store volatile i64 %val3, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}}
+; CHECK: csneg {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, ls
+
+  %rhs4 = zext i32 %rhs32 to i64
+  %tst4 = icmp sle i64 %lhs64, %rhs4
+  %inc4 = sub i64 0, %rhs4
+  %val4 = select i1 %tst4, i64 %lhs64, i64 %inc4
+  store volatile i64 %val4, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}}
+; CHECK: csneg {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, le
+
+  ret void
+; CHECK: ret
+}
+
+define void @test_cset(i32 %lhs, i32 %rhs, i64 %lhs64) {
+; CHECK: test_cset:
+
+; N.b. code is not optimal here (32-bit csinc would be better) but
+; incoming DAG is too complex
+  %tst1 = icmp eq i32 %lhs, %rhs
+  %val1 = zext i1 %tst1 to i32
+  store i32 %val1, i32* @var32
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: csinc {{w[0-9]+}}, wzr, wzr, ne
+
+  %rhs64 = sext i32 %rhs to i64
+  %tst2 = icmp ule i64 %lhs64, %rhs64
+  %val2 = zext i1 %tst2 to i64
+  store i64 %val2, i64* @var64
+; CHECK: csinc {{w[0-9]+}}, wzr, wzr, hi
+
+  ret void
+; CHECK: ret
+}
+
+define void @test_csetm(i32 %lhs, i32 %rhs, i64 %lhs64) {
+; CHECK: test_csetm:
+
+  %tst1 = icmp eq i32 %lhs, %rhs
+  %val1 = sext i1 %tst1 to i32
+  store i32 %val1, i32* @var32
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: csinv {{w[0-9]+}}, wzr, wzr, ne
+
+  %rhs64 = sext i32 %rhs to i64
+  %tst2 = icmp ule i64 %lhs64, %rhs64
+  %val2 = sext i1 %tst2 to i64
+  store i64 %val2, i64* @var64
+; CHECK: csinv {{x[0-9]+}}, xzr, xzr, hi
+
+  ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/directcond.ll b/test/CodeGen/AArch64/directcond.ll
new file mode 100644
index 0000000..f5d5759
--- /dev/null
+++ b/test/CodeGen/AArch64/directcond.ll
@@ -0,0 +1,84 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) {
+; CHECK: test_select_i32:
+  %val = select i1 %bit, i32 %a, i32 %b
+; CHECK: movz [[ONE:w[0-9]+]], #1
+; CHECK: tst w0, [[ONE]]
+; CHECK-NEXT: csel w0, w1, w2, ne
+
+  ret i32 %val
+}
+
+define i64 @test_select_i64(i1 %bit, i64 %a, i64 %b) {
+; CHECK: test_select_i64:
+  %val = select i1 %bit, i64 %a, i64 %b
+; CHECK: movz [[ONE:w[0-9]+]], #1
+; CHECK: tst w0, [[ONE]]
+; CHECK-NEXT: csel x0, x1, x2, ne
+
+  ret i64 %val
+}
+
+define float @test_select_float(i1 %bit, float %a, float %b) {
+; CHECK: test_select_float:
+  %val = select i1 %bit, float %a, float %b
+; CHECK: movz [[ONE:w[0-9]+]], #1
+; CHECK: tst w0, [[ONE]]
+; CHECK-NEXT: fcsel s0, s0, s1, ne
+
+  ret float %val
+}
+
+define double @test_select_double(i1 %bit, double %a, double %b) {
+; CHECK: test_select_double:
+  %val = select i1 %bit, double %a, double %b
+; CHECK: movz [[ONE:w[0-9]+]], #1
+; CHECK: tst w0, [[ONE]]
+; CHECK-NEXT: fcsel d0, d0, d1, ne
+
+  ret double %val
+}
+
+define i32 @test_brcond(i1 %bit) {
+; CHECK: test_brcond:
+  br i1 %bit, label %true, label %false
+; CHECK: tbz {{w[0-9]+}}, #0, .LBB
+
+true:
+  ret i32 0
+false:
+  ret i32 42
+}
+
+define i1 @test_setcc_float(float %lhs, float %rhs) {
+; CHECK: test_setcc_float:
+  %val = fcmp oeq float %lhs, %rhs
+; CHECK: fcmp s0, s1
+; CHECK: csinc w0, wzr, wzr, ne
+  ret i1 %val
+}
+
+define i1 @test_setcc_double(double %lhs, double %rhs) {
+; CHECK: test_setcc_double:
+  %val = fcmp oeq double %lhs, %rhs
+; CHECK: fcmp d0, d1
+; CHECK: csinc w0, wzr, wzr, ne
+  ret i1 %val
+}
+
+define i1 @test_setcc_i32(i32 %lhs, i32 %rhs) {
+; CHECK: test_setcc_i32:
+  %val = icmp ugt i32 %lhs, %rhs
+; CHECK: cmp w0, w1
+; CHECK: csinc w0, wzr, wzr, ls
+  ret i1 %val
+}
+
+define i1 @test_setcc_i64(i64 %lhs, i64 %rhs) {
+; CHECK: test_setcc_i64:
+  %val = icmp ne i64 %lhs, %rhs
+; CHECK: cmp x0, x1
+; CHECK: csinc w0, wzr, wzr, eq
+  ret i1 %val
+}
diff --git a/test/CodeGen/AArch64/dp-3source.ll b/test/CodeGen/AArch64/dp-3source.ll
new file mode 100644
index 0000000..c40d393
--- /dev/null
+++ b/test/CodeGen/AArch64/dp-3source.ll
@@ -0,0 +1,163 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+define i32 @test_madd32(i32 %val0, i32 %val1, i32 %val2) {
+; CHECK: test_madd32:
+  %mid = mul i32 %val1, %val2
+  %res = add i32 %val0, %mid
+; CHECK: madd {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+  ret i32 %res
+}
+
+define i64 @test_madd64(i64 %val0, i64 %val1, i64 %val2) {
+; CHECK: test_madd64:
+  %mid = mul i64 %val1, %val2
+  %res = add i64 %val0, %mid
+; CHECK: madd {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+  ret i64 %res
+}
+
+define i32 @test_msub32(i32 %val0, i32 %val1, i32 %val2) {
+; CHECK: test_msub32:
+  %mid = mul i32 %val1, %val2
+  %res = sub i32 %val0, %mid
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+  ret i32 %res
+}
+
+define i64 @test_msub64(i64 %val0, i64 %val1, i64 %val2) {
+; CHECK: test_msub64:
+  %mid = mul i64 %val1, %val2
+  %res = sub i64 %val0, %mid
+; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+  ret i64 %res
+}
+
+define i64 @test_smaddl(i64 %acc, i32 %val1, i32 %val2) {
+; CHECK: test_smaddl:
+  %ext1 = sext i32 %val1 to i64
+  %ext2 = sext i32 %val2 to i64
+  %prod = mul i64 %ext1, %ext2
+  %res = add i64 %acc, %prod
+; CHECK: smaddl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
+  ret i64 %res
+}
+
+define i64 @test_smsubl(i64 %acc, i32 %val1, i32 %val2) {
+; CHECK: test_smsubl:
+  %ext1 = sext i32 %val1 to i64
+  %ext2 = sext i32 %val2 to i64
+  %prod = mul i64 %ext1, %ext2
+  %res = sub i64 %acc, %prod
+; CHECK: smsubl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
+  ret i64 %res
+}
+
+define i64 @test_umaddl(i64 %acc, i32 %val1, i32 %val2) {
+; CHECK: test_umaddl:
+  %ext1 = zext i32 %val1 to i64
+  %ext2 = zext i32 %val2 to i64
+  %prod = mul i64 %ext1, %ext2
+  %res = add i64 %acc, %prod
+; CHECK: umaddl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
+  ret i64 %res
+}
+
+define i64 @test_umsubl(i64 %acc, i32 %val1, i32 %val2) {
+; CHECK: test_umsubl:
+  %ext1 = zext i32 %val1 to i64
+  %ext2 = zext i32 %val2 to i64
+  %prod = mul i64 %ext1, %ext2
+  %res = sub i64 %acc, %prod
+; CHECK: umsubl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
+  ret i64 %res
+}
+
+define i64 @test_smulh(i64 %lhs, i64 %rhs) {
+; CHECK: test_smulh:
+  %ext1 = sext i64 %lhs to i128
+  %ext2 = sext i64 %rhs to i128
+  %res = mul i128 %ext1, %ext2
+  %high = lshr i128 %res, 64
+  %val = trunc i128 %high to i64
+; CHECK: smulh {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+  ret i64 %val
+}
+
+define i64 @test_umulh(i64 %lhs, i64 %rhs) {
+; CHECK: test_umulh:
+  %ext1 = zext i64 %lhs to i128
+  %ext2 = zext i64 %rhs to i128
+  %res = mul i128 %ext1, %ext2
+  %high = lshr i128 %res, 64
+  %val = trunc i128 %high to i64
+; CHECK: umulh {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+  ret i64 %val
+}
+
+define i32 @test_mul32(i32 %lhs, i32 %rhs) {
+; CHECK: test_mul32:
+  %res = mul i32 %lhs, %rhs
+; CHECK: mul {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+  ret i32 %res
+}
+
+define i64 @test_mul64(i64 %lhs, i64 %rhs) {
+; CHECK: test_mul64:
+  %res = mul i64 %lhs, %rhs
+; CHECK: mul {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+  ret i64 %res
+}
+
+define i32 @test_mneg32(i32 %lhs, i32 %rhs) {
+; CHECK: test_mneg32:
+  %prod = mul i32 %lhs, %rhs
+  %res = sub i32 0, %prod
+; CHECK: mneg {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+  ret i32 %res
+}
+
+define i64 @test_mneg64(i64 %lhs, i64 %rhs) {
+; CHECK: test_mneg64:
+  %prod = mul i64 %lhs, %rhs
+  %res = sub i64 0, %prod
+; CHECK: mneg {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+  ret i64 %res
+}
+
+define i64 @test_smull(i32 %lhs, i32 %rhs) {
+; CHECK: test_smull:
+  %ext1 = sext i32 %lhs to i64
+  %ext2 = sext i32 %rhs to i64
+  %res = mul i64 %ext1, %ext2
+; CHECK: smull {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+  ret i64 %res
+}
+
+define i64 @test_umull(i32 %lhs, i32 %rhs) {
+; CHECK: test_umull:
+  %ext1 = zext i32 %lhs to i64
+  %ext2 = zext i32 %rhs to i64
+  %res = mul i64 %ext1, %ext2
+; CHECK: umull {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+  ret i64 %res
+}
+
+define i64 @test_smnegl(i32 %lhs, i32 %rhs) {
+; CHECK: test_smnegl:
+  %ext1 = sext i32 %lhs to i64
+  %ext2 = sext i32 %rhs to i64
+  %prod = mul i64 %ext1, %ext2
+  %res = sub i64 0, %prod
+; CHECK: smnegl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+  ret i64 %res
+}
+
+define i64 @test_umnegl(i32 %lhs, i32 %rhs) {
+; CHECK: test_umnegl:
+  %ext1 = zext i32 %lhs to i64
+  %ext2 = zext i32 %rhs to i64
+  %prod = mul i64 %ext1, %ext2
+  %res = sub i64 0, %prod
+; CHECK: umnegl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+  ret i64 %res
+}
diff --git a/test/CodeGen/AArch64/dp1.ll b/test/CodeGen/AArch64/dp1.ll
new file mode 100644
index 0000000..83aa8b4
--- /dev/null
+++ b/test/CodeGen/AArch64/dp1.ll
@@ -0,0 +1,152 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @rev_i32() {
+; CHECK: rev_i32:
+    %val0_tmp = load i32* @var32
+    %val1_tmp = call i32 @llvm.bswap.i32(i32 %val0_tmp)
+; CHECK: rev	{{w[0-9]+}}, {{w[0-9]+}}
+    store volatile i32 %val1_tmp, i32* @var32
+    ret void
+}
+
+define void @rev_i64() {
+; CHECK: rev_i64:
+    %val0_tmp = load i64* @var64
+    %val1_tmp = call i64 @llvm.bswap.i64(i64 %val0_tmp)
+; CHECK: rev	{{x[0-9]+}}, {{x[0-9]+}}
+    store volatile i64 %val1_tmp, i64* @var64
+    ret void
+}
+
+define void @rev32_i64() {
+; CHECK: rev32_i64:
+    %val0_tmp = load i64* @var64
+    %val1_tmp = shl i64 %val0_tmp, 32
+    %val5_tmp = sub i64 64, 32
+    %val2_tmp = lshr i64 %val0_tmp, %val5_tmp
+    %val3_tmp = or i64 %val1_tmp, %val2_tmp
+    %val4_tmp = call i64 @llvm.bswap.i64(i64 %val3_tmp)
+; CHECK: rev32	{{x[0-9]+}}, {{x[0-9]+}}
+    store volatile i64 %val4_tmp, i64* @var64
+    ret void
+}
+
+define void @rev16_i32() {
+; CHECK: rev16_i32:
+    %val0_tmp = load i32* @var32
+    %val1_tmp = shl i32 %val0_tmp, 16
+    %val2_tmp = lshr i32 %val0_tmp, 16
+    %val3_tmp = or i32 %val1_tmp, %val2_tmp
+    %val4_tmp = call i32 @llvm.bswap.i32(i32 %val3_tmp)
+; CHECK: rev16	{{w[0-9]+}}, {{w[0-9]+}}
+    store volatile i32 %val4_tmp, i32* @var32
+    ret void
+}
+
+define void @clz_zerodef_i32() {
+; CHECK: clz_zerodef_i32:
+    %val0_tmp = load i32* @var32
+    %val4_tmp = call i32 @llvm.ctlz.i32(i32 %val0_tmp, i1 0)
+; CHECK: clz	{{w[0-9]+}}, {{w[0-9]+}}
+    store volatile i32 %val4_tmp, i32* @var32
+    ret void
+}
+
+define void @clz_zerodef_i64() {
+; CHECK: clz_zerodef_i64:
+    %val0_tmp = load i64* @var64
+    %val4_tmp = call i64 @llvm.ctlz.i64(i64 %val0_tmp, i1 0)
+; CHECK: clz	{{x[0-9]+}}, {{x[0-9]+}}
+    store volatile i64 %val4_tmp, i64* @var64
+    ret void
+}
+
+define void @clz_zeroundef_i32() {
+; CHECK: clz_zeroundef_i32:
+    %val0_tmp = load i32* @var32
+    %val4_tmp = call i32 @llvm.ctlz.i32(i32 %val0_tmp, i1 1)
+; CHECK: clz	{{w[0-9]+}}, {{w[0-9]+}}
+    store volatile i32 %val4_tmp, i32* @var32
+    ret void
+}
+
+define void @clz_zeroundef_i64() {
+; CHECK: clz_zeroundef_i64:
+    %val0_tmp = load i64* @var64
+    %val4_tmp = call i64 @llvm.ctlz.i64(i64 %val0_tmp, i1 1)
+; CHECK: clz	{{x[0-9]+}}, {{x[0-9]+}}
+    store volatile i64 %val4_tmp, i64* @var64
+    ret void
+}
+
+define void @cttz_zerodef_i32() {
+; CHECK: cttz_zerodef_i32:
+    %val0_tmp = load i32* @var32
+    %val4_tmp = call i32 @llvm.cttz.i32(i32 %val0_tmp, i1 0)
+; CHECK: rbit   [[REVERSED:w[0-9]+]], {{w[0-9]+}}
+; CHECK: clz	{{w[0-9]+}}, [[REVERSED]]
+    store volatile i32 %val4_tmp, i32* @var32
+    ret void
+}
+
+define void @cttz_zerodef_i64() {
+; CHECK: cttz_zerodef_i64:
+    %val0_tmp = load i64* @var64
+    %val4_tmp = call i64 @llvm.cttz.i64(i64 %val0_tmp, i1 0)
+; CHECK: rbit   [[REVERSED:x[0-9]+]], {{x[0-9]+}}
+; CHECK: clz	{{x[0-9]+}}, [[REVERSED]]
+    store volatile i64 %val4_tmp, i64* @var64
+    ret void
+}
+
+define void @cttz_zeroundef_i32() {
+; CHECK: cttz_zeroundef_i32:
+    %val0_tmp = load i32* @var32
+    %val4_tmp = call i32 @llvm.cttz.i32(i32 %val0_tmp, i1 1)
+; CHECK: rbit   [[REVERSED:w[0-9]+]], {{w[0-9]+}}
+; CHECK: clz	{{w[0-9]+}}, [[REVERSED]]
+    store volatile i32 %val4_tmp, i32* @var32
+    ret void
+}
+
+define void @cttz_zeroundef_i64() {
+; CHECK: cttz_zeroundef_i64:
+    %val0_tmp = load i64* @var64
+    %val4_tmp = call i64 @llvm.cttz.i64(i64 %val0_tmp, i1 1)
+; CHECK: rbit   [[REVERSED:x[0-9]+]], {{x[0-9]+}}
+; CHECK: clz	{{x[0-9]+}}, [[REVERSED]]
+    store volatile i64 %val4_tmp, i64* @var64
+    ret void
+}
+
+; These two are just compilation tests really: the operation's set to Expand in
+; ISelLowering.
+define void @ctpop_i32() {
+; CHECK: ctpop_i32:
+    %val0_tmp = load i32* @var32
+    %val4_tmp = call i32 @llvm.ctpop.i32(i32 %val0_tmp)
+    store volatile i32 %val4_tmp, i32* @var32
+    ret void
+}
+
+define void @ctpop_i64() {
+; CHECK: ctpop_i64:
+    %val0_tmp = load i64* @var64
+    %val4_tmp = call i64 @llvm.ctpop.i64(i64 %val0_tmp)
+    store volatile i64 %val4_tmp, i64* @var64
+    ret void
+}
+
+
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
+declare i32  @llvm.ctlz.i32 (i32, i1)
+declare i64  @llvm.ctlz.i64 (i64, i1)
+declare i32  @llvm.cttz.i32 (i32, i1)
+declare i64  @llvm.cttz.i64 (i64, i1)
+declare i32  @llvm.ctpop.i32 (i32)
+declare i64  @llvm.ctpop.i64 (i64)
+
diff --git a/test/CodeGen/AArch64/dp2.ll b/test/CodeGen/AArch64/dp2.ll
new file mode 100644
index 0000000..4c740f6
--- /dev/null
+++ b/test/CodeGen/AArch64/dp2.ll
@@ -0,0 +1,169 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var32_0 = global i32 0
+@var32_1 = global i32 0
+@var64_0 = global i64 0
+@var64_1 = global i64 0
+
+define void @rorv_i64() {
+; CHECK: rorv_i64:
+    %val0_tmp = load i64* @var64_0
+    %val1_tmp = load i64* @var64_1
+    %val2_tmp = sub i64 64, %val1_tmp
+    %val3_tmp = shl i64 %val0_tmp, %val2_tmp
+    %val4_tmp = lshr i64 %val0_tmp, %val1_tmp
+    %val5_tmp = or i64 %val3_tmp, %val4_tmp
+; CHECK: ror	{{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+    store volatile i64 %val5_tmp, i64* @var64_0
+    ret void
+}
+
+define void @asrv_i64() {
+; CHECK: asrv_i64:
+    %val0_tmp = load i64* @var64_0
+    %val1_tmp = load i64* @var64_1
+    %val4_tmp = ashr i64 %val0_tmp, %val1_tmp
+; CHECK: asr	{{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+    store volatile i64 %val4_tmp, i64* @var64_1
+    ret void
+}
+
+define void @lsrv_i64() {
+; CHECK: lsrv_i64:
+    %val0_tmp = load i64* @var64_0
+    %val1_tmp = load i64* @var64_1
+    %val4_tmp = lshr i64 %val0_tmp, %val1_tmp
+; CHECK: lsr	{{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+    store volatile i64 %val4_tmp, i64* @var64_0
+    ret void
+}
+
+define void @lslv_i64() {
+; CHECK: lslv_i64:
+    %val0_tmp = load i64* @var64_0
+    %val1_tmp = load i64* @var64_1
+    %val4_tmp = shl i64 %val0_tmp, %val1_tmp
+; CHECK: lsl	{{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+    store volatile i64 %val4_tmp, i64* @var64_1
+    ret void
+}
+
+define void @udiv_i64() {
+; CHECK: udiv_i64:
+    %val0_tmp = load i64* @var64_0
+    %val1_tmp = load i64* @var64_1
+    %val4_tmp = udiv i64 %val0_tmp, %val1_tmp
+; CHECK: udiv	{{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+    store volatile i64 %val4_tmp, i64* @var64_0
+    ret void
+}
+
+define void @sdiv_i64() {
+; CHECK: sdiv_i64:
+    %val0_tmp = load i64* @var64_0
+    %val1_tmp = load i64* @var64_1
+    %val4_tmp = sdiv i64 %val0_tmp, %val1_tmp
+; CHECK: sdiv	{{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+    store volatile i64 %val4_tmp, i64* @var64_1
+    ret void
+}
+
+
+define void @lsrv_i32() {
+; CHECK: lsrv_i32:
+    %val0_tmp = load i32* @var32_0
+    %val1_tmp = load i32* @var32_1
+    %val2_tmp = add i32 1, %val1_tmp
+    %val4_tmp = lshr i32 %val0_tmp, %val2_tmp
+; CHECK: lsr	{{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+    store volatile i32 %val4_tmp, i32* @var32_0
+    ret void
+}
+
+define void @lslv_i32() {
+; CHECK: lslv_i32:
+    %val0_tmp = load i32* @var32_0
+    %val1_tmp = load i32* @var32_1
+    %val2_tmp = add i32 1, %val1_tmp
+    %val4_tmp = shl i32 %val0_tmp, %val2_tmp
+; CHECK: lsl	{{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+    store volatile i32 %val4_tmp, i32* @var32_1
+    ret void
+}
+
+define void @rorv_i32() {
+; CHECK: rorv_i32:
+    %val0_tmp = load i32* @var32_0
+    %val6_tmp = load i32* @var32_1
+    %val1_tmp = add i32 1, %val6_tmp
+    %val2_tmp = sub i32 32, %val1_tmp
+    %val3_tmp = shl i32 %val0_tmp, %val2_tmp
+    %val4_tmp = lshr i32 %val0_tmp, %val1_tmp
+    %val5_tmp = or i32 %val3_tmp, %val4_tmp
+; CHECK: ror	{{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+    store volatile i32 %val5_tmp, i32* @var32_0
+    ret void
+}
+
+define void @asrv_i32() {
+; CHECK: asrv_i32:
+    %val0_tmp = load i32* @var32_0
+    %val1_tmp = load i32* @var32_1
+    %val2_tmp = add i32 1, %val1_tmp
+    %val4_tmp = ashr i32 %val0_tmp, %val2_tmp
+; CHECK: asr	{{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+    store volatile i32 %val4_tmp, i32* @var32_1
+    ret void
+}
+
+define void @sdiv_i32() {
+; CHECK: sdiv_i32:
+    %val0_tmp = load i32* @var32_0
+    %val1_tmp = load i32* @var32_1
+    %val4_tmp = sdiv i32 %val0_tmp, %val1_tmp
+; CHECK: sdiv	{{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+    store volatile i32 %val4_tmp, i32* @var32_1
+    ret void
+}
+
+define void @udiv_i32() {
+; CHECK: udiv_i32:
+    %val0_tmp = load i32* @var32_0
+    %val1_tmp = load i32* @var32_1
+    %val4_tmp = udiv i32 %val0_tmp, %val1_tmp
+; CHECK: udiv	{{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+    store volatile i32 %val4_tmp, i32* @var32_0
+    ret void
+}
+
+; The point of this test is that we may not actually see (shl GPR32:$Val, (zext GPR32:$Val2))
+; in the DAG (the RHS may be natively 64-bit), but we should still use the lsl instructions.
+define i32 @test_lsl32() {
+; CHECK: test_lsl32:
+
+  %val = load i32* @var32_0
+  %ret = shl i32 1, %val
+; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+
+  ret i32 %ret
+}
+
+define i32 @test_lsr32() {
+; CHECK: test_lsr32:
+
+  %val = load i32* @var32_0
+  %ret = lshr i32 1, %val
+; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+
+  ret i32 %ret
+}
+
+define i32 @test_asr32(i32 %in) {
+; CHECK: test_asr32:
+
+  %val = load i32* @var32_0
+  %ret = ashr i32 %in, %val
+; CHECK: asr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+
+  ret i32 %ret
+}
diff --git a/test/CodeGen/AArch64/elf-extern.ll b/test/CodeGen/AArch64/elf-extern.ll
new file mode 100644
index 0000000..ee89d8d
--- /dev/null
+++ b/test/CodeGen/AArch64/elf-extern.ll
@@ -0,0 +1,21 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | elf-dump | FileCheck %s
+
+; External symbols are a different concept to global variables but should still
+; get relocations and so on when used.
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+
+define i32 @check_extern() {
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0)
+  ret i32 0
+}
+
+; CHECK: .rela.text
+; CHECK: ('r_sym', 0x00000009)
+; CHECK-NEXT: ('r_type', 0x0000011b)
+
+; CHECK: .symtab
+; CHECK: Symbol 9
+; CHECK-NEXT: memcpy
+
+
diff --git a/test/CodeGen/AArch64/extern-weak.ll b/test/CodeGen/AArch64/extern-weak.ll
new file mode 100644
index 0000000..2989776
--- /dev/null
+++ b/test/CodeGen/AArch64/extern-weak.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -o - < %s | FileCheck %s
+
+declare extern_weak i32 @var()
+
+define i32()* @foo() {
+; The usual ADRP/ADD pair can't be used for a weak reference because it must
+; evaluate to 0 if the symbol is undefined. We use a litpool entry.
+  ret i32()* @var
+; CHECK: .LCPI0_0:
+; CHECK-NEXT: .xword var
+
+; CHECK: ldr x0, [{{x[0-9]+}}, #:lo12:.LCPI0_0]
+
+}
diff --git a/test/CodeGen/AArch64/extract.ll b/test/CodeGen/AArch64/extract.ll
new file mode 100644
index 0000000..0626781
--- /dev/null
+++ b/test/CodeGen/AArch64/extract.ll
@@ -0,0 +1,57 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+define i64 @ror_i64(i64 %in) {
+; CHECK: ror_i64:
+    %left = shl i64 %in, 19
+    %right = lshr i64 %in, 45
+    %val5 = or i64 %left, %right
+; CHECK: extr {{x[0-9]+}}, x0, x0, #45
+    ret i64 %val5
+}
+
+define i32 @ror_i32(i32 %in) {
+; CHECK: ror_i32:
+    %left = shl i32 %in, 9
+    %right = lshr i32 %in, 23
+    %val5 = or i32 %left, %right
+; CHECK: extr {{w[0-9]+}}, w0, w0, #23
+    ret i32 %val5
+}
+
+define i32 @extr_i32(i32 %lhs, i32 %rhs) {
+; CHECK: extr_i32:
+  %left = shl i32 %lhs, 6
+  %right = lshr i32 %rhs, 26
+  %val = or i32 %left, %right
+  ; Order of lhs and rhs matters here. Regalloc would have to be very odd to use
+  ; something other than w0 and w1.
+; CHECK: extr {{w[0-9]+}}, w0, w1, #26
+
+  ret i32 %val
+}
+
+define i64 @extr_i64(i64 %lhs, i64 %rhs) {
+; CHECK: extr_i64:
+  %right = lshr i64 %rhs, 40
+  %left = shl i64 %lhs, 24
+  %val = or i64 %right, %left
+  ; Order of lhs and rhs matters here. Regalloc would have to be very odd to use
+  ; something other than x0 and x1.
+; CHECK: extr {{x[0-9]+}}, x0, x1, #40
+
+  ret i64 %val
+}
+
+; Regression test: a bad experimental pattern crept into git which optimised
+; this pattern to a single EXTR.
+define i32 @extr_regress(i32 %a, i32 %b) {
+; CHECK: extr_regress:
+
+    %sh1 = shl i32 %a, 14
+    %sh2 = lshr i32 %b, 14
+    %val = or i32 %sh2, %sh1
+; CHECK-NOT: extr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, #{{[0-9]+}}
+
+    ret i32 %val
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/fastcc-reserved.ll b/test/CodeGen/AArch64/fastcc-reserved.ll
new file mode 100644
index 0000000..1a114a5
--- /dev/null
+++ b/test/CodeGen/AArch64/fastcc-reserved.ll
@@ -0,0 +1,58 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s
+
+; This test is designed to be run in the situation where the call-frame is
+; not reserved (no -disable-fp-elim is passed; presumably the variable-sized
+; allocas below ensure this), but where callee-pop can occur (hence -tailcallopt).
+
+declare fastcc void @will_pop([8 x i32], i32 %val)
+
+define fastcc void @foo(i32 %in) {
+; CHECK: foo:
+
+  %addr = alloca i8, i32 %in
+
+; Normal frame setup stuff:
+; CHECK: sub sp, sp,
+; CHECK: stp x29, x30
+
+; Reserve space for call-frame:
+; CHECK: sub sp, sp, #16
+
+  call fastcc void @will_pop([8 x i32] undef, i32 42)
+; CHECK: bl will_pop
+
+; Since @will_pop is fastcc with tailcallopt, it will put the stack
+; back where it needs to be, we shouldn't duplicate that
+; CHECK-NOT: sub sp, sp, #16
+; CHECK-NOT: add sp, sp,
+
+; CHECK: ldp x29, x30
+; CHECK: add sp, sp,
+  ret void
+}
+
+declare void @wont_pop([8 x i32], i32 %val)
+
+define void @foo1(i32 %in) {
+; CHECK: foo1:
+
+  %addr = alloca i8, i32 %in
+; Normal frame setup again
+; CHECK: sub sp, sp,
+; CHECK: stp x29, x30
+
+; Reserve space for call-frame
+; CHECK: sub sp, sp, #16
+
+  call void @wont_pop([8 x i32] undef, i32 42)
+; CHECK: bl wont_pop
+
+; This time we *do* need to unreserve the call-frame
+; CHECK: add sp, sp, #16
+
+; Check for epilogue (primarily to make sure sp spotted above wasn't
+; part of it).
+; CHECK: ldp x29, x30
+; CHECK: add sp, sp,
+  ret void
+}
diff --git a/test/CodeGen/AArch64/fastcc.ll b/test/CodeGen/AArch64/fastcc.ll
new file mode 100644
index 0000000..41cde94
--- /dev/null
+++ b/test/CodeGen/AArch64/fastcc.ll
@@ -0,0 +1,123 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-TAIL
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+; Without tailcallopt fastcc still means the caller cleans up the
+; stack, so try to make sure this is respected.
+
+define fastcc void @func_stack0() {
+; CHECK: func_stack0:
+; CHECK: sub sp, sp, #48
+
+; CHECK-TAIL: func_stack0:
+; CHECK-TAIL: sub sp, sp, #48
+
+
+  call fastcc void @func_stack8([8 x i32] undef, i32 42)
+; CHECK:  bl func_stack8
+; CHECK-NOT: sub sp, sp,
+
+; CHECK-TAIL: bl func_stack8
+; CHECK-TAIL: sub sp, sp, #16
+
+
+  call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
+; CHECK: bl func_stack32
+; CHECK-NOT: sub sp, sp,
+
+; CHECK-TAIL: bl func_stack32
+; CHECK-TAIL: sub sp, sp, #32
+
+
+  call fastcc void @func_stack0()
+; CHECK: bl func_stack0
+; CHECK-NOT: sub sp, sp
+
+; CHECK-TAIL: bl func_stack0
+; CHECK-TAIL-NOT: sub sp, sp
+
+  ret void
+; CHECK: add sp, sp, #48
+; CHECK-NEXT: ret
+
+; CHECK-TAIL: add sp, sp, #48
+; CHECK-TAIL-NEXT: ret
+
+}
+
+define fastcc void @func_stack8([8 x i32], i32 %stacked) {
+; CHECK: func_stack8:
+; CHECK: sub sp, sp, #48
+
+; CHECK-TAIL: func_stack8:
+; CHECK-TAIL: sub sp, sp, #48
+
+
+  call fastcc void @func_stack8([8 x i32] undef, i32 42)
+; CHECK:  bl func_stack8
+; CHECK-NOT: sub sp, sp,
+
+; CHECK-TAIL: bl func_stack8
+; CHECK-TAIL: sub sp, sp, #16
+
+
+  call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
+; CHECK: bl func_stack32
+; CHECK-NOT: sub sp, sp,
+
+; CHECK-TAIL: bl func_stack32
+; CHECK-TAIL: sub sp, sp, #32
+
+
+  call fastcc void @func_stack0()
+; CHECK: bl func_stack0
+; CHECK-NOT: sub sp, sp
+
+; CHECK-TAIL: bl func_stack0
+; CHECK-TAIL-NOT: sub sp, sp
+
+  ret void
+; CHECK: add sp, sp, #48
+; CHECK-NEXT: ret
+
+; CHECK-TAIL: add sp, sp, #64
+; CHECK-TAIL-NEXT: ret
+}
+
+define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) {
+; CHECK: func_stack32:
+; CHECK: sub sp, sp, #48
+
+; CHECK-TAIL: func_stack32:
+; CHECK-TAIL: sub sp, sp, #48
+
+
+  call fastcc void @func_stack8([8 x i32] undef, i32 42)
+; CHECK:  bl func_stack8
+; CHECK-NOT: sub sp, sp,
+
+; CHECK-TAIL: bl func_stack8
+; CHECK-TAIL: sub sp, sp, #16
+
+
+  call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
+; CHECK: bl func_stack32
+; CHECK-NOT: sub sp, sp,
+
+; CHECK-TAIL: bl func_stack32
+; CHECK-TAIL: sub sp, sp, #32
+
+
+  call fastcc void @func_stack0()
+; CHECK: bl func_stack0
+; CHECK-NOT: sub sp, sp
+
+; CHECK-TAIL: bl func_stack0
+; CHECK-TAIL-NOT: sub sp, sp
+
+  ret void
+; CHECK: add sp, sp, #48
+; CHECK-NEXT: ret
+
+; CHECK-TAIL: add sp, sp, #80
+; CHECK-TAIL-NEXT: ret
+}
diff --git a/test/CodeGen/AArch64/fcmp.ll b/test/CodeGen/AArch64/fcmp.ll
new file mode 100644
index 0000000..ad4a903
--- /dev/null
+++ b/test/CodeGen/AArch64/fcmp.ll
@@ -0,0 +1,81 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+declare void @bar(i32)
+
+define void @test_float(float %a, float %b) {
+; CHECK: test_float:
+
+  %tst1 = fcmp oeq float %a, %b
+  br i1 %tst1, label %end, label %t2
+; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK: b.eq .L
+
+t2:
+  %tst2 = fcmp une float %b, 0.0
+  br i1 %tst2, label %t3, label %end
+; CHECK: fcmp {{s[0-9]+}}, #0.0
+; CHECK: b.eq .L
+
+
+t3:
+; This test can't be implemented with just one A64 conditional
+; branch. LLVM converts "ordered and not equal" to "unordered or
+; equal" before instruction selection, which is what we currently
+; test. Obviously, other sequences are valid.
+  %tst3 = fcmp one float %a,  %b
+  br i1 %tst3, label %t4, label %end
+; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NEXT: b.eq .[[T4:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: b.vs .[[T4]]
+t4:
+  %tst4 = fcmp uge float %a, -0.0
+  br i1 %tst4, label %t5, label %end
+; CHECK-NOT: fcmp {{s[0-9]+}}, #0.0
+; CHECK: b.mi .LBB
+
+t5:
+  call void @bar(i32 0)
+  ret void
+end:
+  ret void
+
+}
+
+define void @test_double(double %a, double %b) {
+; CHECK: test_double:
+
+  %tst1 = fcmp oeq double %a, %b
+  br i1 %tst1, label %end, label %t2
+; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: b.eq .L
+
+t2:
+  %tst2 = fcmp une double %b, 0.0
+  br i1 %tst2, label %t3, label %end
+; CHECK: fcmp {{d[0-9]+}}, #0.0
+; CHECK: b.eq .L
+
+
+t3:
+; This test can't be implemented with just one A64 conditional
+; branch. LLVM converts "ordered and not equal" to "unordered or
+; equal" before instruction selection, which is what we currently
+; test. Obviously, other sequences are valid.
+  %tst3 = fcmp one double %a,  %b
+  br i1 %tst3, label %t4, label %end
+; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NEXT: b.eq .[[T4:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: b.vs .[[T4]]
+t4:
+  %tst4 = fcmp uge double %a, -0.0
+  br i1 %tst4, label %t5, label %end
+; CHECK-NOT: fcmp {{d[0-9]+}}, #0.0
+; CHECK: b.mi .LBB
+
+t5:
+  call void @bar(i32 0)
+  ret void
+end:
+  ret void
+
+}
diff --git a/test/CodeGen/AArch64/fcvt-fixed.ll b/test/CodeGen/AArch64/fcvt-fixed.ll
new file mode 100644
index 0000000..0f7b95b
--- /dev/null
+++ b/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -0,0 +1,191 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_fcvtzs(float %flt, double %dbl) {
+; CHECK: test_fcvtzs:
+
+  %fix1 = fmul float %flt, 128.0
+  %cvt1 = fptosi float %fix1 to i32
+; CHECK: fcvtzs {{w[0-9]+}}, {{s[0-9]+}}, #7
+  store volatile i32 %cvt1, i32* @var32
+
+  %fix2 = fmul float %flt, 4294967296.0
+  %cvt2 = fptosi float %fix2 to i32
+; CHECK: fcvtzs {{w[0-9]+}}, {{s[0-9]+}}, #32
+  store volatile i32 %cvt2, i32* @var32
+
+  %fix3 = fmul float %flt, 128.0
+  %cvt3 = fptosi float %fix3 to i64
+; CHECK: fcvtzs {{x[0-9]+}}, {{s[0-9]+}}, #7
+  store volatile i64 %cvt3, i64* @var64
+
+  %fix4 = fmul float %flt, 18446744073709551616.0
+  %cvt4 = fptosi float %fix4 to i64
+; CHECK: fcvtzs {{x[0-9]+}}, {{s[0-9]+}}, #64
+  store volatile i64 %cvt4, i64* @var64
+
+  %fix5 = fmul double %dbl, 128.0
+  %cvt5 = fptosi double %fix5 to i32
+; CHECK: fcvtzs {{w[0-9]+}}, {{d[0-9]+}}, #7
+  store volatile i32 %cvt5, i32* @var32
+
+  %fix6 = fmul double %dbl, 4294967296.0
+  %cvt6 = fptosi double %fix6 to i32
+; CHECK: fcvtzs {{w[0-9]+}}, {{d[0-9]+}}, #32
+  store volatile i32 %cvt6, i32* @var32
+
+  %fix7 = fmul double %dbl, 128.0
+  %cvt7 = fptosi double %fix7 to i64
+; CHECK: fcvtzs {{x[0-9]+}}, {{d[0-9]+}}, #7
+  store volatile i64 %cvt7, i64* @var64
+
+  %fix8 = fmul double %dbl, 18446744073709551616.0
+  %cvt8 = fptosi double %fix8 to i64
+; CHECK: fcvtzs {{x[0-9]+}}, {{d[0-9]+}}, #64
+  store volatile i64 %cvt8, i64* @var64
+
+  ret void
+}
+
+define void @test_fcvtzu(float %flt, double %dbl) {
+; CHECK: test_fcvtzu:
+
+  %fix1 = fmul float %flt, 128.0
+  %cvt1 = fptoui float %fix1 to i32
+; CHECK: fcvtzu {{w[0-9]+}}, {{s[0-9]+}}, #7
+  store volatile i32 %cvt1, i32* @var32
+
+  %fix2 = fmul float %flt, 4294967296.0
+  %cvt2 = fptoui float %fix2 to i32
+; CHECK: fcvtzu {{w[0-9]+}}, {{s[0-9]+}}, #32
+  store volatile i32 %cvt2, i32* @var32
+
+  %fix3 = fmul float %flt, 128.0
+  %cvt3 = fptoui float %fix3 to i64
+; CHECK: fcvtzu {{x[0-9]+}}, {{s[0-9]+}}, #7
+  store volatile i64 %cvt3, i64* @var64
+
+  %fix4 = fmul float %flt, 18446744073709551616.0
+  %cvt4 = fptoui float %fix4 to i64
+; CHECK: fcvtzu {{x[0-9]+}}, {{s[0-9]+}}, #64
+  store volatile i64 %cvt4, i64* @var64
+
+  %fix5 = fmul double %dbl, 128.0
+  %cvt5 = fptoui double %fix5 to i32
+; CHECK: fcvtzu {{w[0-9]+}}, {{d[0-9]+}}, #7
+  store volatile i32 %cvt5, i32* @var32
+
+  %fix6 = fmul double %dbl, 4294967296.0
+  %cvt6 = fptoui double %fix6 to i32
+; CHECK: fcvtzu {{w[0-9]+}}, {{d[0-9]+}}, #32
+  store volatile i32 %cvt6, i32* @var32
+
+  %fix7 = fmul double %dbl, 128.0
+  %cvt7 = fptoui double %fix7 to i64
+; CHECK: fcvtzu {{x[0-9]+}}, {{d[0-9]+}}, #7
+  store volatile i64 %cvt7, i64* @var64
+
+  %fix8 = fmul double %dbl, 18446744073709551616.0
+  %cvt8 = fptoui double %fix8 to i64
+; CHECK: fcvtzu {{x[0-9]+}}, {{d[0-9]+}}, #64
+  store volatile i64 %cvt8, i64* @var64
+
+  ret void
+}
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+define void @test_scvtf(i32 %int, i64 %long) {
+; CHECK: test_scvtf:
+
+  %cvt1 = sitofp i32 %int to float
+  %fix1 = fdiv float %cvt1, 128.0
+; CHECK: scvtf {{s[0-9]+}}, {{w[0-9]+}}, #7
+  store volatile float %fix1, float* @varfloat
+
+  %cvt2 = sitofp i32 %int to float
+  %fix2 = fdiv float %cvt2, 4294967296.0
+; CHECK: scvtf {{s[0-9]+}}, {{w[0-9]+}}, #32
+  store volatile float %fix2, float* @varfloat
+
+  %cvt3 = sitofp i64 %long to float
+  %fix3 = fdiv float %cvt3, 128.0
+; CHECK: scvtf {{s[0-9]+}}, {{x[0-9]+}}, #7
+  store volatile float %fix3, float* @varfloat
+
+  %cvt4 = sitofp i64 %long to float
+  %fix4 = fdiv float %cvt4, 18446744073709551616.0
+; CHECK: scvtf {{s[0-9]+}}, {{x[0-9]+}}, #64
+  store volatile float %fix4, float* @varfloat
+
+  %cvt5 = sitofp i32 %int to double
+  %fix5 = fdiv double %cvt5, 128.0
+; CHECK: scvtf {{d[0-9]+}}, {{w[0-9]+}}, #7
+  store volatile double %fix5, double* @vardouble
+
+  %cvt6 = sitofp i32 %int to double
+  %fix6 = fdiv double %cvt6, 4294967296.0
+; CHECK: scvtf {{d[0-9]+}}, {{w[0-9]+}}, #32
+  store volatile double %fix6, double* @vardouble
+
+  %cvt7 = sitofp i64 %long to double
+  %fix7 = fdiv double %cvt7, 128.0
+; CHECK: scvtf {{d[0-9]+}}, {{x[0-9]+}}, #7
+  store volatile double %fix7, double* @vardouble
+
+  %cvt8 = sitofp i64 %long to double
+  %fix8 = fdiv double %cvt8, 18446744073709551616.0
+; CHECK: scvtf {{d[0-9]+}}, {{x[0-9]+}}, #64
+  store volatile double %fix8, double* @vardouble
+
+  ret void
+}
+
+define void @test_ucvtf(i32 %int, i64 %long) {
+; CHECK: test_ucvtf:
+
+  %cvt1 = uitofp i32 %int to float
+  %fix1 = fdiv float %cvt1, 128.0
+; CHECK: ucvtf {{s[0-9]+}}, {{w[0-9]+}}, #7
+  store volatile float %fix1, float* @varfloat
+
+  %cvt2 = uitofp i32 %int to float
+  %fix2 = fdiv float %cvt2, 4294967296.0
+; CHECK: ucvtf {{s[0-9]+}}, {{w[0-9]+}}, #32
+  store volatile float %fix2, float* @varfloat
+
+  %cvt3 = uitofp i64 %long to float
+  %fix3 = fdiv float %cvt3, 128.0
+; CHECK: ucvtf {{s[0-9]+}}, {{x[0-9]+}}, #7
+  store volatile float %fix3, float* @varfloat
+
+  %cvt4 = uitofp i64 %long to float
+  %fix4 = fdiv float %cvt4, 18446744073709551616.0
+; CHECK: ucvtf {{s[0-9]+}}, {{x[0-9]+}}, #64
+  store volatile float %fix4, float* @varfloat
+
+  %cvt5 = uitofp i32 %int to double
+  %fix5 = fdiv double %cvt5, 128.0
+; CHECK: ucvtf {{d[0-9]+}}, {{w[0-9]+}}, #7
+  store volatile double %fix5, double* @vardouble
+
+  %cvt6 = uitofp i32 %int to double
+  %fix6 = fdiv double %cvt6, 4294967296.0
+; CHECK: ucvtf {{d[0-9]+}}, {{w[0-9]+}}, #32
+  store volatile double %fix6, double* @vardouble
+
+  %cvt7 = uitofp i64 %long to double
+  %fix7 = fdiv double %cvt7, 128.0
+; CHECK: ucvtf {{d[0-9]+}}, {{x[0-9]+}}, #7
+  store volatile double %fix7, double* @vardouble
+
+  %cvt8 = uitofp i64 %long to double
+  %fix8 = fdiv double %cvt8, 18446744073709551616.0
+; CHECK: ucvtf {{d[0-9]+}}, {{x[0-9]+}}, #64
+  store volatile double %fix8, double* @vardouble
+
+  ret void
+}
diff --git a/test/CodeGen/AArch64/fcvt-int.ll b/test/CodeGen/AArch64/fcvt-int.ll
new file mode 100644
index 0000000..c771d68
--- /dev/null
+++ b/test/CodeGen/AArch64/fcvt-int.ll
@@ -0,0 +1,151 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+define i32 @test_floattoi32(float %in) {
+; CHECK: test_floattoi32:
+
+  %signed = fptosi float %in to i32
+  %unsigned = fptoui float %in to i32
+; CHECK: fcvtzu [[UNSIG:w[0-9]+]], {{s[0-9]+}}
+; CHECK: fcvtzs [[SIG:w[0-9]+]], {{s[0-9]+}}
+
+  %res = sub i32 %signed, %unsigned
+; CHECK: sub {{w[0-9]+}}, [[SIG]], [[UNSIG]]
+
+  ret i32 %res
+; CHECK: ret
+}
+
+define i32 @test_doubletoi32(double %in) {
+; CHECK: test_doubletoi32:
+
+  %signed = fptosi double %in to i32
+  %unsigned = fptoui double %in to i32
+; CHECK: fcvtzu [[UNSIG:w[0-9]+]], {{d[0-9]+}}
+; CHECK: fcvtzs [[SIG:w[0-9]+]], {{d[0-9]+}}
+
+  %res = sub i32 %signed, %unsigned
+; CHECK: sub {{w[0-9]+}}, [[SIG]], [[UNSIG]]
+
+  ret i32 %res
+; CHECK: ret
+}
+
+define i64 @test_floattoi64(float %in) {
+; CHECK: test_floattoi64:
+
+  %signed = fptosi float %in to i64
+  %unsigned = fptoui float %in to i64
+; CHECK: fcvtzu [[UNSIG:x[0-9]+]], {{s[0-9]+}}
+; CHECK: fcvtzs [[SIG:x[0-9]+]], {{s[0-9]+}}
+
+  %res = sub i64 %signed, %unsigned
+; CHECK: sub {{x[0-9]+}}, [[SIG]], [[UNSIG]]
+
+  ret i64 %res
+; CHECK: ret
+}
+
+define i64 @test_doubletoi64(double %in) {
+; CHECK: test_doubletoi64:
+
+  %signed = fptosi double %in to i64
+  %unsigned = fptoui double %in to i64
+; CHECK: fcvtzu [[UNSIG:x[0-9]+]], {{d[0-9]+}}
+; CHECK: fcvtzs [[SIG:x[0-9]+]], {{d[0-9]+}}
+
+  %res = sub i64 %signed, %unsigned
+; CHECK: sub {{x[0-9]+}}, [[SIG]], [[UNSIG]]
+
+  ret i64 %res
+; CHECK: ret
+}
+
+define float @test_i32tofloat(i32 %in) {
+; CHECK: test_i32tofloat:
+; Convert the same i32 as signed and as unsigned; both results feed one fsub.
+  %signed = sitofp i32 %in to float
+  %unsigned = uitofp i32 %in to float
+; CHECK: ucvtf [[UNSIG:s[0-9]+]], {{w[0-9]+}}
+; CHECK: scvtf [[SIG:s[0-9]+]], {{w[0-9]+}}
+
+  %res = fsub float %signed, %unsigned
+; CHECK: fsub {{s[0-9]+}}, [[SIG]], [[UNSIG]]
+  ret float %res
+; CHECK: ret
+}
+
+define double @test_i32todouble(i32 %in) {
+; CHECK: test_i32todouble:
+
+  %signed = sitofp i32 %in to double
+  %unsigned = uitofp i32 %in to double
+; CHECK: ucvtf [[UNSIG:d[0-9]+]], {{w[0-9]+}}
+; CHECK: scvtf [[SIG:d[0-9]+]], {{w[0-9]+}}
+
+  %res = fsub double %signed, %unsigned
+; CHECK: fsub {{d[0-9]+}}, [[SIG]], [[UNSIG]]
+  ret double %res
+; CHECK: ret
+}
+
+define float @test_i64tofloat(i64 %in) {
+; CHECK: test_i64tofloat:
+
+  %signed = sitofp i64 %in to float
+  %unsigned = uitofp i64 %in to float
+; CHECK: ucvtf [[UNSIG:s[0-9]+]], {{x[0-9]+}}
+; CHECK: scvtf [[SIG:s[0-9]+]], {{x[0-9]+}}
+
+  %res = fsub float %signed, %unsigned
+; CHECK: fsub {{s[0-9]+}}, [[SIG]], [[UNSIG]]
+  ret float %res
+; CHECK: ret
+}
+
+define double @test_i64todouble(i64 %in) {
+; CHECK: test_i64todouble:
+; Convert the same i64 as signed and as unsigned; both results feed one fsub.
+  %signed = sitofp i64 %in to double
+  %unsigned = uitofp i64 %in to double
+; CHECK: ucvtf [[UNSIG:d[0-9]+]], {{x[0-9]+}}
+; CHECK: scvtf [[SIG:d[0-9]+]], {{x[0-9]+}}
+
+  %res = fsub double %signed, %unsigned
+; CHECK: fsub {{d[0-9]+}}, [[SIG]], [[UNSIG]]
+  ret double %res
+; CHECK: ret
+}
+
+define i32 @test_bitcastfloattoi32(float %in) {
+; CHECK: test_bitcastfloattoi32:
+
+   %res = bitcast float %in to i32
+; CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
+   ret i32 %res
+}
+
+define i64 @test_bitcastdoubletoi64(double %in) {
+; CHECK: test_bitcastdoubletoi64:
+
+   %res = bitcast double %in to i64
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+   ret i64 %res
+}
+
+define float @test_bitcasti32tofloat(i32 %in) {
+; CHECK: test_bitcasti32tofloat:
+
+   %res = bitcast i32 %in to float
+; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
+   ret float %res
+
+}
+
+define double @test_bitcasti64todouble(i64 %in) {
+; CHECK: test_bitcasti64todouble:
+
+   %res = bitcast i64 %in to double
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+   ret double %res
+
+}
diff --git a/test/CodeGen/AArch64/flags-multiuse.ll b/test/CodeGen/AArch64/flags-multiuse.ll
new file mode 100644
index 0000000..940c146
--- /dev/null
+++ b/test/CodeGen/AArch64/flags-multiuse.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+; LLVM should be able to cope with multiple uses of the same flag-setting
+; instruction at different points of a routine. Either by rematerializing the
+; compare or by saving and restoring the flag register.
+
+declare void @bar()
+
+@var = global i32 0
+
+define i32 @test_multiflag(i32 %n, i32 %m, i32 %o) {
+; CHECK: test_multiflag:
+
+  %test = icmp ne i32 %n, %m
+; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]]
+
+  %val = zext i1 %test to i32
+; CHECK: csinc {{[xw][0-9]+}}, {{xzr|wzr}}, {{xzr|wzr}}, eq
+
+  store i32 %val, i32* @var
+
+  call void @bar()
+; CHECK: bl bar
+
+  ; Currently, the comparison is emitted again. An MSR/MRS pair would also be
+  ; acceptable, but assuming the call preserves NZCV is not.
+  br i1 %test, label %iftrue, label %iffalse
+; CHECK: cmp [[LHS]], [[RHS]]
+; CHECK: b.eq
+
+iftrue:
+  ret i32 42
+iffalse:
+  ret i32 0
+}
diff --git a/test/CodeGen/AArch64/floatdp_1source.ll b/test/CodeGen/AArch64/floatdp_1source.ll
new file mode 100644
index 0000000..c94ba9b
--- /dev/null
+++ b/test/CodeGen/AArch64/floatdp_1source.ll
@@ -0,0 +1,138 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@varhalf = global half 0.0
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+declare float @fabsf(float) readonly
+declare double @fabs(double) readonly
+
+declare float @llvm.sqrt.f32(float %Val)
+declare double @llvm.sqrt.f64(double %Val)
+
+declare float @ceilf(float) readonly
+declare double @ceil(double) readonly
+
+declare float @floorf(float) readonly
+declare double @floor(double) readonly
+
+declare float @truncf(float) readonly
+declare double @trunc(double) readonly
+
+declare float @rintf(float) readonly
+declare double @rint(double) readonly
+
+declare float @nearbyintf(float) readonly
+declare double @nearbyint(double) readonly
+
+define void @simple_float() {
+; CHECK: simple_float:
+  %val1 = load volatile float* @varfloat
+
+  %valabs = call float @fabsf(float %val1)
+  store volatile float %valabs, float* @varfloat
+; CHECK: fabs {{s[0-9]+}}, {{s[0-9]+}}
+
+  %valneg = fsub float -0.0, %val1
+  store volatile float %valneg, float* @varfloat
+; CHECK: fneg {{s[0-9]+}}, {{s[0-9]+}}
+
+  %valsqrt = call float @llvm.sqrt.f32(float %val1)
+  store volatile float %valsqrt, float* @varfloat
+; CHECK: fsqrt {{s[0-9]+}}, {{s[0-9]+}}
+
+  %valceil = call float @ceilf(float %val1)
+  store volatile float %valceil, float* @varfloat
+; CHECK: frintp {{s[0-9]+}}, {{s[0-9]+}}
+
+  %valfloor = call float @floorf(float %val1)
+  store volatile float %valfloor, float* @varfloat
+; CHECK: frintm {{s[0-9]+}}, {{s[0-9]+}}
+
+  %valtrunc = call float @truncf(float %val1)
+  store volatile float %valtrunc, float* @varfloat
+; CHECK: frintz {{s[0-9]+}}, {{s[0-9]+}}
+
+  %valrint = call float @rintf(float %val1)
+  store volatile float %valrint, float* @varfloat
+; CHECK: frintx {{s[0-9]+}}, {{s[0-9]+}}
+
+  %valnearbyint = call float @nearbyintf(float %val1)
+  store volatile float %valnearbyint, float* @varfloat
+; CHECK: frinti {{s[0-9]+}}, {{s[0-9]+}}
+
+  ret void
+}
+
+define void @simple_double() {
+; CHECK: simple_double:
+  %val1 = load volatile double* @vardouble
+
+  %valabs = call double @fabs(double %val1)
+  store volatile double %valabs, double* @vardouble
+; CHECK: fabs {{d[0-9]+}}, {{d[0-9]+}}
+
+  %valneg = fsub double -0.0, %val1
+  store volatile double %valneg, double* @vardouble
+; CHECK: fneg {{d[0-9]+}}, {{d[0-9]+}}
+
+  %valsqrt = call double @llvm.sqrt.f64(double %val1)
+  store volatile double %valsqrt, double* @vardouble
+; CHECK: fsqrt {{d[0-9]+}}, {{d[0-9]+}}
+
+  %valceil = call double @ceil(double %val1)
+  store volatile double %valceil, double* @vardouble
+; CHECK: frintp {{d[0-9]+}}, {{d[0-9]+}}
+
+  %valfloor = call double @floor(double %val1)
+  store volatile double %valfloor, double* @vardouble
+; CHECK: frintm {{d[0-9]+}}, {{d[0-9]+}}
+
+  %valtrunc = call double @trunc(double %val1)
+  store volatile double %valtrunc, double* @vardouble
+; CHECK: frintz {{d[0-9]+}}, {{d[0-9]+}}
+
+  %valrint = call double @rint(double %val1)
+  store volatile double %valrint, double* @vardouble
+; CHECK: frintx {{d[0-9]+}}, {{d[0-9]+}}
+
+  %valnearbyint = call double @nearbyint(double %val1)
+  store volatile double %valnearbyint, double* @vardouble
+; CHECK: frinti {{d[0-9]+}}, {{d[0-9]+}}
+
+  ret void
+}
+
+define void @converts() {
+; CHECK: converts:
+
+  %val16 = load volatile half* @varhalf
+  %val32 = load volatile float* @varfloat
+  %val64 = load volatile double* @vardouble
+
+  %val16to32 = fpext half %val16 to float
+  store volatile float %val16to32, float* @varfloat
+; CHECK: fcvt {{s[0-9]+}}, {{h[0-9]+}}
+
+  %val16to64 = fpext half %val16 to double
+  store volatile double %val16to64, double* @vardouble
+; CHECK: fcvt {{d[0-9]+}}, {{h[0-9]+}}
+
+  %val32to16 = fptrunc float %val32 to half
+  store volatile half %val32to16, half* @varhalf
+; CHECK: fcvt {{h[0-9]+}}, {{s[0-9]+}}
+
+  %val32to64 = fpext float %val32 to double
+  store volatile double %val32to64, double* @vardouble
+; CHECK: fcvt {{d[0-9]+}}, {{s[0-9]+}}
+
+  %val64to16 = fptrunc double %val64 to half
+  store volatile half %val64to16, half* @varhalf
+; CHECK: fcvt {{h[0-9]+}}, {{d[0-9]+}}
+
+  %val64to32 = fptrunc double %val64 to float
+  store volatile float %val64to32, float* @varfloat
+; CHECK: fcvt {{s[0-9]+}}, {{d[0-9]+}}
+
+  ret void
+}
diff --git a/test/CodeGen/AArch64/floatdp_2source.ll b/test/CodeGen/AArch64/floatdp_2source.ll
new file mode 100644
index 0000000..b2256b3
--- /dev/null
+++ b/test/CodeGen/AArch64/floatdp_2source.ll
@@ -0,0 +1,60 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+define void @testfloat() {
+; CHECK: testfloat:
+  %val1 = load float* @varfloat
+
+  %val2 = fadd float %val1, %val1
+; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+  %val3 = fmul float %val2, %val1
+; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+  %val4 = fdiv float %val3, %val1
+; CHECK: fdiv {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+  %val5 = fsub float %val4, %val2
+; CHECK: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+  store volatile float %val5, float* @varfloat
+
+; These will be enabled with the implementation of floating-point litpool entries.
+  %val6 = fmul float %val1, %val2
+  %val7 = fsub float -0.0, %val6
+; CHECK: fnmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+  store volatile float %val7, float* @varfloat
+
+  ret void
+}
+
+define void @testdouble() {
+; CHECK: testdouble:
+  %val1 = load double* @vardouble
+
+  %val2 = fadd double %val1, %val1
+; CHECK: fadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+  %val3 = fmul double %val2, %val1
+; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+  %val4 = fdiv double %val3, %val1
+; CHECK: fdiv {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+  %val5 = fsub double %val4, %val2
+; CHECK: fsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+  store volatile double %val5, double* @vardouble
+
+; These will be enabled with the implementation of floating-point litpool entries.
+   %val6 = fmul double %val1, %val2
+   %val7 = fsub double -0.0, %val6
+; CHECK: fnmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+   store volatile double %val7, double* @vardouble
+
+  ret void
+}
diff --git a/test/CodeGen/AArch64/fp-cond-sel.ll b/test/CodeGen/AArch64/fp-cond-sel.ll
new file mode 100644
index 0000000..56e8f16
--- /dev/null
+++ b/test/CodeGen/AArch64/fp-cond-sel.ll
@@ -0,0 +1,26 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+; CHECK: test_csel:
+
+  %tst1 = icmp ugt i32 %lhs32, %rhs32
+  %val1 = select i1 %tst1, float 0.0, float 1.0
+  store float %val1, float* @varfloat
+; CHECK: ldr [[FLT0:s[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI
+; CHECK: fmov [[FLT1:s[0-9]+]], #1.0
+; CHECK: fcsel {{s[0-9]+}}, [[FLT0]], [[FLT1]], hi
+
+  %rhs64 = sext i32 %rhs32 to i64
+  %tst2 = icmp sle i64 %lhs64, %rhs64
+  %val2 = select i1 %tst2, double 1.0, double 0.0
+  store double %val2, double* @vardouble
+; CHECK: ldr [[FLT0:d[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI
+; CHECK: fmov [[FLT1:d[0-9]+]], #1.0
+; CHECK: fcsel {{d[0-9]+}}, [[FLT1]], [[FLT0]], le
+
+  ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/fp-dp3.ll b/test/CodeGen/AArch64/fp-dp3.ll
new file mode 100644
index 0000000..39db9be
--- /dev/null
+++ b/test/CodeGen/AArch64/fp-dp3.ll
@@ -0,0 +1,102 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -fp-contract=fast | FileCheck %s
+
+declare float @llvm.fma.f32(float, float, float)
+declare double @llvm.fma.f64(double, double, double)
+
+define float @test_fmadd(float %a, float %b, float %c) {
+; CHECK: test_fmadd:
+  %val = call float @llvm.fma.f32(float %a, float %b, float %c)
+; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  ret float %val
+}
+
+define float @test_fmsub(float %a, float %b, float %c) {
+; CHECK: test_fmsub:
+  %nega = fsub float -0.0, %a
+  %val = call float @llvm.fma.f32(float %nega, float %b, float %c)
+; CHECK: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  ret float %val
+}
+
+define float @test_fnmadd(float %a, float %b, float %c) {
+; CHECK: test_fnmadd:
+  %negc = fsub float -0.0, %c
+  %val = call float @llvm.fma.f32(float %a, float %b, float %negc)
+; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  ret float %val
+}
+
+define float @test_fnmsub(float %a, float %b, float %c) {
+; CHECK: test_fnmsub:
+  %nega = fsub float -0.0, %a
+  %negc = fsub float -0.0, %c
+  %val = call float @llvm.fma.f32(float %nega, float %b, float %negc)
+; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  ret float %val
+}
+
+define double @testd_fmadd(double %a, double %b, double %c) {
+; CHECK: testd_fmadd:
+  %val = call double @llvm.fma.f64(double %a, double %b, double %c)
+; CHECK: fmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret double %val
+}
+
+define double @testd_fmsub(double %a, double %b, double %c) {
+; CHECK: testd_fmsub:
+  %nega = fsub double -0.0, %a
+  %val = call double @llvm.fma.f64(double %nega, double %b, double %c)
+; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret double %val
+}
+
+define double @testd_fnmadd(double %a, double %b, double %c) {
+; CHECK: testd_fnmadd:
+  %negc = fsub double -0.0, %c
+  %val = call double @llvm.fma.f64(double %a, double %b, double %negc)
+; CHECK: fnmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret double %val
+}
+
+define double @testd_fnmsub(double %a, double %b, double %c) {
+; CHECK: testd_fnmsub:
+  %nega = fsub double -0.0, %a
+  %negc = fsub double -0.0, %c
+  %val = call double @llvm.fma.f64(double %nega, double %b, double %negc)
+; CHECK: fnmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret double %val
+}
+
+define float @test_fmadd_unfused(float %a, float %b, float %c) {
+; CHECK: test_fmadd_unfused:
+  %prod = fmul float %b, %c
+  %sum = fadd float %a, %prod
+; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  ret float %sum
+}
+
+define float @test_fmsub_unfused(float %a, float %b, float %c) {
+; CHECK: test_fmsub_unfused:
+  %prod = fmul float %b, %c
+  %diff = fsub float %a, %prod
+; CHECK: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  ret float %diff
+}
+
+define float @test_fnmadd_unfused(float %a, float %b, float %c) {
+; CHECK: test_fnmadd_unfused:
+  %nega = fsub float -0.0, %a
+  %prod = fmul float %b, %c
+  %sum = fadd float %nega, %prod
+; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  ret float %sum
+}
+
+define float @test_fnmsub_unfused(float %a, float %b, float %c) {
+; CHECK: test_fnmsub_unfused:
+  %nega = fsub float -0.0, %a
+  %prod = fmul float %b, %c
+  %diff = fsub float %nega, %prod
+; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  ret float %diff
+}
diff --git a/test/CodeGen/AArch64/fp128-folding.ll b/test/CodeGen/AArch64/fp128-folding.ll
new file mode 100644
index 0000000..b5bdcf4
--- /dev/null
+++ b/test/CodeGen/AArch64/fp128-folding.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+declare void @bar(i8*, i8*, i32*)
+
+; SelectionDAG used to try to fold some fp128 operations using the ppc128 type,
+; which is not supported.
+
+define fp128 @test_folding() {
+; CHECK: test_folding:
+  %l = alloca i32
+  store i32 42, i32* %l
+  %val = load i32* %l
+  %fpval = sitofp i32 %val to fp128
+  ; If the value is loaded from a constant pool into an fp128, it's been folded
+  ; successfully.
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI
+  ret fp128 %fpval
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/fp128.ll b/test/CodeGen/AArch64/fp128.ll
new file mode 100644
index 0000000..258d34b
--- /dev/null
+++ b/test/CodeGen/AArch64/fp128.ll
@@ -0,0 +1,280 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+@lhs = global fp128 zeroinitializer
+@rhs = global fp128 zeroinitializer
+
+define fp128 @test_add() {
+; CHECK: test_add:
+
+  %lhs = load fp128* @lhs
+  %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+  %val = fadd fp128 %lhs, %rhs
+; CHECK: bl __addtf3
+  ret fp128 %val
+}
+
+define fp128 @test_sub() {
+; CHECK: test_sub:
+
+  %lhs = load fp128* @lhs
+  %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+  %val = fsub fp128 %lhs, %rhs
+; CHECK: bl __subtf3
+  ret fp128 %val
+}
+
+define fp128 @test_mul() {
+; CHECK: test_mul:
+
+  %lhs = load fp128* @lhs
+  %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+  %val = fmul fp128 %lhs, %rhs
+; CHECK: bl __multf3
+  ret fp128 %val
+}
+
+define fp128 @test_div() {
+; CHECK: test_div:
+
+  %lhs = load fp128* @lhs
+  %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+  %val = fdiv fp128 %lhs, %rhs
+; CHECK: bl __divtf3
+  ret fp128 %val
+}
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_fptosi() {
+; CHECK: test_fptosi:
+  %val = load fp128* @lhs
+
+  %val32 = fptosi fp128 %val to i32
+  store i32 %val32, i32* @var32
+; CHECK: bl __fixtfsi
+
+  %val64 = fptosi fp128 %val to i64
+  store i64 %val64, i64* @var64
+; CHECK: bl __fixtfdi
+
+  ret void
+}
+
+define void @test_fptoui() {
+; CHECK: test_fptoui:
+  %val = load fp128* @lhs
+
+  %val32 = fptoui fp128 %val to i32
+  store i32 %val32, i32* @var32
+; CHECK: bl __fixunstfsi
+
+  %val64 = fptoui fp128 %val to i64
+  store i64 %val64, i64* @var64
+; CHECK: bl __fixunstfdi
+
+  ret void
+}
+
+define void @test_sitofp() {
+; CHECK: test_sitofp:
+
+  %src32 = load i32* @var32
+  %val32 = sitofp i32 %src32 to fp128
+  store volatile fp128 %val32, fp128* @lhs
+; CHECK: bl __floatsitf
+
+  %src64 = load i64* @var64
+  %val64 = sitofp i64 %src64 to fp128
+  store volatile fp128 %val64, fp128* @lhs
+; CHECK: bl __floatditf
+
+  ret void
+}
+
+define void @test_uitofp() {
+; CHECK: test_uitofp:
+
+  %src32 = load i32* @var32
+  %val32 = uitofp i32 %src32 to fp128
+  store volatile fp128 %val32, fp128* @lhs
+; CHECK: bl __floatunsitf
+
+  %src64 = load i64* @var64
+  %val64 = uitofp i64 %src64 to fp128
+  store volatile fp128 %val64, fp128* @lhs
+; CHECK: bl __floatunditf
+
+  ret void
+}
+
+define i1 @test_setcc1() {
+; CHECK: test_setcc1:
+
+  %lhs = load fp128* @lhs
+  %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+; Technically, everything after the call to __letf2 is redundant, but we'll let
+; LLVM have its fun for now.
+  %val = fcmp ole fp128 %lhs, %rhs
+; CHECK: bl __letf2
+; CHECK: cmp w0, #0
+; CHECK: csinc w0, wzr, wzr, gt
+
+  ret i1 %val
+; CHECK: ret
+}
+
+define i1 @test_setcc2() {
+; CHECK: test_setcc2:
+
+  %lhs = load fp128* @lhs
+  %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+; ugt is checked as "unordered or greater-than" via __unordtf2 and __gttf2. Parts
+; of this sequence may be redundant, but we'll let LLVM have its fun for now.
+  %val = fcmp ugt fp128 %lhs, %rhs
+; CHECK: bl      __unordtf2
+; CHECK: mov     x[[UNORDERED:[0-9]+]], x0
+
+; CHECK: bl      __gttf2
+; CHECK: cmp w0, #0
+; CHECK: csinc   [[GT:w[0-9]+]], wzr, wzr, le
+; CHECK: cmp w[[UNORDERED]], #0
+; CHECK: csinc   [[UNORDERED:w[0-9]+]], wzr, wzr, eq
+; CHECK: orr     w0, [[UNORDERED]], [[GT]]
+
+  ret i1 %val
+; CHECK: ret
+}
+
+define i32 @test_br_cc() {
+; CHECK: test_br_cc:
+
+  %lhs = load fp128* @lhs
+  %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+  ; olt == !uge, which LLVM unfortunately "optimizes" this to.
+  %cond = fcmp olt fp128 %lhs, %rhs
+; CHECK: bl      __unordtf2
+; CHECK: mov     x[[UNORDERED:[0-9]+]], x0
+
+; CHECK: bl      __getf2
+; CHECK: cmp w0, #0
+
+; CHECK: csinc   [[OGE:w[0-9]+]], wzr, wzr, lt
+; CHECK: cmp w[[UNORDERED]], #0
+; CHECK: csinc   [[UNORDERED:w[0-9]+]], wzr, wzr, eq
+; CHECK: orr     [[UGE:w[0-9]+]], [[UNORDERED]], [[OGE]]
+; CHECK: cbnz [[UGE]], [[RET29:.LBB[0-9]+_[0-9]+]]
+  br i1 %cond, label %iftrue, label %iffalse
+
+iftrue:
+  ret i32 42
+; CHECK-NEXT: BB#
+; CHECK-NEXT: movz x0, #42
+; CHECK-NEXT: b [[REALRET:.LBB[0-9]+_[0-9]+]]
+
+iffalse:
+  ret i32 29
+; CHECK: [[RET29]]:
+; CHECK-NEXT: movz x0, #29
+; CHECK-NEXT: [[REALRET]]:
+; CHECK: ret
+}
+
+define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) {
+; CHECK: test_select:
+
+  %val = select i1 %cond, fp128 %lhs, fp128 %rhs
+  store fp128 %val, fp128* @lhs
+; CHECK: cmp w0, #0
+; CHECK: str q1, [sp]
+; CHECK-NEXT: b.eq [[IFFALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: BB#
+; CHECK-NEXT: str q0, [sp]
+; CHECK-NEXT: [[IFFALSE]]:
+; CHECK-NEXT: ldr q0, [sp]
+; CHECK: str q0, [{{x[0-9]+}}, #:lo12:lhs]
+  ret void
+; CHECK: ret
+}
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+define void @test_round() {
+; CHECK: test_round:
+
+  %val = load fp128* @lhs
+
+  %float = fptrunc fp128 %val to float
+  store float %float, float* @varfloat
+; CHECK: bl __trunctfsf2
+; CHECK: str s0, [{{x[0-9]+}}, #:lo12:varfloat]
+
+  %double = fptrunc fp128 %val to double
+  store double %double, double* @vardouble
+; CHECK: bl __trunctfdf2
+; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble]
+
+  ret void
+}
+
+define void @test_extend() {
+; CHECK: test_extend:
+
+  %val = load fp128* @lhs
+
+  %float = load float* @varfloat
+  %fromfloat = fpext float %float to fp128
+  store volatile fp128 %fromfloat, fp128* @lhs
+; CHECK: bl __extendsftf2
+; CHECK: str q0, [{{x[0-9]+}}, #:lo12:lhs]
+
+  %double = load double* @vardouble
+  %fromdouble = fpext double %double to fp128
+  store volatile fp128 %fromdouble, fp128* @lhs
+; CHECK: bl __extenddftf2
+; CHECK: str q0, [{{x[0-9]+}}, #:lo12:lhs]
+
+  ret void
+; CHECK: ret
+}
+
+define fp128 @test_neg(fp128 %in) {
+; CHECK: [[MINUS0:.LCPI[0-9]+_0]]:
+; Make sure the weird hex constant below *is* -0.0
+; CHECK-NEXT: fp128 -0
+
+; CHECK: test_neg:
+
+  ; Could in principle be optimized to fneg which we can't select, this makes
+  ; sure that doesn't happen.
+  %ret = fsub fp128 0xL00000000000000008000000000000000, %in
+; CHECK: str q0, [sp, #-16]
+; CHECK-NEXT: ldr q1, [sp], #16
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:[[MINUS0]]]
+; CHECK: bl __subtf3
+
+  ret fp128 %ret
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/fpimm.ll b/test/CodeGen/AArch64/fpimm.ll
new file mode 100644
index 0000000..fd28aee
--- /dev/null
+++ b/test/CodeGen/AArch64/fpimm.ll
@@ -0,0 +1,34 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@varf32 = global float 0.0
+@varf64 = global double 0.0
+
+define void @check_float() {
+; CHECK: check_float:
+
+  %val = load float* @varf32
+  %newval1 = fadd float %val, 8.5
+  store volatile float %newval1, float* @varf32
+; CHECK: fmov {{s[0-9]+}}, #8.5
+
+  %newval2 = fadd float %val, 128.0
+  store volatile float %newval2, float* @varf32
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI0_0
+
+  ret void
+}
+
+define void @check_double() {
+; CHECK: check_double:
+
+  %val = load double* @varf64
+  %newval1 = fadd double %val, 8.5
+  store volatile double %newval1, double* @varf64
+; CHECK: fmov {{d[0-9]+}}, #8.5
+
+  %newval2 = fadd double %val, 128.0
+  store volatile double %newval2, double* @varf64
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI1_0
+
+  ret void
+}
diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll
new file mode 100644
index 0000000..5675e5a
--- /dev/null
+++ b/test/CodeGen/AArch64/func-argpassing.ll
@@ -0,0 +1,193 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+%myStruct = type { i64 , i8, i32 }
+
+@var8 = global i8 0
+@var32 = global i32 0
+@var64 = global i64 0
+@var128 = global i128 0
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+@varstruct = global %myStruct zeroinitializer
+
+define void @take_i8s(i8 %val1, i8 %val2) {
+; CHECK: take_i8s:
+    store i8 %val2, i8* @var8
+    ; Not using w1 may be technically allowed, but it would indicate a
+    ; problem in itself.
+;  CHECK: strb w1, [{{x[0-9]+}}, #:lo12:var8]
+    ret void
+}
+
+define void @add_floats(float %val1, float %val2) {
+; CHECK: add_floats:
+    %newval = fadd float %val1, %val2
+; CHECK: fadd [[ADDRES:s[0-9]+]], s0, s1
+    store float %newval, float* @varfloat
+; CHECK: str [[ADDRES]], [{{x[0-9]+}}, #:lo12:varfloat]
+    ret void
+}
+
+; byval pointers should be allocated to the stack and copied as if
+; with memcpy.
+define void @take_struct(%myStruct* byval %structval) {
+; CHECK: take_struct:
+    %addr0 = getelementptr %myStruct* %structval, i64 0, i32 2
+    %addr1 = getelementptr %myStruct* %structval, i64 0, i32 0
+
+    %val0 = load i32* %addr0
+    ; Some weird move means x0 is used for one access, so accept xN or sp
+; CHECK: ldr [[REG32:w[0-9]+]], [{{x[0-9]+|sp}}, #12]
+    store i32 %val0, i32* @var32
+; CHECK: str [[REG32]], [{{x[0-9]+}}, #:lo12:var32]
+
+    %val1 = load i64* %addr1
+; CHECK: ldr [[REG64:x[0-9]+]], [{{x[0-9]+|sp}}]
+    store i64 %val1, i64* @var64
+; CHECK: str [[REG64]], [{{x[0-9]+}}, #:lo12:var64]
+
+    ret void
+}
+
+; %structval should be at sp + 16
+define void @check_byval_align(i32* byval %ignore, %myStruct* byval align 16 %structval) {
+; CHECK: check_byval_align:
+
+    %addr0 = getelementptr %myStruct* %structval, i64 0, i32 2
+    %addr1 = getelementptr %myStruct* %structval, i64 0, i32 0
+
+    %val0 = load i32* %addr0
+    ; The struct's address (sp + 16) is materialized for this access
+; CHECK: add x[[STRUCTVAL_ADDR:[0-9]+]], sp, #16
+; CHECK: ldr [[REG32:w[0-9]+]], [x[[STRUCTVAL_ADDR]], #12]
+    store i32 %val0, i32* @var32
+; CHECK: str [[REG32]], [{{x[0-9]+}}, #:lo12:var32]
+
+    %val1 = load i64* %addr1
+; CHECK: ldr [[REG64:x[0-9]+]], [sp, #16]
+    store i64 %val1, i64* @var64
+; CHECK: str [[REG64]], [{{x[0-9]+}}, #:lo12:var64]
+
+    ret void
+}
+
+define i32 @return_int() {
+; CHECK: return_int:
+    %val = load i32* @var32
+    ret i32 %val
+; CHECK: ldr w0, [{{x[0-9]+}}, #:lo12:var32]
+    ; Make sure epilogue follows
+; CHECK-NEXT: ret
+}
+
+define double @return_double() {
+; CHECK: return_double:
+    ret double 3.14
+; CHECK: ldr d0, [{{x[0-9]+}}, #:lo12:.LCPI
+}
+
+; This is the kind of IR clang will produce for returning a struct
+; small enough to go into registers. Not all that pretty, but it
+; works.
+define [2 x i64] @return_struct() {
+; CHECK: return_struct:
+    %addr = bitcast %myStruct* @varstruct to [2 x i64]*
+    %val = load [2 x i64]* %addr
+    ret [2 x i64] %val
+; CHECK: ldr x0, [{{x[0-9]+}}, #:lo12:varstruct]
+    ; Odd register regex below disallows x0 which we want to be live now.
+; CHECK: add {{x[1-9][0-9]*}}, {{x[1-9][0-9]*}}, #:lo12:varstruct
+; CHECK-NEXT: ldr x1, [{{x[1-9][0-9]*}}, #8]
+    ; Make sure epilogue immediately follows
+; CHECK-NEXT: ret
+}
+
+; Large structs are passed by reference (storage allocated by caller
+; to preserve value semantics) in x8. Strictly this only applies to
+; structs larger than 16 bytes, but C semantics can still be provided
+; if LLVM does it to %myStruct too. So this is the simplest check
+define void @return_large_struct(%myStruct* sret %retval) {
+; CHECK: return_large_struct:
+    %addr0 = getelementptr %myStruct* %retval, i64 0, i32 0
+    %addr1 = getelementptr %myStruct* %retval, i64 0, i32 1
+    %addr2 = getelementptr %myStruct* %retval, i64 0, i32 2
+
+    store i64 42, i64* %addr0
+    store i8 2, i8* %addr1
+    store i32 9, i32* %addr2
+; CHECK: str {{x[0-9]+}}, [x8]
+; CHECK: strb {{w[0-9]+}}, [x8, #8]
+; CHECK: str {{w[0-9]+}}, [x8, #12]
+
+    ret void
+}
+
+; This struct is just too far along to go into registers: (only x7 is
+; available, but it needs two). Also make sure that %stacked doesn't
+; sneak into x7 behind.
+define i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var45,
+                          i32* %var6, %myStruct* byval %struct, i32* byval %stacked,
+                          double %notstacked) {
+; CHECK: struct_on_stack:
+    %addr = getelementptr %myStruct* %struct, i64 0, i32 0
+    %val64 = load i64* %addr
+    store i64 %val64, i64* @var64
+    ; Currently nothing on local stack, so struct should be at sp
+; CHECK: ldr [[VAL64:x[0-9]+]], [sp]
+; CHECK: str [[VAL64]], [{{x[0-9]+}}, #:lo12:var64]
+
+    store double %notstacked, double* @vardouble
+; CHECK-NOT: ldr d0
+; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble
+
+    %retval = load i32* %stacked
+    ret i32 %retval
+; CHECK: ldr w0, [sp, #16]
+}
+
+define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
+                         float %var4, float %var5, float %var6, float %var7,
+                         float %var8) {
+; CHECK: stacked_fpu:
+    store float %var8, float* @varfloat
+    ; Beware as above: the offset would be different on big-endian
+    ; machines if the first ldr were changed to use s-registers.
+; CHECK: ldr d[[VALFLOAT:[0-9]+]], [sp]
+; CHECK: str s[[VALFLOAT]], [{{x[0-9]+}}, #:lo12:varfloat]
+
+    ret void
+}
+
+; 128-bit integer types should be passed in xEVEN, xODD rather than
+; the reverse. In this case x2 and x3. Nothing should use x1.
+define i32 @check_i128_regalign(i32 %val0, i128 %val1, i32 %val2) {
+; CHECK: check_i128_regalign:
+    store i128 %val1, i128* @var128
+; CHECK: str x2, [{{x[0-9]+}}, #:lo12:var128]
+; CHECK: str x3, [{{x[0-9]+}}, #8]
+
+    ret i32 %val2
+; CHECK: mov x0, x4
+}
+
+define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
+                                   i32 %val4, i32 %val5, i32 %val6, i32 %val7,
+                                   i32 %stack1, i128 %stack2) {
+; CHECK: check_i128_stackalign:
+    store i128 %stack2, i128* @var128
+    ; Nothing local on stack in current codegen, so first stack is 16 away
+; CHECK: ldr {{x[0-9]+}}, [sp, #16]
+    ; Important point is that we address sp+24 for second dword
+; CHECK: add     [[REG:x[0-9]+]], sp, #16
+; CHECK: ldr     {{x[0-9]+}}, {{\[}}[[REG]], #8]
+    ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+
+define i32 @test_extern() {
+; CHECK: test_extern:
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0)
+; CHECK: bl memcpy
+  ret i32 0
+}
diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll
new file mode 100644
index 0000000..abb09a5
--- /dev/null
+++ b/test/CodeGen/AArch64/func-calls.ll
@@ -0,0 +1,140 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+%myStruct = type { i64 , i8, i32 }
+
+@var8 = global i8 0
+@var8_2 = global i8 0
+@var32 = global i32 0
+@var64 = global i64 0
+@var128 = global i128 0
+@varfloat = global float 0.0
+@varfloat_2 = global float 0.0
+@vardouble = global double 0.0
+@varstruct = global %myStruct zeroinitializer
+@varsmallstruct = global [2 x i64] zeroinitializer
+
+declare void @take_i8s(i8 %val1, i8 %val2)
+declare void @take_floats(float %val1, float %val2)
+
+define void @simple_args() {
+; CHECK: simple_args:
+  %char1 = load i8* @var8
+  %char2 = load i8* @var8_2
+  call void @take_i8s(i8 %char1, i8 %char2)
+; CHECK: ldrb w0, [{{x[0-9]+}}, #:lo12:var8]
+; CHECK: ldrb w1, [{{x[0-9]+}}, #:lo12:var8_2]
+; CHECK: bl take_i8s
+
+  %float1 = load float* @varfloat
+  %float2 = load float* @varfloat_2
+  call void @take_floats(float %float1, float %float2)
+; CHECK: ldr s1, [{{x[0-9]+}}, #:lo12:varfloat_2]
+; CHECK: ldr s0, [{{x[0-9]+}}, #:lo12:varfloat]
+; CHECK: bl take_floats
+
+  ret void
+}
+
+declare i32 @return_int()
+declare double @return_double()
+declare [2 x i64] @return_smallstruct()
+declare void @return_large_struct(%myStruct* sret %retval)
+
+define void @simple_rets() {
+; CHECK: simple_rets:
+
+  %int = call i32 @return_int()
+  store i32 %int, i32* @var32
+; CHECK: bl return_int
+; CHECK: str w0, [{{x[0-9]+}}, #:lo12:var32]
+
+  %dbl = call double @return_double()
+  store double %dbl, double* @vardouble
+; CHECK: bl return_double
+; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble]
+
+  %arr = call [2 x i64] @return_smallstruct()
+  store [2 x i64] %arr, [2 x i64]* @varsmallstruct
+; CHECK: bl return_smallstruct
+; CHECK: str x1, [{{x[0-9]+}}, #8]
+; CHECK: str x0, [{{x[0-9]+}}, #:lo12:varsmallstruct]
+
+  call void @return_large_struct(%myStruct* sret @varstruct)
+; CHECK: add x8, {{x[0-9]+}}, #:lo12:varstruct
+; CHECK: bl return_large_struct
+
+  ret void
+}
+
+
+declare i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var45,
+                             i32* %var6, %myStruct* byval %struct, i32 %stacked,
+                             double %notstacked)
+declare void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
+                          float %var4, float %var5, float %var6, float %var7,
+                          float %var8)
+
+define void @check_stack_args() {
+  call i32 @struct_on_stack(i8 0, i16 12, i32 42, i64 99, i128 1,
+                            i32* @var32, %myStruct* byval @varstruct,
+                            i32 999, double 1.0)
+  ; Want to check that the final double is passed in registers and
+  ; that varstruct is passed on the stack. Rather dependent on how a
+  ; memcpy gets created, but the following works for now.
+; CHECK: mov x0, sp
+; CHECK: str {{w[0-9]+}}, [x0]
+; CHECK: str {{w[0-9]+}}, [x0, #12]
+; CHECK: fmov d0,
+; CHECK: bl struct_on_stack
+
+  call void @stacked_fpu(float -1.0, double 1.0, float 4.0, float 2.0,
+                         float -2.0, float -8.0, float 16.0, float 1.0,
+                         float 64.0)
+; CHECK: ldr s[[STACKEDREG:[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI
+; CHECK: mov x0, sp
+; CHECK: str d[[STACKEDREG]], [x0]
+; CHECK: bl stacked_fpu
+  ret void
+}
+
+
+declare void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
+                                    i32 %val4, i32 %val5, i32 %val6, i32 %val7,
+                                    i32 %stack1, i128 %stack2)
+
+declare void @check_i128_regalign(i32 %val0, i128 %val1)
+
+
+define void @check_i128_align() {
+; CHECK: check_i128_align:
+  %val = load i128* @var128
+  call void @check_i128_stackalign(i32 0, i32 1, i32 2, i32 3,
+                                   i32 4, i32 5, i32 6, i32 7,
+                                   i32 42, i128 %val)
+; CHECK: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var128]
+; CHECK: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8]
+; CHECK: mov x[[SPREG:[0-9]+]], sp
+; CHECK: str [[I128HI]], [x[[SPREG]], #24]
+; CHECK: str [[I128LO]], [x[[SPREG]], #16]
+; CHECK: bl check_i128_stackalign
+
+  call void @check_i128_regalign(i32 0, i128 42)
+; CHECK-NOT: mov x1
+; CHECK: movz x2, #42
+; CHECK: mov x3, xzr
+; CHECK: bl check_i128_regalign
+
+  ret void
+}
+
+@fptr = global void()* null
+
+define void @check_indirect_call() {
+; CHECK: check_indirect_call:
+  %func = load void()** @fptr
+  call void %func()
+; CHECK: ldr [[FPTR:x[0-9]+]], [{{x[0-9]+}}, #:lo12:fptr]
+; CHECK: blr [[FPTR]]
+
+  ret void
+}
diff --git a/test/CodeGen/AArch64/global-alignment.ll b/test/CodeGen/AArch64/global-alignment.ll
new file mode 100644
index 0000000..8ed6e55
--- /dev/null
+++ b/test/CodeGen/AArch64/global-alignment.ll
@@ -0,0 +1,69 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+@var32 = global [3 x i32] zeroinitializer
+@var64 = global [3 x i64] zeroinitializer
+@var32_align64 = global [3 x i32] zeroinitializer, align 8
+
+define i64 @test_align32() {
+; CHECK: test_align32:
+  %addr = bitcast [3 x i32]* @var32 to i64*
+
+  ; Since @var32 is only guaranteed to be aligned to 32-bits, it's invalid to
+  ; emit an "LDR x0, [x0, #:lo12:var32]" instruction to implement this load.
+  %val = load i64* %addr
+; CHECK: adrp [[HIBITS:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[HIBITS]], #:lo12:var32
+; CHECK: ldr x0, [x[[ADDR]]]
+
+  ret i64 %val
+}
+
+define i64 @test_align64() {
+; CHECK: test_align64:
+  %addr = bitcast [3 x i64]* @var64 to i64*
+
+  ; However, var64 *is* properly aligned and emitting an adrp/add/ldr would be
+  ; inefficient.
+  %val = load i64* %addr
+; CHECK: adrp x[[HIBITS:[0-9]+]], var64
+; CHECK-NOT: add x[[HIBITS]]
+; CHECK: ldr x0, [x[[HIBITS]], #:lo12:var64]
+
+  ret i64 %val
+}
+
+define i64 @test_var32_align64() {
+; CHECK: test_var32_align64:
+  %addr = bitcast [3 x i32]* @var32_align64 to i64*
+
+  ; Unlike @var32, @var32_align64 is explicitly aligned to 64 bits, so the
+  ; load can be folded into a single "LDR x0, [x0, #:lo12:var32_align64]".
+  %val = load i64* %addr
+; CHECK: adrp x[[HIBITS:[0-9]+]], var32_align64
+; CHECK-NOT: add x[[HIBITS]]
+; CHECK: ldr x0, [x[[HIBITS]], #:lo12:var32_align64]
+
+  ret i64 %val
+}
+
+@yet_another_var = external global {i32, i32}
+
+define i64 @test_yet_another_var() {
+; CHECK: test_yet_another_var:
+
+  ; @yet_another_var has a preferred alignment of 8, but that's not enough if
+  ; we're going to be linking against other things. Its ABI alignment is only 4
+  ; so we can't fold the load.
+  %val = load i64* bitcast({i32, i32}* @yet_another_var to i64*)
+; CHECK: adrp [[HIBITS:x[0-9]+]], yet_another_var
+; CHECK: add x[[ADDR:[0-9]+]], [[HIBITS]], #:lo12:yet_another_var
+; CHECK: ldr x0, [x[[ADDR]]]
+  ret i64 %val
+}
+
+define i64()* @test_functions() {
+; CHECK: test_functions:
+  ret i64()* @test_yet_another_var
+; CHECK: adrp [[HIBITS:x[0-9]+]], test_yet_another_var
+; CHECK: add x0, [[HIBITS]], #:lo12:test_yet_another_var
+}
diff --git a/test/CodeGen/AArch64/got-abuse.ll b/test/CodeGen/AArch64/got-abuse.ll
new file mode 100644
index 0000000..c474e58
--- /dev/null
+++ b/test/CodeGen/AArch64/got-abuse.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s
+
+; LLVM gives well-defined semantics to this horrible construct (though C says
+; it's undefined). Regardless, we shouldn't crash. The important feature here is
+; that in general the only way to access a GOT symbol is via a 64-bit
+; load. Neither of these alternatives has the ELF relocations required to
+; support it:
+;    + ldr wD, [xN, #:got_lo12:func]
+;    + add xD, xN, #:got_lo12:func
+
+declare void @consume(i32)
+declare void @func()
+
+define void @foo() nounwind {
+; CHECK: foo:
+entry:
+  call void @consume(i32 ptrtoint (void ()* @func to i32))
+; CHECK: adrp x[[ADDRHI:[0-9]+]], :got:func
+; CHECK: ldr {{x[0-9]+}}, [x[[ADDRHI]], #:got_lo12:func]
+  ret void
+}
+
diff --git a/test/CodeGen/AArch64/i128-align.ll b/test/CodeGen/AArch64/i128-align.ll
new file mode 100644
index 0000000..f019ea0
--- /dev/null
+++ b/test/CodeGen/AArch64/i128-align.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+%struct = type { i32, i128, i8 }
+
+@var = global %struct zeroinitializer
+
+define i64 @check_size() {
+; CHECK: check_size:
+  %starti = ptrtoint %struct* @var to i64
+
+  %endp = getelementptr %struct* @var, i64 1
+  %endi = ptrtoint %struct* %endp to i64
+
+  %diff = sub i64 %endi, %starti
+  ret i64 %diff
+; CHECK: movz x0, #48
+}
+
+define i64 @check_field() {
+; CHECK: check_field:
+  %starti = ptrtoint %struct* @var to i64
+
+  %endp = getelementptr %struct* @var, i64 0, i32 1
+  %endi = ptrtoint i128* %endp to i64
+
+  %diff = sub i64 %endi, %starti
+  ret i64 %diff
+; CHECK: movz x0, #16
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/illegal-float-ops.ll b/test/CodeGen/AArch64/illegal-float-ops.ll
new file mode 100644
index 0000000..446151b
--- /dev/null
+++ b/test/CodeGen/AArch64/illegal-float-ops.ll
@@ -0,0 +1,221 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+@varfp128 = global fp128 zeroinitializer
+
+declare float @llvm.cos.f32(float)
+declare double @llvm.cos.f64(double)
+declare fp128 @llvm.cos.f128(fp128)
+
+define void @test_cos(float %float, double %double, fp128 %fp128) {
+; CHECK: test_cos:
+
+   %cosfloat = call float @llvm.cos.f32(float %float)
+   store float %cosfloat, float* @varfloat
+; CHECK: bl cosf
+
+   %cosdouble = call double @llvm.cos.f64(double %double)
+   store double %cosdouble, double* @vardouble
+; CHECK: bl cos
+
+   %cosfp128 = call fp128 @llvm.cos.f128(fp128 %fp128)
+   store fp128 %cosfp128, fp128* @varfp128
+; CHECK: bl cosl
+
+  ret void
+}
+
+declare float @llvm.exp.f32(float)
+declare double @llvm.exp.f64(double)
+declare fp128 @llvm.exp.f128(fp128)
+
+define void @test_exp(float %float, double %double, fp128 %fp128) {
+; CHECK: test_exp:
+
+   %expfloat = call float @llvm.exp.f32(float %float)
+   store float %expfloat, float* @varfloat
+; CHECK: bl expf
+
+   %expdouble = call double @llvm.exp.f64(double %double)
+   store double %expdouble, double* @vardouble
+; CHECK: bl exp
+
+   %expfp128 = call fp128 @llvm.exp.f128(fp128 %fp128)
+   store fp128 %expfp128, fp128* @varfp128
+; CHECK: bl expl
+
+  ret void
+}
+
+declare float @llvm.exp2.f32(float)
+declare double @llvm.exp2.f64(double)
+declare fp128 @llvm.exp2.f128(fp128)
+
+define void @test_exp2(float %float, double %double, fp128 %fp128) {
+; CHECK: test_exp2:
+
+   %exp2float = call float @llvm.exp2.f32(float %float)
+   store float %exp2float, float* @varfloat
+; CHECK: bl exp2f
+
+   %exp2double = call double @llvm.exp2.f64(double %double)
+   store double %exp2double, double* @vardouble
+; CHECK: bl exp2
+
+   %exp2fp128 = call fp128 @llvm.exp2.f128(fp128 %fp128)
+   store fp128 %exp2fp128, fp128* @varfp128
+; CHECK: bl exp2l
+  ret void
+
+}
+
+declare float @llvm.log.f32(float)
+declare double @llvm.log.f64(double)
+declare fp128 @llvm.log.f128(fp128)
+
+define void @test_log(float %float, double %double, fp128 %fp128) {
+; CHECK: test_log:
+
+   %logfloat = call float @llvm.log.f32(float %float)
+   store float %logfloat, float* @varfloat
+; CHECK: bl logf
+
+   %logdouble = call double @llvm.log.f64(double %double)
+   store double %logdouble, double* @vardouble
+; CHECK: bl log
+
+   %logfp128 = call fp128 @llvm.log.f128(fp128 %fp128)
+   store fp128 %logfp128, fp128* @varfp128
+; CHECK: bl logl
+
+  ret void
+}
+
+declare float @llvm.log2.f32(float)
+declare double @llvm.log2.f64(double)
+declare fp128 @llvm.log2.f128(fp128)
+
+define void @test_log2(float %float, double %double, fp128 %fp128) {
+; CHECK: test_log2:
+
+   %log2float = call float @llvm.log2.f32(float %float)
+   store float %log2float, float* @varfloat
+; CHECK: bl log2f
+
+   %log2double = call double @llvm.log2.f64(double %double)
+   store double %log2double, double* @vardouble
+; CHECK: bl log2
+
+   %log2fp128 = call fp128 @llvm.log2.f128(fp128 %fp128)
+   store fp128 %log2fp128, fp128* @varfp128
+; CHECK: bl log2l
+  ret void
+
+}
+
+declare float @llvm.log10.f32(float)
+declare double @llvm.log10.f64(double)
+declare fp128 @llvm.log10.f128(fp128)
+
+define void @test_log10(float %float, double %double, fp128 %fp128) {
+; CHECK: test_log10:
+
+   %log10float = call float @llvm.log10.f32(float %float)
+   store float %log10float, float* @varfloat
+; CHECK: bl log10f
+
+   %log10double = call double @llvm.log10.f64(double %double)
+   store double %log10double, double* @vardouble
+; CHECK: bl log10
+
+   %log10fp128 = call fp128 @llvm.log10.f128(fp128 %fp128)
+   store fp128 %log10fp128, fp128* @varfp128
+; CHECK: bl log10l
+
+  ret void
+}
+
+declare float @llvm.sin.f32(float)
+declare double @llvm.sin.f64(double)
+declare fp128 @llvm.sin.f128(fp128)
+
+define void @test_sin(float %float, double %double, fp128 %fp128) {
+; CHECK: test_sin:
+
+   %sinfloat = call float @llvm.sin.f32(float %float)
+   store float %sinfloat, float* @varfloat
+; CHECK: bl sinf
+
+   %sindouble = call double @llvm.sin.f64(double %double)
+   store double %sindouble, double* @vardouble
+; CHECK: bl sin
+
+   %sinfp128 = call fp128 @llvm.sin.f128(fp128 %fp128)
+   store fp128 %sinfp128, fp128* @varfp128
+; CHECK: bl sinl
+  ret void
+
+}
+
+declare float @llvm.pow.f32(float, float)
+declare double @llvm.pow.f64(double, double)
+declare fp128 @llvm.pow.f128(fp128, fp128)
+
+define void @test_pow(float %float, double %double, fp128 %fp128) {
+; CHECK: test_pow:
+
+   %powfloat = call float @llvm.pow.f32(float %float, float %float)
+   store float %powfloat, float* @varfloat
+; CHECK: bl powf
+
+   %powdouble = call double @llvm.pow.f64(double %double, double %double)
+   store double %powdouble, double* @vardouble
+; CHECK: bl pow
+
+   %powfp128 = call fp128 @llvm.pow.f128(fp128 %fp128, fp128 %fp128)
+   store fp128 %powfp128, fp128* @varfp128
+; CHECK: bl powl
+
+  ret void
+}
+
+declare float @llvm.powi.f32(float, i32)
+declare double @llvm.powi.f64(double, i32)
+declare fp128 @llvm.powi.f128(fp128, i32)
+
+define void @test_powi(float %float, double %double, i32 %exponent, fp128 %fp128) {
+; CHECK: test_powi:
+
+   %powifloat = call float @llvm.powi.f32(float %float, i32 %exponent)
+   store float %powifloat, float* @varfloat
+; CHECK: bl __powisf2
+
+   %powidouble = call double @llvm.powi.f64(double %double, i32 %exponent)
+   store double %powidouble, double* @vardouble
+; CHECK: bl __powidf2
+
+   %powifp128 = call fp128 @llvm.powi.f128(fp128 %fp128, i32 %exponent)
+   store fp128 %powifp128, fp128* @varfp128
+; CHECK: bl __powitf2
+  ret void
+
+}
+
+define void @test_frem(float %float, double %double, fp128 %fp128) {
+; CHECK: test_frem:
+
+  %fremfloat = frem float %float, %float
+  store float %fremfloat, float* @varfloat
+; CHECK: bl fmodf
+
+  %fremdouble = frem double %double, %double
+  store double %fremdouble, double* @vardouble
+; CHECK: bl fmod
+
+  %fremfp128 = frem fp128 %fp128, %fp128
+  store fp128 %fremfp128, fp128* @varfp128
+; CHECK: bl fmodl
+
+  ret void
+}
diff --git a/test/CodeGen/AArch64/init-array.ll b/test/CodeGen/AArch64/init-array.ll
new file mode 100644
index 0000000..d80be8f
--- /dev/null
+++ b/test/CodeGen/AArch64/init-array.ll
@@ -0,0 +1,9 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array < %s | FileCheck %s
+
+define internal void @_GLOBAL__I_a() section ".text.startup" {
+  ret void
+}
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+
+; CHECK: .section .init_array
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badI.ll b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll
new file mode 100644
index 0000000..c39c57f
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll
@@ -0,0 +1,7 @@
+; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s
+
+define void @foo() {
+  ; Out of range immediate for I.
+  call void asm sideeffect "add x0, x0, $0", "I"(i32 4096)
+  ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK.ll
new file mode 100644
index 0000000..47c5f98
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badK.ll
@@ -0,0 +1,7 @@
+; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s
+
+define void @foo() {
+  ; 32-bit bitpattern ending in 1101 can't be produced.
+  call void asm sideeffect "and w0, w0, $0", "K"(i32 13)
+  ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll
new file mode 100644
index 0000000..7a5b99e
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll
@@ -0,0 +1,7 @@
+; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s
+
+define void @foo() {
+  ; 4294967296 (1 << 32) does not fit in the 32 bits of a w-register operand.
+  call void asm sideeffect "and w0, w0, $0", "K"(i64 4294967296)
+  ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badL.ll b/test/CodeGen/AArch64/inline-asm-constraints-badL.ll
new file mode 100644
index 0000000..4f00398
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badL.ll
@@ -0,0 +1,7 @@
+; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s
+
+define void @foo() {
+  ; 64-bit bitpattern ending in 1101 can't be produced.
+  call void asm sideeffect "and x0, x0, $0", "L"(i32 13)
+  ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/inline-asm-constraints.ll b/test/CodeGen/AArch64/inline-asm-constraints.ll
new file mode 100644
index 0000000..c232f32
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-constraints.ll
@@ -0,0 +1,117 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+define i64 @test_inline_constraint_r(i64 %base, i32 %offset) {
+; CHECK: test_inline_constraint_r:
+  %val = call i64 asm "add $0, $1, $2, sxtw", "=r,r,r"(i64 %base, i32 %offset)
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw
+  ret i64 %val
+}
+
+define i16 @test_small_reg(i16 %lhs, i16 %rhs) {
+; CHECK: test_small_reg:
+  %val = call i16 asm sideeffect "add $0, $1, $2, sxth", "=r,r,r"(i16 %lhs, i16 %rhs)
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth
+  ret i16 %val
+}
+
+define i64 @test_inline_constraint_r_imm(i64 %base, i32 %offset) {
+; CHECK: test_inline_constraint_r_imm:
+  %val = call i64 asm "add $0, $1, $2, sxtw", "=r,r,r"(i64 4, i32 12)
+; CHECK: movz [[FOUR:x[0-9]+]], #4
+; CHECK: movz [[TWELVE:w[0-9]+]], #12
+; CHECK: add {{x[0-9]+}}, [[FOUR]], [[TWELVE]], sxtw
+  ret i64 %val
+}
+
+; m is permitted to have a base/offset form. We don't do that
+; currently though.
+define i32 @test_inline_constraint_m(i32 *%ptr) {
+; CHECK: test_inline_constraint_m:
+  %val = call i32 asm "ldr $0, $1", "=r,m"(i32 *%ptr)
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
+  ret i32 %val
+}
+
+@arr = global [8 x i32] zeroinitializer
+
+; Q should *never* have base/offset form even if given the chance.
+define i32 @test_inline_constraint_Q(i32 *%ptr) {
+; CHECK: test_inline_constraint_Q:
+  %val = call i32 asm "ldr $0, $1", "=r,Q"(i32* getelementptr([8 x i32]* @arr, i32 0, i32 1))
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
+  ret i32 %val
+}
+
+@dump = global fp128 zeroinitializer
+
+define void @test_inline_constraint_I() {
+; CHECK: test_inline_constraint_I:
+  call void asm sideeffect "add x0, x0, $0", "I"(i32 0)
+  call void asm sideeffect "add x0, x0, $0", "I"(i64 4095)
+; CHECK: add x0, x0, #0
+; CHECK: add x0, x0, #4095
+
+  ret void
+}
+
+; Skip J because it's useless
+
+define void @test_inline_constraint_K() {
+; CHECK: test_inline_constraint_K:
+  call void asm sideeffect "and w0, w0, $0", "K"(i32 2863311530) ; = 0xaaaaaaaa
+  call void asm sideeffect "and w0, w0, $0", "K"(i32 65535)
+; CHECK: and w0, w0, #-1431655766
+; CHECK: and w0, w0, #65535
+
+  ret void
+}
+
+define void @test_inline_constraint_L() {
+; CHECK: test_inline_constraint_L:
+  call void asm sideeffect "and x0, x0, $0", "L"(i64 4294967296) ; = 0x100000000
+  call void asm sideeffect "and x0, x0, $0", "L"(i64 65535)
+; CHECK: and x0, x0, #4294967296
+; CHECK: and x0, x0, #65535
+
+  ret void
+}
+
+; Skip M and N because we don't support MOV pseudo-instructions yet.
+
+@var = global i32 0
+
+define void @test_inline_constraint_S() {
+; CHECK: test_inline_constraint_S:
+  call void asm sideeffect "adrp x0, $0", "S"(i32* @var)
+  call void asm sideeffect "adrp x0, ${0:A}", "S"(i32* @var)
+  call void asm sideeffect "add x0, x0, ${0:L}", "S"(i32* @var)
+; CHECK: adrp x0, var
+; CHECK: adrp x0, var
+; CHECK: add x0, x0, #:lo12:var
+  ret void
+}
+
+define i32 @test_inline_constraint_S_label(i1 %in) {
+; CHECK: test_inline_constraint_S_label:
+  call void asm sideeffect "adr x0, $0", "S"(i8* blockaddress(@test_inline_constraint_S_label, %loc))
+; CHECK: adr x0, .Ltmp{{[0-9]+}}
+  br i1 %in, label %loc, label %loc2
+loc:
+  ret i32 0
+loc2:
+  ret i32 42
+}
+
+define void @test_inline_constraint_Y() {
+; CHECK: test_inline_constraint_Y:
+  call void asm sideeffect "fcmp s0, $0", "Y"(float 0.0)
+; CHECK: fcmp s0, #0.0
+  ret void
+}
+
+define void @test_inline_constraint_Z() {
+; CHECK: test_inline_constraint_Z:
+  call void asm sideeffect "cmp w0, $0", "Z"(i32 0)
+; CHECK: cmp w0, #0
+  ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/inline-asm-modifiers.ll b/test/CodeGen/AArch64/inline-asm-modifiers.ll
new file mode 100644
index 0000000..3b55945
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-modifiers.ll
@@ -0,0 +1,125 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-ELF %s
+
+@var_simple = hidden global i32 0
+@var_got = global i32 0
+@var_tlsgd = thread_local global i32 0
+@var_tlsld = thread_local(localdynamic) global i32 0
+@var_tlsie = thread_local(initialexec) global i32 0
+@var_tlsle = thread_local(localexec) global i32 0
+
+define void @test_inline_modifier_L() nounwind { ; ${0:L}: low-12-bit relocation specifier, one call per kind of global declared above
+; CHECK: test_inline_modifier_L:
+  call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_simple) ; hidden global: plain :lo12:
+  call void asm sideeffect "ldr x0, [x0, ${0:L}]", "S,~{x0}"(i32* @var_got) ; default-visibility global under PIC: :got_lo12:
+  call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsgd) ; general-dynamic TLS: :tlsdesc_lo12:
+  call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsld) ; local-dynamic TLS: :dtprel_lo12:
+  call void asm sideeffect "ldr x0, [x0, ${0:L}]", "S,~{x0}"(i32* @var_tlsie) ; initial-exec TLS: :gottprel_lo12:
+  call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsle) ; local-exec TLS: :tprel_lo12:
+; CHECK: add x0, x0, #:lo12:var_simple
+; CHECK: ldr x0, [x0, #:got_lo12:var_got]
+; CHECK: add x0, x0, #:tlsdesc_lo12:var_tlsgd
+; CHECK: add x0, x0, #:dtprel_lo12:var_tlsld
+; CHECK: ldr x0, [x0, #:gottprel_lo12:var_tlsie]
+; CHECK: add x0, x0, #:tprel_lo12:var_tlsle
+
+; CHECK-ELF: R_AARCH64_ADD_ABS_LO12_NC var_simple
+; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var_got
+; CHECK-ELF: R_AARCH64_TLSDESC_ADD_LO12_NC var_tlsgd
+; CHECK-ELF: R_AARCH64_TLSLD_ADD_DTPREL_LO12 var_tlsld
+; CHECK-ELF: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC var_tlsie
+; CHECK-ELF: R_AARCH64_TLSLE_ADD_TPREL_LO12 var_tlsle
+
+  ret void
+}
+
+define void @test_inline_modifier_G() nounwind { ; ${0:G}: high-12-bit TLS offset specifier, used with 'lsl #12'
+; CHECK: test_inline_modifier_G:
+  call void asm sideeffect "add x0, x0, ${0:G}, lsl #12", "S,~{x0}"(i32* @var_tlsld) ; local-dynamic TLS: :dtprel_hi12:
+  call void asm sideeffect "add x0, x0, ${0:G}, lsl #12", "S,~{x0}"(i32* @var_tlsle) ; local-exec TLS: :tprel_hi12:
+; CHECK: add x0, x0, #:dtprel_hi12:var_tlsld, lsl #12
+; CHECK: add x0, x0, #:tprel_hi12:var_tlsle, lsl #12
+
+; CHECK-ELF: R_AARCH64_TLSLD_ADD_DTPREL_HI12 var_tlsld
+; CHECK-ELF: R_AARCH64_TLSLE_ADD_TPREL_HI12 var_tlsle
+
+  ret void
+}
+
+define void @test_inline_modifier_A() nounwind { ; ${0:A}: page-address form of a symbol for adrp
+; CHECK: test_inline_modifier_A:
+  call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_simple) ; hidden global: bare symbol
+  call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_got) ; :got:
+  call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_tlsgd) ; :tlsdesc:
+  call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_tlsie) ; :gottprel:
+  ; N.b. All tprel and dtprel relocs are modified: lo12 or granules.
+; CHECK: adrp x0, var_simple
+; CHECK: adrp x0, :got:var_got
+; CHECK: adrp x0, :tlsdesc:var_tlsgd
+; CHECK: adrp x0, :gottprel:var_tlsie
+
+; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 var_simple
+; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var_got
+; CHECK-ELF: R_AARCH64_TLSDESC_ADR_PAGE var_tlsgd
+; CHECK-ELF: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 var_tlsie
+
+  ret void
+}
+
+define void @test_inline_modifier_wx(i32 %small, i64 %big) nounwind { ; 'w' / 'x' modifiers: force the 32-bit or 64-bit name of a GPR operand
+; CHECK: test_inline_modifier_wx:
+  call i32 asm sideeffect "add $0, $0, $0", "=r,0"(i32 %small) ; i32 operand, no modifier: w-register expected
+  call i32 asm sideeffect "add ${0:w}, ${0:w}, ${0:w}", "=r,0"(i32 %small)
+  call i32 asm sideeffect "add ${0:x}, ${0:x}, ${0:x}", "=r,0"(i32 %small) ; 'x' overrides the i32 type
+; CHECK: //APP
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+
+  call i64 asm sideeffect "add $0, $0, $0", "=r,0"(i64 %big) ; i64 operand, no modifier: x-register expected
+  call i64 asm sideeffect "add ${0:w}, ${0:w}, ${0:w}", "=r,0"(i64 %big) ; 'w' overrides the i64 type
+  call i64 asm sideeffect "add ${0:x}, ${0:x}, ${0:x}", "=r,0"(i64 %big)
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+
+  call i32 asm sideeffect "add ${0:w}, ${1:w}, ${1:w}", "=r,r"(i32 0) ; constant-zero input: wzr expected
+  call i32 asm sideeffect "add ${0:x}, ${1:x}, ${1:x}", "=r,r"(i32 0) ; constant-zero input: xzr expected
+; CHECK: add {{w[0-9]+}}, wzr, wzr
+; CHECK: add {{x[0-9]+}}, xzr, xzr
+  ret void
+}
+
+define void @test_inline_modifier_bhsdq() nounwind { ; 'b','h','s','d','q' modifiers: pick the printed width of a "w"-constrained FP register
+; CHECK: test_inline_modifier_bhsdq:
+  call float asm sideeffect "ldr ${0:b}, [sp]", "=w"() ; float-typed output: modifier, not the IR type, picks the name
+  call float asm sideeffect "ldr ${0:h}, [sp]", "=w"()
+  call float asm sideeffect "ldr ${0:s}, [sp]", "=w"()
+  call float asm sideeffect "ldr ${0:d}, [sp]", "=w"()
+  call float asm sideeffect "ldr ${0:q}, [sp]", "=w"()
+; CHECK: ldr b0, [sp]
+; CHECK: ldr h0, [sp]
+; CHECK: ldr s0, [sp]
+; CHECK: ldr d0, [sp]
+; CHECK: ldr q0, [sp]
+
+  call double asm sideeffect "ldr ${0:b}, [sp]", "=w"() ; same sequence with a double-typed output
+  call double asm sideeffect "ldr ${0:h}, [sp]", "=w"()
+  call double asm sideeffect "ldr ${0:s}, [sp]", "=w"()
+  call double asm sideeffect "ldr ${0:d}, [sp]", "=w"()
+  call double asm sideeffect "ldr ${0:q}, [sp]", "=w"()
+; CHECK: ldr b0, [sp]
+; CHECK: ldr h0, [sp]
+; CHECK: ldr s0, [sp]
+; CHECK: ldr d0, [sp]
+; CHECK: ldr q0, [sp]
+  ret void
+}
+
+define void @test_inline_modifier_c() nounwind { ; 'c' modifier: print an immediate operand without the leading '#'
+; CHECK: test_inline_modifier_c:
+  call void asm sideeffect "adr x0, ${0:c}", "i"(i32 3) ; expected to print as a bare 3
+; CHECK: adr x0, 3
+
+  ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll
new file mode 100644
index 0000000..dcf9f4e
--- /dev/null
+++ b/test/CodeGen/AArch64/jump-table.ll
@@ -0,0 +1,56 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | elf-dump | FileCheck %s -check-prefix=CHECK-ELF
+
+define i32 @test_jumptable(i32 %in) { ; switch expected to lower to an adrp/add + ldr + br jump-table sequence
+; CHECK: test_jumptable
+
+  switch i32 %in, label %def [
+    i32 0, label %lbl1
+    i32 1, label %lbl2
+    i32 2, label %lbl3
+    i32 4, label %lbl4
+  ] ; case values span 0..4 (3 is absent); the table checked after the function has five .xword entries
+; CHECK: adrp [[JTPAGE:x[0-9]+]], .LJTI0_0
+; CHECK: add x[[JT:[0-9]+]], [[JTPAGE]], #:lo12:.LJTI0_0
+; CHECK: ldr [[DEST:x[0-9]+]], [x[[JT]], {{x[0-9]+}}, lsl #3]
+; CHECK: br [[DEST]]
+
+def:
+  ret i32 0
+
+lbl1:
+  ret i32 1
+
+lbl2:
+  ret i32 2
+
+lbl3:
+  ret i32 4
+
+lbl4:
+  ret i32 8
+
+}
+
+; CHECK: .rodata
+
+; CHECK: .LJTI0_0:
+; CHECK-NEXT: .xword
+; CHECK-NEXT: .xword
+; CHECK-NEXT: .xword
+; CHECK-NEXT: .xword
+; CHECK-NEXT: .xword
+
+; ELF tests:
+
+; First make sure we get a page/lo12 pair in .text to pick up the jump-table
+; CHECK-ELF: .rela.text
+; CHECK-ELF: ('r_sym', 0x00000008)
+; CHECK-ELF-NEXT: ('r_type', 0x00000113)
+; CHECK-ELF: ('r_sym', 0x00000008)
+; CHECK-ELF-NEXT: ('r_type', 0x00000115)
+
+; Also check the targets in .rodata are relocated
+; CHECK-ELF: .rela.rodata
+; CHECK-ELF: ('r_sym', 0x00000005)
+; CHECK-ELF-NEXT: ('r_type', 0x00000101)
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/large-frame.ll b/test/CodeGen/AArch64/large-frame.ll
new file mode 100644
index 0000000..2b2e129
--- /dev/null
+++ b/test/CodeGen/AArch64/large-frame.ll
@@ -0,0 +1,114 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+declare void @use_addr(i8*)
+
+@addr = global i8* null
+
+define void @test_bigframe() { ; ~40MB frame: sp adjustment and slot offsets are expected to need movz/movk-built constants
+; CHECK: test_bigframe:
+
+  %var1 = alloca i8, i32 20000000
+  %var2 = alloca i8, i32 16
+  %var3 = alloca i8, i32 20000000
+; CHECK: sub sp, sp, #496
+; CHECK: str x30, [sp, #488]
+  ; Total adjust is 39999536 (= 610 * 65536 + 22576)
+; CHECK: movz [[SUBCONST:x[0-9]+]], #22576
+; CHECK: movk [[SUBCONST]], #610, lsl #16
+; CHECK: sub sp, sp, [[SUBCONST]]
+
+  ; Total offset is 20000024 (= 305 * 65536 + 11544)
+; CHECK: movz [[VAR1OFFSET:x[0-9]+]], #11544
+; CHECK: movk [[VAR1OFFSET]], #305, lsl #16
+; CHECK: add {{x[0-9]+}}, sp, [[VAR1OFFSET]]
+  store volatile i8* %var1, i8** @addr
+
+  %var1plus2 = getelementptr i8* %var1, i32 2
+  store volatile i8* %var1plus2, i8** @addr
+
+; CHECK: movz [[VAR2OFFSET:x[0-9]+]], #11528
+; CHECK: movk [[VAR2OFFSET]], #305, lsl #16
+; CHECK: add {{x[0-9]+}}, sp, [[VAR2OFFSET]]
+  store volatile i8* %var2, i8** @addr ; offset matched just above is 20000008 (= 305 * 65536 + 11528)
+
+  %var2plus2 = getelementptr i8* %var2, i32 2
+  store volatile i8* %var2plus2, i8** @addr
+
+  store volatile i8* %var3, i8** @addr ; no pattern is matched for %var3's address
+
+  %var3plus2 = getelementptr i8* %var3, i32 2
+  store volatile i8* %var3plus2, i8** @addr
+
+; CHECK: movz [[ADDCONST:x[0-9]+]], #22576
+; CHECK: movk [[ADDCONST]], #610, lsl #16
+; CHECK: add sp, sp, [[ADDCONST]]
+  ret void ; epilogue is expected to rebuild the same 39999536 constant
+}
+
+define void @test_mediumframe() { ; ~2MB frame: adjustments are expected as immediate pairs (#imm plus #imm, lsl #12), no movz/movk
+; CHECK: test_mediumframe:
+  %var1 = alloca i8, i32 1000000
+  %var2 = alloca i8, i32 16
+  %var3 = alloca i8, i32 1000000 ; second sp adjustment matched below is 1999536 (= 488 * 4096 + 688)
+; CHECK: sub sp, sp, #496
+; CHECK: str x30, [sp, #488]
+; CHECK: sub sp, sp, #688
+; CHECK-NEXT: sub sp, sp, #488, lsl #12
+
+  store volatile i8* %var1, i8** @addr ; address matched below is sp + 1000024 (= 244 * 4096 + 600)
+; CHECK: add [[VAR1ADDR:x[0-9]+]], sp, #600
+; CHECK: add [[VAR1ADDR]], [[VAR1ADDR]], #244, lsl #12
+
+  %var1plus2 = getelementptr i8* %var1, i32 2
+  store volatile i8* %var1plus2, i8** @addr
+; CHECK: add [[VAR1PLUS2:x[0-9]+]], {{x[0-9]+}}, #2
+
+  store volatile i8* %var2, i8** @addr ; address matched below is sp + 1000008 (= 244 * 4096 + 584)
+; CHECK: add [[VAR2ADDR:x[0-9]+]], sp, #584
+; CHECK: add [[VAR2ADDR]], [[VAR2ADDR]], #244, lsl #12
+
+  %var2plus2 = getelementptr i8* %var2, i32 2
+  store volatile i8* %var2plus2, i8** @addr
+; CHECK: add [[VAR2PLUS2:x[0-9]+]], {{x[0-9]+}}, #2
+
+  store volatile i8* %var3, i8** @addr ; no pattern is matched for %var3's address
+
+  %var3plus2 = getelementptr i8* %var3, i32 2
+  store volatile i8* %var3plus2, i8** @addr
+
+; CHECK: add sp, sp, #688
+; CHECK: add sp, sp, #488, lsl #12
+; CHECK: ldr x30, [sp, #488]
+; CHECK: add sp, sp, #496
+  ret void ; epilogue mirrors the prologue adjustments in reverse order
+}
+
+
+@bigspace = global [8 x i64] zeroinitializer
+
+; If temporary registers are allocated for adjustment, they should *not* clobber
+; argument registers.
+define void @test_tempallocation([8 x i64] %val) nounwind { ; NOTE(review): %val presumably occupies x0-x7, the registers the negative checks guard
+; CHECK: test_tempallocation:
+  %var = alloca i8, i32 1000000 ; large enough that a second sp adjustment is expected after the callee-save stores
+; CHECK: sub sp, sp,
+
+; Make sure the prologue is reasonably efficient
+; CHECK-NEXT: stp x29, x30, [sp,
+; CHECK-NEXT: stp x25, x26, [sp,
+; CHECK-NEXT: stp x23, x24, [sp,
+; CHECK-NEXT: stp x21, x22, [sp,
+; CHECK-NEXT: stp x19, x20, [sp,
+
+; Make sure we don't trash an argument register
+; CHECK-NOT: movz {{x[0-7],}}
+; CHECK: sub sp, sp,
+
+; CHECK-NOT: movz {{x[0-7],}}
+
+; CHECK: bl use_addr
+  call void @use_addr(i8* %var)
+
+  store [8 x i64] %val, [8 x i64]* @bigspace ; uses %val after the call, so the argument must survive it
+  ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/ldst-regoffset.ll b/test/CodeGen/AArch64/ldst-regoffset.ll
new file mode 100644
index 0000000..4593512
--- /dev/null
+++ b/test/CodeGen/AArch64/ldst-regoffset.ll
@@ -0,0 +1,333 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var_8bit = global i8 0
+@var_16bit = global i16 0
+@var_32bit = global i32 0
+@var_64bit = global i64 0
+
+@var_float = global float 0.0
+@var_double = global double 0.0
+
+define void @ldst_8bit(i8* %base, i32 %off32, i64 %off64) { ; byte loads with register-offset addressing (sxtw, plain x-reg, uxtw)
+; CHECK: ldst_8bit:
+
+   %addr8_sxtw = getelementptr i8* %base, i32 %off32 ; i32 index: sign-extended (sxtw) offset expected
+   %val8_sxtw = load volatile i8* %addr8_sxtw
+   %val32_signed = sext i8 %val8_sxtw to i32
+   store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+  %addr_lsl = getelementptr i8* %base, i64 %off64 ; i64 index: plain 64-bit register offset expected
+  %val8_lsl = load volatile i8* %addr_lsl
+  %val32_unsigned = zext i8 %val8_lsl to i32
+  store volatile i32 %val32_unsigned, i32* @var_32bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+  %addrint_uxtw = ptrtoint i8* %base to i64 ; explicit zext + add: zero-extended (uxtw) offset expected
+  %offset_uxtw = zext i32 %off32 to i64
+  %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+  %addr_uxtw = inttoptr i64 %addrint1_uxtw to i8*
+  %val8_uxtw = load volatile i8* %addr_uxtw
+  %newval8 = add i8 %val8_uxtw, 1
+  store volatile i8 %newval8, i8* @var_8bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+   ret void
+}
+
+
+define void @ldst_16bit(i16* %base, i32 %off32, i64 %off64) { ; halfword accesses with register-offset addressing, scaled (#1) and unscaled
+; CHECK: ldst_16bit:
+
+   %addr8_sxtwN = getelementptr i16* %base, i32 %off32 ; i32 element index: sxtw scaled by #1 expected
+   %val8_sxtwN = load volatile i16* %addr8_sxtwN
+   %val32_signed = sext i16 %val8_sxtwN to i32
+   store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #1]
+
+  %addr_lslN = getelementptr i16* %base, i64 %off64 ; i64 element index: lsl #1 expected
+  %val8_lslN = load volatile i16* %addr_lslN
+  %val32_unsigned = zext i16 %val8_lslN to i32
+  store volatile i32 %val32_unsigned, i32* @var_32bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #1]
+
+  %addrint_uxtw = ptrtoint i16* %base to i64 ; byte offset via zext + add: unscaled uxtw expected
+  %offset_uxtw = zext i32 %off32 to i64
+  %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+  %addr_uxtw = inttoptr i64 %addrint1_uxtw to i16*
+  %val8_uxtw = load volatile i16* %addr_uxtw
+  %newval8 = add i16 %val8_uxtw, 1
+  store volatile i16 %newval8, i16* @var_16bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+  %base_sxtw = ptrtoint i16* %base to i64 ; byte offset via sext + add: unscaled sxtw, sign-extending load to 64 bits
+  %offset_sxtw = sext i32 %off32 to i64
+  %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+  %addr_sxtw = inttoptr i64 %addrint_sxtw to i16*
+  %val16_sxtw = load volatile i16* %addr_sxtw
+  %val64_signed = sext i16 %val16_sxtw to i64
+  store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+
+  %base_lsl = ptrtoint i16* %base to i64 ; 64-bit byte offset: plain register offset, zext to i64 via a w-register load
+  %addrint_lsl = add i64 %base_lsl, %off64
+  %addr_lsl = inttoptr i64 %addrint_lsl to i16*
+  %val16_lsl = load volatile i16* %addr_lsl
+  %val64_unsigned = zext i16 %val16_lsl to i64
+  store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+  %base_uxtwN = ptrtoint i16* %base to i64 ; zext then shl 1: store with uxtw scaled by #1 expected
+  %offset_uxtwN = zext i32 %off32 to i64
+  %offset2_uxtwN = shl i64 %offset_uxtwN, 1
+  %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+  %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i16*
+  %val32 = load volatile i32* @var_32bit
+  %val16_trunc32 = trunc i32 %val32 to i16
+  store volatile i16 %val16_trunc32, i16* %addr_uxtwN
+; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #1]
+   ret void
+}
+
+define void @ldst_32bit(i32* %base, i32 %off32, i64 %off64) { ; word accesses with register-offset addressing, scaled (#2) and unscaled
+; CHECK: ldst_32bit:
+
+   %addr_sxtwN = getelementptr i32* %base, i32 %off32 ; i32 element index: sxtw scaled by #2 expected
+   %val_sxtwN = load volatile i32* %addr_sxtwN
+   store volatile i32 %val_sxtwN, i32* @var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #2]
+
+  %addr_lslN = getelementptr i32* %base, i64 %off64 ; i64 element index: lsl #2 expected
+  %val_lslN = load volatile i32* %addr_lslN
+  store volatile i32 %val_lslN, i32* @var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #2]
+
+  %addrint_uxtw = ptrtoint i32* %base to i64 ; byte offset via zext + add: unscaled uxtw expected
+  %offset_uxtw = zext i32 %off32 to i64
+  %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+  %addr_uxtw = inttoptr i64 %addrint1_uxtw to i32*
+  %val_uxtw = load volatile i32* %addr_uxtw
+  %newval8 = add i32 %val_uxtw, 1
+  store volatile i32 %newval8, i32* @var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+
+  %base_sxtw = ptrtoint i32* %base to i64 ; byte offset via sext + add: unscaled sxtw, sign-extending load (ldrsw)
+  %offset_sxtw = sext i32 %off32 to i64
+  %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+  %addr_sxtw = inttoptr i64 %addrint_sxtw to i32*
+  %val16_sxtw = load volatile i32* %addr_sxtw
+  %val64_signed = sext i32 %val16_sxtw to i64
+  store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+
+  %base_lsl = ptrtoint i32* %base to i64 ; 64-bit byte offset: plain register offset, zext to i64 via a w-register load
+  %addrint_lsl = add i64 %base_lsl, %off64
+  %addr_lsl = inttoptr i64 %addrint_lsl to i32*
+  %val16_lsl = load volatile i32* %addr_lsl
+  %val64_unsigned = zext i32 %val16_lsl to i64
+  store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+  %base_uxtwN = ptrtoint i32* %base to i64 ; zext then shl 2: store with uxtw scaled by #2 expected
+  %offset_uxtwN = zext i32 %off32 to i64
+  %offset2_uxtwN = shl i64 %offset_uxtwN, 2
+  %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+  %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i32*
+  %val32 = load volatile i32* @var_32bit
+  store volatile i32 %val32, i32* %addr_uxtwN
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2]
+   ret void
+}
+
+define void @ldst_64bit(i64* %base, i32 %off32, i64 %off64) { ; doubleword accesses with register-offset addressing, scaled (#3) and unscaled
+; CHECK: ldst_64bit:
+
+   %addr_sxtwN = getelementptr i64* %base, i32 %off32 ; i32 element index: sxtw scaled by #3 expected
+   %val_sxtwN = load volatile i64* %addr_sxtwN
+   store volatile i64 %val_sxtwN, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #3]
+
+  %addr_lslN = getelementptr i64* %base, i64 %off64 ; i64 element index: lsl #3 expected
+  %val_lslN = load volatile i64* %addr_lslN
+  store volatile i64 %val_lslN, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #3]
+
+  %addrint_uxtw = ptrtoint i64* %base to i64 ; byte offset via zext + add: unscaled uxtw expected
+  %offset_uxtw = zext i32 %off32 to i64
+  %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+  %addr_uxtw = inttoptr i64 %addrint1_uxtw to i64*
+  %val8_uxtw = load volatile i64* %addr_uxtw
+  %newval8 = add i64 %val8_uxtw, 1
+  store volatile i64 %newval8, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+  %base_sxtw = ptrtoint i64* %base to i64 ; byte offset via sext + add: unscaled sxtw expected
+  %offset_sxtw = sext i32 %off32 to i64
+  %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+  %addr_sxtw = inttoptr i64 %addrint_sxtw to i64*
+  %val64_sxtw = load volatile i64* %addr_sxtw
+  store volatile i64 %val64_sxtw, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+  %base_lsl = ptrtoint i64* %base to i64 ; 64-bit byte offset: plain register offset expected
+  %addrint_lsl = add i64 %base_lsl, %off64
+  %addr_lsl = inttoptr i64 %addrint_lsl to i64*
+  %val64_lsl = load volatile i64* %addr_lsl
+  store volatile i64 %val64_lsl, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+  %base_uxtwN = ptrtoint i64* %base to i64 ; zext then shl 3: store with uxtw scaled by #3 expected
+  %offset_uxtwN = zext i32 %off32 to i64
+  %offset2_uxtwN = shl i64 %offset_uxtwN, 3
+  %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+  %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i64*
+  %val64 = load volatile i64* @var_64bit
+  store volatile i64 %val64, i64* %addr_uxtwN
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3]
+   ret void
+}
+
+define void @ldst_float(float* %base, i32 %off32, i64 %off64) { ; single-precision FP accesses (s-registers) with register-offset addressing
+; CHECK: ldst_float:
+
+   %addr_sxtwN = getelementptr float* %base, i32 %off32 ; i32 element index: sxtw scaled by #2 expected
+   %val_sxtwN = load volatile float* %addr_sxtwN
+   store volatile float %val_sxtwN, float* @var_float
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #2]
+
+  %addr_lslN = getelementptr float* %base, i64 %off64 ; i64 element index: lsl #2 expected
+  %val_lslN = load volatile float* %addr_lslN
+  store volatile float %val_lslN, float* @var_float
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #2]
+
+  %addrint_uxtw = ptrtoint float* %base to i64 ; byte offset via zext + add: unscaled uxtw expected
+  %offset_uxtw = zext i32 %off32 to i64
+  %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+  %addr_uxtw = inttoptr i64 %addrint1_uxtw to float*
+  %val_uxtw = load volatile float* %addr_uxtw
+  store volatile float %val_uxtw, float* @var_float
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+  %base_sxtw = ptrtoint float* %base to i64 ; byte offset via sext + add: unscaled sxtw expected
+  %offset_sxtw = sext i32 %off32 to i64
+  %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+  %addr_sxtw = inttoptr i64 %addrint_sxtw to float*
+  %val64_sxtw = load volatile float* %addr_sxtw
+  store volatile float %val64_sxtw, float* @var_float
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+  %base_lsl = ptrtoint float* %base to i64 ; 64-bit byte offset: plain register offset expected
+  %addrint_lsl = add i64 %base_lsl, %off64
+  %addr_lsl = inttoptr i64 %addrint_lsl to float*
+  %val64_lsl = load volatile float* %addr_lsl
+  store volatile float %val64_lsl, float* @var_float
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+  %base_uxtwN = ptrtoint float* %base to i64 ; zext then shl 2: store with uxtw scaled by #2 expected
+  %offset_uxtwN = zext i32 %off32 to i64
+  %offset2_uxtwN = shl i64 %offset_uxtwN, 2
+  %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+  %addr_uxtwN = inttoptr i64 %addrint_uxtwN to float*
+  %val64 = load volatile float* @var_float
+  store volatile float %val64, float* %addr_uxtwN
+; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2]
+   ret void
+}
+
+define void @ldst_double(double* %base, i32 %off32, i64 %off64) { ; double-precision FP accesses (d-registers) with register-offset addressing
+; CHECK: ldst_double:
+
+   %addr_sxtwN = getelementptr double* %base, i32 %off32 ; i32 element index: sxtw scaled by #3 expected
+   %val_sxtwN = load volatile double* %addr_sxtwN
+   store volatile double %val_sxtwN, double* @var_double
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #3]
+
+  %addr_lslN = getelementptr double* %base, i64 %off64 ; i64 element index: lsl #3 expected
+  %val_lslN = load volatile double* %addr_lslN
+  store volatile double %val_lslN, double* @var_double
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #3]
+
+  %addrint_uxtw = ptrtoint double* %base to i64 ; byte offset via zext + add: unscaled uxtw expected
+  %offset_uxtw = zext i32 %off32 to i64
+  %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+  %addr_uxtw = inttoptr i64 %addrint1_uxtw to double*
+  %val_uxtw = load volatile double* %addr_uxtw
+  store volatile double %val_uxtw, double* @var_double
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+  %base_sxtw = ptrtoint double* %base to i64 ; byte offset via sext + add: unscaled sxtw expected
+  %offset_sxtw = sext i32 %off32 to i64
+  %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+  %addr_sxtw = inttoptr i64 %addrint_sxtw to double*
+  %val64_sxtw = load volatile double* %addr_sxtw
+  store volatile double %val64_sxtw, double* @var_double
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+  %base_lsl = ptrtoint double* %base to i64 ; 64-bit byte offset: plain register offset expected
+  %addrint_lsl = add i64 %base_lsl, %off64
+  %addr_lsl = inttoptr i64 %addrint_lsl to double*
+  %val64_lsl = load volatile double* %addr_lsl
+  store volatile double %val64_lsl, double* @var_double
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+  %base_uxtwN = ptrtoint double* %base to i64 ; zext then shl 3: store with uxtw scaled by #3 expected
+  %offset_uxtwN = zext i32 %off32 to i64
+  %offset2_uxtwN = shl i64 %offset_uxtwN, 3
+  %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+  %addr_uxtwN = inttoptr i64 %addrint_uxtwN to double*
+  %val64 = load volatile double* @var_double
+  store volatile double %val64, double* %addr_uxtwN
+; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3]
+   ret void
+}
+
+
+define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) { ; fp128 accesses (q-registers) with register-offset addressing; results are stored back to %base
+; CHECK: ldst_128bit:
+
+   %addr_sxtwN = getelementptr fp128* %base, i32 %off32 ; i32 element index: sxtw scaled by #4 expected
+   %val_sxtwN = load volatile fp128* %addr_sxtwN
+   store volatile fp128 %val_sxtwN, fp128* %base
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4]
+
+  %addr_lslN = getelementptr fp128* %base, i64 %off64 ; i64 element index: lsl #4 expected
+  %val_lslN = load volatile fp128* %addr_lslN
+  store volatile fp128 %val_lslN, fp128* %base
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #4]
+
+  %addrint_uxtw = ptrtoint fp128* %base to i64 ; byte offset via zext + add: unscaled uxtw expected
+  %offset_uxtw = zext i32 %off32 to i64
+  %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+  %addr_uxtw = inttoptr i64 %addrint1_uxtw to fp128*
+  %val_uxtw = load volatile fp128* %addr_uxtw
+  store volatile fp128 %val_uxtw, fp128* %base
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+  %base_sxtw = ptrtoint fp128* %base to i64 ; byte offset via sext + add: unscaled sxtw expected
+  %offset_sxtw = sext i32 %off32 to i64
+  %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+  %addr_sxtw = inttoptr i64 %addrint_sxtw to fp128*
+  %val64_sxtw = load volatile fp128* %addr_sxtw
+  store volatile fp128 %val64_sxtw, fp128* %base
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+  %base_lsl = ptrtoint fp128* %base to i64 ; 64-bit byte offset: plain register offset expected
+  %addrint_lsl = add i64 %base_lsl, %off64
+  %addr_lsl = inttoptr i64 %addrint_lsl to fp128*
+  %val64_lsl = load volatile fp128* %addr_lsl
+  store volatile fp128 %val64_lsl, fp128* %base
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+  %base_uxtwN = ptrtoint fp128* %base to i64 ; zext then shl 4: store with uxtw scaled by #4 expected
+  %offset_uxtwN = zext i32 %off32 to i64
+  %offset2_uxtwN = shl i64 %offset_uxtwN, 4
+  %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+  %addr_uxtwN = inttoptr i64 %addrint_uxtwN to fp128*
+  %val64 = load volatile fp128* %base
+  store volatile fp128 %val64, fp128* %addr_uxtwN
+; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #4]
+   ret void
+}
diff --git a/test/CodeGen/AArch64/ldst-unscaledimm.ll b/test/CodeGen/AArch64/ldst-unscaledimm.ll
new file mode 100644
index 0000000..78a3c83
--- /dev/null
+++ b/test/CodeGen/AArch64/ldst-unscaledimm.ll
@@ -0,0 +1,218 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var_8bit = global i8 0
+@var_16bit = global i16 0
+@var_32bit = global i32 0
+@var_64bit = global i64 0
+
+@var_float = global float 0.0
+@var_double = global double 0.0
+
+@varptr = global i8* null
+
+define void @ldst_8bit() { ; byte accesses at small negative offsets from a loaded pointer: unscaled ldur*/stur* forms expected
+; CHECK: ldst_8bit:
+
+; No architectural support for loads to 16-bit or 8-bit since we
+; promote i8 during lowering.
+  %addr_8bit = load i8** @varptr
+
+; match a sign-extending volatile load 8-bit -> 32-bit
+   %addr_sext32 = getelementptr i8* %addr_8bit, i64 -256
+   %val8_sext32 = load volatile i8* %addr_sext32
+   %val32_signed = sext i8 %val8_sext32 to i32
+   store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: ldursb {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
+
+; match a zero-extending volatile load 8-bit -> 32-bit
+  %addr_zext32 = getelementptr i8* %addr_8bit, i64 -12
+  %val8_zext32 = load volatile i8* %addr_zext32
+  %val32_unsigned = zext i8 %val8_zext32 to i32
+  store volatile i32 %val32_unsigned, i32* @var_32bit
+; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-12]
+
+; match an any-extending volatile load 8-bit -> 32-bit
+  %addr_anyext = getelementptr i8* %addr_8bit, i64 -1
+  %val8_anyext = load volatile i8* %addr_anyext
+  %newval8 = add i8 %val8_anyext, 1
+  store volatile i8 %newval8, i8* @var_8bit
+; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
+
+; match a sign-extending volatile load 8-bit -> 64-bit
+  %addr_sext64 = getelementptr i8* %addr_8bit, i64 -5
+  %val8_sext64 = load volatile i8* %addr_sext64
+  %val64_signed = sext i8 %val8_sext64 to i64
+  store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldursb {{x[0-9]+}}, [{{x[0-9]+}}, #-5]
+
+; match a zero-extending volatile load 8-bit -> 64-bit.
+; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits
+; of x0 so it's identical to loading to 32-bits.
+  %addr_zext64 = getelementptr i8* %addr_8bit, i64 -9
+  %val8_zext64 = load volatile i8* %addr_zext64
+  %val64_unsigned = zext i8 %val8_zext64 to i64
+  store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-9]
+
+; truncating volatile store 32-bits to 8-bits
+  %addr_trunc32 = getelementptr i8* %addr_8bit, i64 -256
+  %val32 = load volatile i32* @var_32bit
+  %val8_trunc32 = trunc i32 %val32 to i8
+  store volatile i8 %val8_trunc32, i8* %addr_trunc32
+; CHECK: sturb {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
+
+; truncating volatile store 64-bits to 8-bits
+  %addr_trunc64 = getelementptr i8* %addr_8bit, i64 -1
+  %val64 = load volatile i64* @var_64bit
+  %val8_trunc64 = trunc i64 %val64 to i8
+  store volatile i8 %val8_trunc64, i8* %addr_trunc64
+; CHECK: sturb {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
+
+   ret void
+}
+
+define void @ldst_16bit() { ; halfword accesses at negative or odd byte offsets: unscaled ldur*/stur* forms expected
+; CHECK: ldst_16bit:
+
+; No architectural support for loads to 16-bit or 8-bit since we
+; promote i16 during lowering.
+  %addr_8bit = load i8** @varptr
+
+; match a sign-extending volatile load 16-bit -> 32-bit
+   %addr8_sext32 = getelementptr i8* %addr_8bit, i64 -256
+   %addr_sext32 = bitcast i8* %addr8_sext32 to i16*
+   %val16_sext32 = load volatile i16* %addr_sext32
+   %val32_signed = sext i16 %val16_sext32 to i32
+   store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: ldursh {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
+
+; match a zero-extending volatile load 16-bit -> 32-bit. With offset that would be unaligned.
+  %addr8_zext32 = getelementptr i8* %addr_8bit, i64 15
+  %addr_zext32 = bitcast i8* %addr8_zext32 to i16*
+  %val16_zext32 = load volatile i16* %addr_zext32
+  %val32_unsigned = zext i16 %val16_zext32 to i32
+  store volatile i32 %val32_unsigned, i32* @var_32bit
+; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #15]
+
+; match an any-extending volatile load 16-bit -> 32-bit
+  %addr8_anyext = getelementptr i8* %addr_8bit, i64 -1
+  %addr_anyext = bitcast i8* %addr8_anyext to i16*
+  %val16_anyext = load volatile i16* %addr_anyext
+  %newval16 = add i16 %val16_anyext, 1
+  store volatile i16 %newval16, i16* @var_16bit
+; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
+
+; match a sign-extending volatile load 16-bit -> 64-bit
+  %addr8_sext64 = getelementptr i8* %addr_8bit, i64 -5
+  %addr_sext64 = bitcast i8* %addr8_sext64 to i16*
+  %val16_sext64 = load volatile i16* %addr_sext64
+  %val64_signed = sext i16 %val16_sext64 to i64
+  store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldursh {{x[0-9]+}}, [{{x[0-9]+}}, #-5]
+
+; match a zero-extending volatile load 16-bit -> 64-bit.
+; This uses the fact that ldrh w0, [x0] will zero out the high 32-bits
+; of x0 so it's identical to loading to 32-bits.
+  %addr8_zext64 = getelementptr i8* %addr_8bit, i64 9
+  %addr_zext64 = bitcast i8* %addr8_zext64 to i16*
+  %val16_zext64 = load volatile i16* %addr_zext64
+  %val64_unsigned = zext i16 %val16_zext64 to i64
+  store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #9]
+
+; truncating volatile store 32-bits to 16-bits
+  %addr8_trunc32 = getelementptr i8* %addr_8bit, i64 -256
+  %addr_trunc32 = bitcast i8* %addr8_trunc32 to i16*
+  %val32 = load volatile i32* @var_32bit
+  %val16_trunc32 = trunc i32 %val32 to i16
+  store volatile i16 %val16_trunc32, i16* %addr_trunc32
+; CHECK: sturh {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
+
+; truncating volatile store 64-bits to 16-bits
+  %addr8_trunc64 = getelementptr i8* %addr_8bit, i64 -1
+  %addr_trunc64 = bitcast i8* %addr8_trunc64 to i16*
+  %val64 = load volatile i64* @var_64bit
+  %val16_trunc64 = trunc i64 %val64 to i16
+  store volatile i16 %val16_trunc64, i16* %addr_trunc64
+; CHECK: sturh {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
+
+   ret void
+}
+
+define void @ldst_32bit() { ; word accesses at negative or unaligned byte offsets: unscaled ldur/ldursw/stur forms expected
+; CHECK: ldst_32bit:
+
+  %addr_8bit = load i8** @varptr
+
+; Straight 32-bit load/store
+  %addr32_8_noext = getelementptr i8* %addr_8bit, i64 1 ; offset 1 is not a multiple of 4
+  %addr32_noext = bitcast i8* %addr32_8_noext to i32*
+  %val32_noext = load volatile i32* %addr32_noext
+  store volatile i32 %val32_noext, i32* %addr32_noext
+; CHECK: ldur {{w[0-9]+}}, [{{x[0-9]+}}, #1]
+; CHECK: stur {{w[0-9]+}}, [{{x[0-9]+}}, #1]
+
+; Zero-extension to 64-bits
+  %addr32_8_zext = getelementptr i8* %addr_8bit, i64 -256
+  %addr32_zext = bitcast i8* %addr32_8_zext to i32*
+  %val32_zext = load volatile i32* %addr32_zext
+  %val64_unsigned = zext i32 %val32_zext to i64
+  store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldur {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+
+; Sign-extension to 64-bits
+  %addr32_8_sext = getelementptr i8* %addr_8bit, i64 -12
+  %addr32_sext = bitcast i8* %addr32_8_sext to i32*
+  %val32_sext = load volatile i32* %addr32_sext
+  %val64_signed = sext i32 %val32_sext to i64
+  store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldursw {{x[0-9]+}}, [{{x[0-9]+}}, #-12]
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+
+; Truncation from 64-bits
+  %addr64_8_trunc = getelementptr i8* %addr_8bit, i64 255 ; 64-bit load from offset 255
+  %addr64_trunc = bitcast i8* %addr64_8_trunc to i64*
+  %addr32_8_trunc = getelementptr i8* %addr_8bit, i64 -20 ; truncated value is stored at offset -20
+  %addr32_trunc = bitcast i8* %addr32_8_trunc to i32*
+
+  %val64_trunc = load volatile i64* %addr64_trunc
+  %val32_trunc = trunc i64 %val64_trunc to i32
+  store volatile i32 %val32_trunc, i32* %addr32_trunc
+; CHECK: ldur {{x[0-9]+}}, [{{x[0-9]+}}, #255]
+; CHECK: stur {{w[0-9]+}}, [{{x[0-9]+}}, #-20]
+
+  ret void
+}
+
+define void @ldst_float() { ; float load/store at byte offset -5: unscaled ldur/stur on an s-register expected
+; CHECK: ldst_float:
+
+  %addr_8bit = load i8** @varptr
+  %addrfp_8 = getelementptr i8* %addr_8bit, i64 -5
+  %addrfp = bitcast i8* %addrfp_8 to float*
+
+  %valfp = load volatile float* %addrfp
+; CHECK: ldur {{s[0-9]+}}, [{{x[0-9]+}}, #-5]
+
+  store volatile float %valfp, float* %addrfp ; same address as the load
+; CHECK: stur {{s[0-9]+}}, [{{x[0-9]+}}, #-5]
+
+  ret void
+}
+
+define void @ldst_double() { ; double load/store at byte offset 4 (not a multiple of 8): unscaled ldur/stur on a d-register expected
+; CHECK: ldst_double:
+
+  %addr_8bit = load i8** @varptr
+  %addrfp_8 = getelementptr i8* %addr_8bit, i64 4
+  %addrfp = bitcast i8* %addrfp_8 to double*
+
+  %valfp = load volatile double* %addrfp
+; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #4]
+
+  store volatile double %valfp, double* %addrfp ; same address as the load
+; CHECK: stur {{d[0-9]+}}, [{{x[0-9]+}}, #4]
+
+   ret void
+}
diff --git a/test/CodeGen/AArch64/ldst-unsignedimm.ll b/test/CodeGen/AArch64/ldst-unsignedimm.ll
new file mode 100644
index 0000000..1e7540d
--- /dev/null
+++ b/test/CodeGen/AArch64/ldst-unsignedimm.ll
@@ -0,0 +1,251 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var_8bit = global i8 0
+@var_16bit = global i16 0
+@var_32bit = global i32 0
+@var_64bit = global i64 0
+
+@var_float = global float 0.0
+@var_double = global double 0.0
+
+define void @ldst_8bit() {
+; CHECK: ldst_8bit:
+
+; No architectural support for loads to 16-bit or 8-bit since we
+; promote i8 during lowering.
+
+; match a sign-extending load 8-bit -> 32-bit
+   %val8_sext32 = load volatile i8* @var_8bit
+   %val32_signed = sext i8 %val8_sext32 to i32
+   store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: adrp {{x[0-9]+}}, var_8bit
+; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+; match a zero-extending load volatile 8-bit -> 32-bit
+  %val8_zext32 = load volatile i8* @var_8bit
+  %val32_unsigned = zext i8 %val8_zext32 to i32
+  store volatile i32 %val32_unsigned, i32* @var_32bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+; match an any-extending load volatile 8-bit -> 32-bit
+  %val8_anyext = load volatile i8* @var_8bit
+  %newval8 = add i8 %val8_anyext, 1
+  store volatile i8 %newval8, i8* @var_8bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+; match a sign-extending load volatile 8-bit -> 64-bit
+  %val8_sext64 = load volatile i8* @var_8bit
+  %val64_signed = sext i8 %val8_sext64 to i64
+  store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsb {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+; match a zero-extending load volatile 8-bit -> 64-bit.
+; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits
+; of x0 so it's identical to loading to 32-bits.
+  %val8_zext64 = load volatile i8* @var_8bit
+  %val64_unsigned = zext i8 %val8_zext64 to i64
+  store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+; truncating store volatile 32-bits to 8-bits
+  %val32 = load volatile i32* @var_32bit
+  %val8_trunc32 = trunc i32 %val32 to i8
+  store volatile i8 %val8_trunc32, i8* @var_8bit
+; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+; truncating store volatile 64-bits to 8-bits
+  %val64 = load volatile i64* @var_64bit
+  %val8_trunc64 = trunc i64 %val64 to i8
+  store volatile i8 %val8_trunc64, i8* @var_8bit
+; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+   ret void
+}
+
+define void @ldst_16bit() {
+; CHECK: ldst_16bit:
+
+; No architectural support for loads to 16-bit since we promote i16 during
+; lowering.
+
+; match a sign-extending load volatile 16-bit -> 32-bit
+  %val16_sext32 = load volatile i16* @var_16bit
+  %val32_signed = sext i16 %val16_sext32 to i32
+  store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: adrp {{x[0-9]+}}, var_16bit
+; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+; match a zero-extending load volatile 16-bit -> 32-bit
+  %val16_zext32 = load volatile i16* @var_16bit
+  %val32_unsigned = zext i16 %val16_zext32 to i32
+  store volatile i32 %val32_unsigned, i32* @var_32bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+; match an any-extending load volatile 16-bit -> 32-bit
+  %val16_anyext = load volatile i16* @var_16bit
+  %newval16 = add i16 %val16_anyext, 1
+  store volatile i16 %newval16, i16* @var_16bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+; match a sign-extending load volatile 16-bit -> 64-bit
+  %val16_sext64 = load volatile i16* @var_16bit
+  %val64_signed = sext i16 %val16_sext64 to i64
+  store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+; match a zero-extending load volatile 16-bit -> 64-bit.
+; This uses the fact that ldrh w0, [x0] will zero out the high 32-bits
+; of x0 so it's identical to loading to 32-bits.
+  %val16_zext64 = load volatile i16* @var_16bit
+  %val64_unsigned = zext i16 %val16_zext64 to i64
+  store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+; truncating store volatile 32-bits to 16-bits
+  %val32 = load volatile i32* @var_32bit
+  %val16_trunc32 = trunc i32 %val32 to i16
+  store volatile i16 %val16_trunc32, i16* @var_16bit
+; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+; truncating store volatile 64-bits to 16-bits
+  %val64 = load volatile i64* @var_64bit
+  %val16_trunc64 = trunc i64 %val64 to i16
+  store volatile i16 %val16_trunc64, i16* @var_16bit
+; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+  ret void
+}
+
+define void @ldst_32bit() {
+; CHECK: ldst_32bit:
+
+; Straight 32-bit load/store
+  %val32_noext = load volatile i32* @var_32bit
+  store volatile i32 %val32_noext, i32* @var_32bit
+; CHECK: adrp {{x[0-9]+}}, var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
+
+; Zero-extension to 64-bits
+  %val32_zext = load volatile i32* @var_32bit
+  %val64_unsigned = zext i32 %val32_zext to i64
+  store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+
+; Sign-extension to 64-bits
+  %val32_sext = load volatile i32* @var_32bit
+  %val64_signed = sext i32 %val32_sext to i64
+  store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+
+; Truncation from 64-bits
+  %val64_trunc = load volatile i64* @var_64bit
+  %val32_trunc = trunc i64 %val64_trunc to i32
+  store volatile i32 %val32_trunc, i32* @var_32bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
+
+  ret void
+}
+
+@arr8 = global i8* null
+@arr16 = global i16* null
+@arr32 = global i32* null
+@arr64 = global i64* null
+
+; Now check that our selection copes with accesses more complex than a
+; single symbol. Permitted offsets should be folded into the loads and
+; stores. Since all forms use the same Operand it's only necessary to
+; check the various access-sizes involved.
+
+define void @ldst_complex_offsets() {
+; CHECK: ldst_complex_offsets:
+  %arr8_addr = load volatile i8** @arr8
+; CHECK: adrp {{x[0-9]+}}, arr8
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr8]
+
+  %arr8_sub1_addr = getelementptr i8* %arr8_addr, i64 1
+  %arr8_sub1 = load volatile i8* %arr8_sub1_addr
+  store volatile i8 %arr8_sub1, i8* @var_8bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #1]
+
+  %arr8_sub4095_addr = getelementptr i8* %arr8_addr, i64 4095
+  %arr8_sub4095 = load volatile i8* %arr8_sub4095_addr
+  store volatile i8 %arr8_sub4095, i8* @var_8bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #4095]
+
+
+  %arr16_addr = load volatile i16** @arr16
+; CHECK: adrp {{x[0-9]+}}, arr16
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr16]
+
+  %arr16_sub1_addr = getelementptr i16* %arr16_addr, i64 1
+  %arr16_sub1 = load volatile i16* %arr16_sub1_addr
+  store volatile i16 %arr16_sub1, i16* @var_16bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #2]
+
+  %arr16_sub4095_addr = getelementptr i16* %arr16_addr, i64 4095
+  %arr16_sub4095 = load volatile i16* %arr16_sub4095_addr
+  store volatile i16 %arr16_sub4095, i16* @var_16bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #8190]
+
+
+  %arr32_addr = load volatile i32** @arr32
+; CHECK: adrp {{x[0-9]+}}, arr32
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr32]
+
+  %arr32_sub1_addr = getelementptr i32* %arr32_addr, i64 1
+  %arr32_sub1 = load volatile i32* %arr32_sub1_addr
+  store volatile i32 %arr32_sub1, i32* @var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #4]
+
+  %arr32_sub4095_addr = getelementptr i32* %arr32_addr, i64 4095
+  %arr32_sub4095 = load volatile i32* %arr32_sub4095_addr
+  store volatile i32 %arr32_sub4095, i32* @var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #16380]
+
+
+  %arr64_addr = load volatile i64** @arr64
+; CHECK: adrp {{x[0-9]+}}, arr64
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr64]
+
+  %arr64_sub1_addr = getelementptr i64* %arr64_addr, i64 1
+  %arr64_sub1 = load volatile i64* %arr64_sub1_addr
+  store volatile i64 %arr64_sub1, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8]
+
+  %arr64_sub4095_addr = getelementptr i64* %arr64_addr, i64 4095
+  %arr64_sub4095 = load volatile i64* %arr64_sub4095_addr
+  store volatile i64 %arr64_sub4095, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #32760]
+
+  ret void
+}
+
+define void @ldst_float() {
+; CHECK: ldst_float:
+
+   %valfp = load volatile float* @var_float
+; CHECK: adrp {{x[0-9]+}}, var_float
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_float]
+
+  store volatile float %valfp, float* @var_float
+; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_float]
+
+   ret void
+}
+
+define void @ldst_double() {
+; CHECK: ldst_double:
+
+   %valfp = load volatile double* @var_double
+; CHECK: adrp {{x[0-9]+}}, var_double
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_double]
+
+  store volatile double %valfp, double* @var_double
+; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_double]
+
+   ret void
+}
diff --git a/test/CodeGen/AArch64/lit.local.cfg b/test/CodeGen/AArch64/lit.local.cfg
new file mode 100644
index 0000000..c5ce241
--- /dev/null
+++ b/test/CodeGen/AArch64/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'AArch64' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/AArch64/literal_pools.ll b/test/CodeGen/AArch64/literal_pools.ll
new file mode 100644
index 0000000..e090841
--- /dev/null
+++ b/test/CodeGen/AArch64/literal_pools.ll
@@ -0,0 +1,55 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @foo() {
+; CHECK: foo:
+    %val32 = load i32* @var32
+    %val64 = load i64* @var64
+
+    %val32_lit32 = and i32 %val32, 123456785
+    store volatile i32 %val32_lit32, i32* @var32
+; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
+; CHECK: ldr {{w[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
+
+    %val64_lit32 = and i64 %val64, 305402420
+    store volatile i64 %val64_lit32, i64* @var64
+; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
+; CHECK: ldr {{w[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
+
+    %val64_lit32signed = and i64 %val64, -12345678
+    store volatile i64 %val64_lit32signed, i64* @var64
+; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
+; CHECK: ldrsw {{x[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
+
+    %val64_lit64 = and i64 %val64, 1234567898765432
+    store volatile i64 %val64_lit64, i64* @var64
+; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
+; CHECK: ldr {{x[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
+
+    ret void
+}
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+define void @floating_lits() {
+; CHECK: floating_lits:
+
+  %floatval = load float* @varfloat
+  %newfloat = fadd float %floatval, 128.0
+; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI1_[0-9]+]]
+; CHECK: ldr {{s[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
+; CHECK: fadd
+  store float %newfloat, float* @varfloat
+
+  %doubleval = load double* @vardouble
+  %newdouble = fadd double %doubleval, 129.0
+; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI1_[0-9]+]]
+; CHECK: ldr {{d[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
+; CHECK: fadd
+  store double %newdouble, double* @vardouble
+
+  ret void
+}
diff --git a/test/CodeGen/AArch64/local_vars.ll b/test/CodeGen/AArch64/local_vars.ll
new file mode 100644
index 0000000..5cbf5a3
--- /dev/null
+++ b/test/CodeGen/AArch64/local_vars.ll
@@ -0,0 +1,57 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP %s
+
+; Make sure a reasonably sane prologue and epilogue are
+; generated. This test is not robust in the face of an frame-handling
+; evolving, but still has value for unrelated changes, I
+; believe.
+;
+; In particular, it will fail when ldp/stp are used for frame setup,
+; when FP-elim is implemented, and when addressing from FP is
+; implemented.
+
+@var = global i64 0
+@local_addr = global i64* null
+
+declare void @foo()
+
+define void @trivial_func() nounwind {
+; CHECK: trivial_func: // @trivial_func
+; CHECK-NEXT: // BB#0
+; CHECK-NEXT: ret
+
+  ret void
+}
+
+define void @trivial_fp_func() {
+; CHECK-WITHFP: trivial_fp_func:
+
+; CHECK-WITHFP: sub sp, sp, #16
+; CHECK-WITHFP: stp x29, x30, [sp]
+; CHECK-WITHFP-NEXT: mov x29, sp
+; Don't really care, but it would be a Bad Thing if this came after the epilogue.
+; CHECK: bl foo
+; CHECK-WITHFP: bl foo
+  call void @foo()
+  ret void
+
+; CHECK-WITHFP: ldp x29, x30, [sp]
+; CHECK-WITHFP: add sp, sp, #16
+
+; CHECK-WITHFP: ret
+}
+
+define void @stack_local() {
+  %local_var = alloca i64
+; CHECK: stack_local:
+; CHECK: sub sp, sp, #16
+
+  %val = load i64* @var
+  store i64 %val, i64* %local_var
+; CHECK: str {{x[0-9]+}}, [sp, #{{[0-9]+}}]
+
+  store i64* %local_var, i64** @local_addr
+; CHECK: add {{x[0-9]+}}, sp, #{{[0-9]+}}
+
+  ret void
+}
diff --git a/test/CodeGen/AArch64/logical-imm.ll b/test/CodeGen/AArch64/logical-imm.ll
new file mode 100644
index 0000000..5f3f4da
--- /dev/null
+++ b/test/CodeGen/AArch64/logical-imm.ll
@@ -0,0 +1,84 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_and(i32 %in32, i64 %in64) {
+; CHECK: test_and:
+
+  %val0 = and i32 %in32, 2863311530
+  store volatile i32 %val0, i32* @var32
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xaaaaaaaa
+
+  %val1 = and i32 %in32, 4293984240
+  store volatile i32 %val1, i32* @var32
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0fff0
+
+  %val2 = and i64 %in64, 9331882296111890817
+  store volatile i64 %val2, i64* @var64
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0x8181818181818181
+
+  %val3 = and i64 %in64, 18429855317404942275
+  store volatile i64 %val3, i64* @var64
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffc3ffc3ffc3ffc3
+
+  ret void
+}
+
+define void @test_orr(i32 %in32, i64 %in64) {
+; CHECK: test_orr:
+
+  %val0 = or i32 %in32, 2863311530
+  store volatile i32 %val0, i32* @var32
+; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, #0xaaaaaaaa
+
+  %val1 = or i32 %in32, 4293984240
+  store volatile i32 %val1, i32* @var32
+; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0fff0
+
+  %val2 = or i64 %in64, 9331882296111890817
+  store volatile i64 %val2, i64* @var64
+; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0x8181818181818181
+
+  %val3 = or i64 %in64, 18429855317404942275
+  store volatile i64 %val3, i64* @var64
+; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0xffc3ffc3ffc3ffc3
+
+  ret void
+}
+
+define void @test_eor(i32 %in32, i64 %in64) {
+; CHECK: test_eor:
+
+  %val0 = xor i32 %in32, 2863311530
+  store volatile i32 %val0, i32* @var32
+; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, #0xaaaaaaaa
+
+  %val1 = xor i32 %in32, 4293984240
+  store volatile i32 %val1, i32* @var32
+; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0fff0
+
+  %val2 = xor i64 %in64, 9331882296111890817
+  store volatile i64 %val2, i64* @var64
+; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, #0x8181818181818181
+
+  %val3 = xor i64 %in64, 18429855317404942275
+  store volatile i64 %val3, i64* @var64
+; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, #0xffc3ffc3ffc3ffc3
+
+  ret void
+}
+
+define void @test_mov(i32 %in32, i64 %in64) {
+; CHECK: test_mov:
+  %val0 = add i32 %in32, 2863311530
+  store i32 %val0, i32* @var32
+; CHECK: orr {{w[0-9]+}}, wzr, #0xaaaaaaaa
+
+  %val1 = add i64 %in64, 11068046444225730969
+  store i64 %val1, i64* @var64
+; CHECK: orr {{x[0-9]+}}, xzr, #0x9999999999999999
+
+  ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/logical_shifted_reg.ll b/test/CodeGen/AArch64/logical_shifted_reg.ll
new file mode 100644
index 0000000..bbbfcc1
--- /dev/null
+++ b/test/CodeGen/AArch64/logical_shifted_reg.ll
@@ -0,0 +1,224 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s
+
+@var1_32 = global i32 0
+@var2_32 = global i32 0
+
+@var1_64 = global i64 0
+@var2_64 = global i64 0
+
+define void @logical_32bit() {
+; CHECK: logical_32bit:
+  %val1 = load i32* @var1_32
+  %val2 = load i32* @var2_32
+
+  ; First check basic and/bic/or/orn/eor/eon patterns with no shift
+  %neg_val2 = xor i32 -1, %val2
+
+  %and_noshift = and i32 %val1, %val2
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+  store volatile i32 %and_noshift, i32* @var1_32
+  %bic_noshift = and i32 %neg_val2, %val1
+; CHECK: bic {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+  store volatile i32 %bic_noshift, i32* @var1_32
+
+  %or_noshift = or i32 %val1, %val2
+; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+  store volatile i32 %or_noshift, i32* @var1_32
+  %orn_noshift = or i32 %neg_val2, %val1
+; CHECK: orn {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+  store volatile i32 %orn_noshift, i32* @var1_32
+
+  %xor_noshift = xor i32 %val1, %val2
+; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+  store volatile i32 %xor_noshift, i32* @var1_32
+  %xorn_noshift = xor i32 %neg_val2, %val1
+; CHECK: eon {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+  store volatile i32 %xorn_noshift, i32* @var1_32
+
+  ; Check the maximum shift on each
+  %operand_lsl31 = shl i32 %val2, 31
+  %neg_operand_lsl31 = xor i32 -1, %operand_lsl31
+
+  %and_lsl31 = and i32 %val1, %operand_lsl31
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+  store volatile i32 %and_lsl31, i32* @var1_32
+  %bic_lsl31 = and i32 %val1, %neg_operand_lsl31
+; CHECK: bic {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+  store volatile i32 %bic_lsl31, i32* @var1_32
+
+  %or_lsl31 = or i32 %val1, %operand_lsl31
+; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+  store volatile i32 %or_lsl31, i32* @var1_32
+  %orn_lsl31 = or i32 %val1, %neg_operand_lsl31
+; CHECK: orn {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+  store volatile i32 %orn_lsl31, i32* @var1_32
+
+  %xor_lsl31 = xor i32 %val1, %operand_lsl31
+; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+  store volatile i32 %xor_lsl31, i32* @var1_32
+  %xorn_lsl31 = xor i32 %val1, %neg_operand_lsl31
+; CHECK: eon {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+  store volatile i32 %xorn_lsl31, i32* @var1_32
+
+  ; Check other shifts on a subset
+  %operand_asr10 = ashr i32 %val2, 10
+  %neg_operand_asr10 = xor i32 -1, %operand_asr10
+
+  %bic_asr10 = and i32 %val1, %neg_operand_asr10
+; CHECK: bic {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #10
+  store volatile i32 %bic_asr10, i32* @var1_32
+  %xor_asr10 = xor i32 %val1, %operand_asr10
+; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #10
+  store volatile i32 %xor_asr10, i32* @var1_32
+
+  %operand_lsr1 = lshr i32 %val2, 1
+  %neg_operand_lsr1 = xor i32 -1, %operand_lsr1
+
+  %orn_lsr1 = or i32 %val1, %neg_operand_lsr1
+; CHECK: orn {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #1
+  store volatile i32 %orn_lsr1, i32* @var1_32
+  %xor_lsr1 = xor i32 %val1, %operand_lsr1
+; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #1
+  store volatile i32 %xor_lsr1, i32* @var1_32
+
+  %operand_ror20_big = shl i32 %val2, 12
+  %operand_ror20_small = lshr i32 %val2, 20
+  %operand_ror20 = or i32 %operand_ror20_big, %operand_ror20_small
+  %neg_operand_ror20 = xor i32 -1, %operand_ror20
+
+  %xorn_ror20 = xor i32 %val1, %neg_operand_ror20
+; CHECK: eon {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ror #20
+  store volatile i32 %xorn_ror20, i32* @var1_32
+  %and_ror20 = and i32 %val1, %operand_ror20
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ror #20
+  store volatile i32 %and_ror20, i32* @var1_32
+
+  ret void
+}
+
+define void @logical_64bit() {
+; CHECK: logical_64bit:
+  %val1 = load i64* @var1_64
+  %val2 = load i64* @var2_64
+
+  ; First check basic and/bic/or/orn/eor/eon patterns with no shift
+  %neg_val2 = xor i64 -1, %val2
+
+  %and_noshift = and i64 %val1, %val2
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+  store volatile i64 %and_noshift, i64* @var1_64
+  %bic_noshift = and i64 %neg_val2, %val1
+; CHECK: bic {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+  store volatile i64 %bic_noshift, i64* @var1_64
+
+  %or_noshift = or i64 %val1, %val2
+; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+  store volatile i64 %or_noshift, i64* @var1_64
+  %orn_noshift = or i64 %neg_val2, %val1
+; CHECK: orn {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+  store volatile i64 %orn_noshift, i64* @var1_64
+
+  %xor_noshift = xor i64 %val1, %val2
+; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+  store volatile i64 %xor_noshift, i64* @var1_64
+  %xorn_noshift = xor i64 %neg_val2, %val1
+; CHECK: eon {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+  store volatile i64 %xorn_noshift, i64* @var1_64
+
+  ; Check the maximum shift on each
+  %operand_lsl63 = shl i64 %val2, 63
+  %neg_operand_lsl63 = xor i64 -1, %operand_lsl63
+
+  %and_lsl63 = and i64 %val1, %operand_lsl63
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+  store volatile i64 %and_lsl63, i64* @var1_64
+  %bic_lsl63 = and i64 %val1, %neg_operand_lsl63
+; CHECK: bic {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+  store volatile i64 %bic_lsl63, i64* @var1_64
+
+  %or_lsl63 = or i64 %val1, %operand_lsl63
+; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+  store volatile i64 %or_lsl63, i64* @var1_64
+  %orn_lsl63 = or i64 %val1, %neg_operand_lsl63
+; CHECK: orn {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+  store volatile i64 %orn_lsl63, i64* @var1_64
+
+  %xor_lsl63 = xor i64 %val1, %operand_lsl63
+; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+  store volatile i64 %xor_lsl63, i64* @var1_64
+  %xorn_lsl63 = xor i64 %val1, %neg_operand_lsl63
+; CHECK: eon {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+  store volatile i64 %xorn_lsl63, i64* @var1_64
+
+  ; Check other shifts on a subset
+  %operand_asr10 = ashr i64 %val2, 10
+  %neg_operand_asr10 = xor i64 -1, %operand_asr10
+
+  %bic_asr10 = and i64 %val1, %neg_operand_asr10
+; CHECK: bic {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #10
+  store volatile i64 %bic_asr10, i64* @var1_64
+  %xor_asr10 = xor i64 %val1, %operand_asr10
+; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #10
+  store volatile i64 %xor_asr10, i64* @var1_64
+
+  %operand_lsr1 = lshr i64 %val2, 1
+  %neg_operand_lsr1 = xor i64 -1, %operand_lsr1
+
+  %orn_lsr1 = or i64 %val1, %neg_operand_lsr1
+; CHECK: orn {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #1
+  store volatile i64 %orn_lsr1, i64* @var1_64
+  %xor_lsr1 = xor i64 %val1, %operand_lsr1
+; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #1
+  store volatile i64 %xor_lsr1, i64* @var1_64
+
+  ; Construct a rotate-right from a bunch of other logical
+  ; operations. DAGCombiner should ensure we see the ROTR during
+  ; selection
+  %operand_ror20_big = shl i64 %val2, 44
+  %operand_ror20_small = lshr i64 %val2, 20
+  %operand_ror20 = or i64 %operand_ror20_big, %operand_ror20_small
+  %neg_operand_ror20 = xor i64 -1, %operand_ror20
+
+  %xorn_ror20 = xor i64 %val1, %neg_operand_ror20
+; CHECK: eon {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ror #20
+  store volatile i64 %xorn_ror20, i64* @var1_64
+  %and_ror20 = and i64 %val1, %operand_ror20
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ror #20
+  store volatile i64 %and_ror20, i64* @var1_64
+
+  ret void
+}
+
+define void @flag_setting() {
+; CHECK: flag_setting:
+  %val1 = load i64* @var1_64
+  %val2 = load i64* @var2_64
+
+; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: b.gt .L
+  %simple_and = and i64 %val1, %val2
+  %tst1 = icmp sgt i64 %simple_and, 0
+  br i1 %tst1, label %ret, label %test2
+
+test2:
+; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+; CHECK: b.lt .L
+  %shifted_op = shl i64 %val2, 63
+  %shifted_and = and i64 %val1, %shifted_op
+  %tst2 = icmp slt i64 %shifted_and, 0
+  br i1 %tst2, label %ret, label %test3
+
+test3:
+; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}, asr #12
+; CHECK: b.gt .L
+  %asr_op = ashr i64 %val2, 12
+  %asr_and = and i64 %asr_op, %val1
+  %tst3 = icmp sgt i64 %asr_and, 0
+  br i1 %tst3, label %ret, label %other_exit
+
+other_exit:
+  store volatile i64 %val1, i64* @var1_64
+  ret void
+ret:
+  ret void
+}
diff --git a/test/CodeGen/AArch64/logical_shifted_reg.s b/test/CodeGen/AArch64/logical_shifted_reg.s
new file mode 100644
index 0000000..89aea58
--- /dev/null
+++ b/test/CodeGen/AArch64/logical_shifted_reg.s
@@ -0,0 +1,208 @@
+	.file	"/home/timnor01/a64-trunk/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll"
+	.text
+	.globl	logical_32bit
+	.type	logical_32bit,@function
+logical_32bit:                          // @logical_32bit
+	.cfi_startproc
+// BB#0:
+	adrp	x0, var1_32
+	ldr	w1, [x0, #:lo12:var1_32]
+	adrp	x0, var2_32
+	ldr	w2, [x0, #:lo12:var2_32]
+	and	w3, w1, w2
+	adrp	x0, var1_32
+	str	w3, [x0, #:lo12:var1_32]
+	bic	w3, w1, w2
+	adrp	x0, var1_32
+	str	w3, [x0, #:lo12:var1_32]
+	orr	w3, w1, w2
+	adrp	x0, var1_32
+	str	w3, [x0, #:lo12:var1_32]
+	orn	w3, w1, w2
+	adrp	x0, var1_32
+	str	w3, [x0, #:lo12:var1_32]
+	eor	w3, w1, w2
+	adrp	x0, var1_32
+	str	w3, [x0, #:lo12:var1_32]
+	eon	w3, w2, w1
+	adrp	x0, var1_32
+	str	w3, [x0, #:lo12:var1_32]
+	and	w3, w1, w2, lsl #31
+	adrp	x0, var1_32
+	str	w3, [x0, #:lo12:var1_32]
+	bic	w3, w1, w2, lsl #31
+	adrp	x0, var1_32
+	str	w3, [x0, #:lo12:var1_32]
+	orr	w3, w1, w2, lsl #31
+	adrp	x0, var1_32
+	str	w3, [x0, #:lo12:var1_32]
+	orn	w3, w1, w2, lsl #31
+	adrp	x0, var1_32
+	str	w3, [x0, #:lo12:var1_32]
+	eor	w3, w1, w2, lsl #31
+	adrp	x0, var1_32
+	str	w3, [x0, #:lo12:var1_32]
+	eon	w3, w1, w2, lsl #31
+	adrp	x0, var1_32
+	str	w3, [x0, #:lo12:var1_32]
+	bic	w3, w1, w2, asr #10
+	adrp	x0, var1_32
+	str	w3, [x0, #:lo12:var1_32]
+	eor	w3, w1, w2, asr #10
+	adrp	x0, var1_32
+	str	w3, [x0, #:lo12:var1_32]
+	orn	w3, w1, w2, lsr #1
+	adrp	x0, var1_32
+	str	w3, [x0, #:lo12:var1_32]
+	eor	w3, w1, w2, lsr #1
+	adrp	x0, var1_32
+	str	w3, [x0, #:lo12:var1_32]
+	eon	w3, w1, w2, ror #20
+	adrp	x0, var1_32
+	str	w3, [x0, #:lo12:var1_32]
+	and	w1, w1, w2, ror #20
+	adrp	x0, var1_32
+	str	w1, [x0, #:lo12:var1_32]
+	ret
+.Ltmp0:
+	.size	logical_32bit, .Ltmp0-logical_32bit
+	.cfi_endproc
+
+	.globl	logical_64bit
+	.type	logical_64bit,@function
+logical_64bit:                          // @logical_64bit
+	.cfi_startproc
+// BB#0:
+	adrp	x0, var1_64
+	ldr	x0, [x0, #:lo12:var1_64]
+	adrp	x1, var2_64
+	ldr	x1, [x1, #:lo12:var2_64]
+	and	x2, x0, x1
+	adrp	x3, var1_64
+	str	x2, [x3, #:lo12:var1_64]
+	bic	x2, x0, x1
+	adrp	x3, var1_64
+	str	x2, [x3, #:lo12:var1_64]
+	orr	x2, x0, x1
+	adrp	x3, var1_64
+	str	x2, [x3, #:lo12:var1_64]
+	orn	x2, x0, x1
+	adrp	x3, var1_64
+	str	x2, [x3, #:lo12:var1_64]
+	eor	x2, x0, x1
+	adrp	x3, var1_64
+	str	x2, [x3, #:lo12:var1_64]
+	eon	x2, x1, x0
+	adrp	x3, var1_64
+	str	x2, [x3, #:lo12:var1_64]
+	and	x2, x0, x1, lsl #63
+	adrp	x3, var1_64
+	str	x2, [x3, #:lo12:var1_64]
+	bic	x2, x0, x1, lsl #63
+	adrp	x3, var1_64
+	str	x2, [x3, #:lo12:var1_64]
+	orr	x2, x0, x1, lsl #63
+	adrp	x3, var1_64
+	str	x2, [x3, #:lo12:var1_64]
+	orn	x2, x0, x1, lsl #63
+	adrp	x3, var1_64
+	str	x2, [x3, #:lo12:var1_64]
+	eor	x2, x0, x1, lsl #63
+	adrp	x3, var1_64
+	str	x2, [x3, #:lo12:var1_64]
+	eon	x2, x0, x1, lsl #63
+	adrp	x3, var1_64
+	str	x2, [x3, #:lo12:var1_64]
+	bic	x2, x0, x1, asr #10
+	adrp	x3, var1_64
+	str	x2, [x3, #:lo12:var1_64]
+	eor	x2, x0, x1, asr #10
+	adrp	x3, var1_64
+	str	x2, [x3, #:lo12:var1_64]
+	orn	x2, x0, x1, lsr #1
+	adrp	x3, var1_64
+	str	x2, [x3, #:lo12:var1_64]
+	eor	x2, x0, x1, lsr #1
+	adrp	x3, var1_64
+	str	x2, [x3, #:lo12:var1_64]
+	eon	x2, x0, x1, ror #20
+	adrp	x3, var1_64
+	str	x2, [x3, #:lo12:var1_64]
+	and	x0, x0, x1, ror #20
+	adrp	x1, var1_64
+	str	x0, [x1, #:lo12:var1_64]
+	ret
+.Ltmp1:
+	.size	logical_64bit, .Ltmp1-logical_64bit
+	.cfi_endproc
+
+	.globl	flag_setting
+	.type	flag_setting,@function
+flag_setting:                           // @flag_setting
+	.cfi_startproc
+// BB#0:
+	sub	sp, sp, #16
+	adrp	x0, var1_64
+	ldr	x0, [x0, #:lo12:var1_64]
+	adrp	x1, var2_64
+	ldr	x1, [x1, #:lo12:var2_64]
+	tst	x0, x1
+	str	x0, [sp, #8]            // 8-byte Folded Spill
+	str	x1, [sp]                // 8-byte Folded Spill
+	b.gt .LBB2_4
+	b	.LBB2_1
+.LBB2_1:                                // %test2
+	ldr	x0, [sp, #8]            // 8-byte Folded Reload
+	ldr	x1, [sp]                // 8-byte Folded Reload
+	tst	x0, x1, lsl #63
+	b.lt .LBB2_4
+	b	.LBB2_2
+.LBB2_2:                                // %test3
+	ldr	x0, [sp, #8]            // 8-byte Folded Reload
+	ldr	x1, [sp]                // 8-byte Folded Reload
+	tst	x0, x1, asr #12
+	b.gt .LBB2_4
+	b	.LBB2_3
+.LBB2_3:                                // %other_exit
+	adrp	x0, var1_64
+	ldr	x1, [sp, #8]            // 8-byte Folded Reload
+	str	x1, [x0, #:lo12:var1_64]
+	add	sp, sp, #16
+	ret
+.LBB2_4:                                // %ret
+	add	sp, sp, #16
+	ret
+.Ltmp2:
+	.size	flag_setting, .Ltmp2-flag_setting
+	.cfi_endproc
+
+	.type	var1_32,@object         // @var1_32
+	.bss
+	.globl	var1_32
+	.align	2
+var1_32:
+	.word	0                       // 0x0
+	.size	var1_32, 4
+
+	.type	var2_32,@object         // @var2_32
+	.globl	var2_32
+	.align	2
+var2_32:
+	.word	0                       // 0x0
+	.size	var2_32, 4
+
+	.type	var1_64,@object         // @var1_64
+	.globl	var1_64
+	.align	3
+var1_64:
+	.xword	0                       // 0x0
+	.size	var1_64, 8
+
+	.type	var2_64,@object         // @var2_64
+	.globl	var2_64
+	.align	3
+var2_64:
+	.xword	0                       // 0x0
+	.size	var2_64, 8
+
+
diff --git a/test/CodeGen/AArch64/movw-consts.ll b/test/CodeGen/AArch64/movw-consts.ll
new file mode 100644
index 0000000..afdf681
--- /dev/null
+++ b/test/CodeGen/AArch64/movw-consts.ll
@@ -0,0 +1,124 @@
+; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+define i64 @test0() {
+; CHECK: test0:
+; Not produced by move wide instructions, but good to make sure we can return 0 anyway:
+; CHECK: mov x0, xzr
+  ret i64 0
+}
+
+define i64 @test1() {
+; CHECK: test1:
+; CHECK: movz x0, #1
+  ret i64 1
+}
+
+define i64 @test2() {
+; CHECK: test2:
+; CHECK: movz x0, #65535
+  ret i64 65535
+}
+
+define i64 @test3() {
+; CHECK: test3:
+; CHECK: movz x0, #1, lsl #16
+  ret i64 65536
+}
+
+define i64 @test4() {
+; CHECK: test4:
+; CHECK: movz x0, #65535, lsl #16
+  ret i64 4294901760
+}
+
+define i64 @test5() {
+; CHECK: test5:
+; CHECK: movz x0, #1, lsl #32
+  ret i64 4294967296
+}
+
+define i64 @test6() {
+; CHECK: test6:
+; CHECK: movz x0, #65535, lsl #32
+  ret i64 281470681743360
+}
+
+define i64 @test7() {
+; CHECK: test7:
+; CHECK: movz x0, #1, lsl #48
+  ret i64 281474976710656
+}
+
+; A 32-bit MOVN can generate some 64-bit patterns that a 64-bit one
+; couldn't. Useful even for i64
+define i64 @test8() {
+; CHECK: test8:
+; CHECK: movn w0, #60875
+  ret i64 4294906420
+}
+
+define i64 @test9() {
+; CHECK: test9:
+; CHECK: movn x0, #0
+  ret i64 -1
+}
+
+define i64 @test10() {
+; CHECK: test10:
+; CHECK: movn x0, #60875, lsl #16
+  ret i64 18446744069720047615
+}
+
+; For reasonably legitimate reasons returning an i32 results in the
+; selection of an i64 constant, so we need a different idiom to test that selection
+@var32 = global i32 0
+
+define void @test11() {
+; CHECK: test11:
+; CHECK: mov {{w[0-9]+}}, wzr
+  store i32 0, i32* @var32
+  ret void
+}
+
+define void @test12() {
+; CHECK: test12:
+; CHECK: movz {{w[0-9]+}}, #1
+  store i32 1, i32* @var32
+  ret void
+}
+
+define void @test13() {
+; CHECK: test13:
+; CHECK: movz {{w[0-9]+}}, #65535
+  store i32 65535, i32* @var32
+  ret void
+}
+
+define void @test14() {
+; CHECK: test14:
+; CHECK: movz {{w[0-9]+}}, #1, lsl #16
+  store i32 65536, i32* @var32
+  ret void
+}
+
+define void @test15() {
+; CHECK: test15:
+; CHECK: movz {{w[0-9]+}}, #65535, lsl #16
+  store i32 4294901760, i32* @var32
+  ret void
+}
+
+define void @test16() {
+; CHECK: test16:
+; CHECK: movn {{w[0-9]+}}, #0
+  store i32 -1, i32* @var32
+  ret void
+}
+
+define i64 @test17() {
+; CHECK: test17:
+
+  ; Mustn't MOVN w0 here.
+; CHECK: movn x0, #2
+  ret i64 -3
+}
diff --git a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll
new file mode 100644
index 0000000..77bf691
--- /dev/null
+++ b/test/CodeGen/AArch64/pic-eh-stubs.ll
@@ -0,0 +1,60 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s
+
+; Make sure exception-handling PIC code can be linked correctly. An alternative
+; to the sequence described below would have .gcc_except_table itself writable
+; and not use the indirection, but this isn't what LLVM does right now.
+
+  ; There should be a read-only .gcc_except_table section...
+; CHECK: .section .gcc_except_table,"a"
+
+  ; ... referring indirectly to stubs for its typeinfo ...
+; CHECK: // @TType Encoding = indirect pcrel sdata8
+  ; ... one of which is "int"'s typeinfo
+; CHECK: .Ltmp9:
+; CHECK-NEXT: .xword  .L_ZTIi.DW.stub-.Ltmp9
+
+  ; ... and which is properly defined (in a writable section for the dynamic loader) later.
+; CHECK: .section .data.rel,"aw"
+; CHECK: .L_ZTIi.DW.stub:
+; CHECK-NEXT: .xword _ZTIi
+
+@_ZTIi = external constant i8*
+
+define i32 @_Z3barv() {
+entry:
+  invoke void @_Z3foov()
+          to label %return unwind label %lpad
+
+lpad:                                             ; preds = %entry
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast (i8** @_ZTIi to i8*)
+  %1 = extractvalue { i8*, i32 } %0, 1
+  %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind
+  %matches = icmp eq i32 %1, %2
+  br i1 %matches, label %catch, label %eh.resume
+
+catch:                                            ; preds = %lpad
+  %3 = extractvalue { i8*, i32 } %0, 0
+  %4 = tail call i8* @__cxa_begin_catch(i8* %3) nounwind
+  %5 = bitcast i8* %4 to i32*
+  %exn.scalar = load i32* %5, align 4
+  tail call void @__cxa_end_catch() nounwind
+  br label %return
+
+return:                                           ; preds = %entry, %catch
+  %retval.0 = phi i32 [ %exn.scalar, %catch ], [ 42, %entry ]
+  ret i32 %retval.0
+
+eh.resume:                                        ; preds = %lpad
+  resume { i8*, i32 } %0
+}
+
+declare void @_Z3foov()
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/regress-bitcast-formals.ll b/test/CodeGen/AArch64/regress-bitcast-formals.ll
new file mode 100644
index 0000000..28dc9a7
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-bitcast-formals.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+; CallingConv.td requires a bitcast for vector arguments. Make sure we're
+; actually capable of that (the test was omitted from LowerFormalArguments).
+
+define void @test_bitcast_lower(<2 x i32> %a) {
+; CHECK: test_bitcast_lower:
+
+  ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/regress-f128csel-flags.ll b/test/CodeGen/AArch64/regress-f128csel-flags.ll
new file mode 100644
index 0000000..b35185c
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-f128csel-flags.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+; We used to not mark NZCV as being used in the continuation basic-block
+; when lowering a 128-bit "select" to branches. This meant a subsequent use
+; of the same flags gave an internal fault here.
+
+declare void @foo(fp128)
+
+define double @test_f128csel_flags(i32 %lhs, fp128 %a, fp128 %b) nounwind {
+; CHECK: test_f128csel_flags:
+
+    %tst = icmp ne i32 %lhs, 42
+    %val = select i1 %tst, fp128 %a, fp128 %b  ; 128-bit select is lowered to a branch (the b.eq below)
+; CHECK: cmp w0, #42
+; CHECK: b.eq .LBB0
+
+    call void @foo(fp128 %val)
+    %retval = select i1 %tst, double 4.0, double 5.0  ; second use of the same flags
+
+    ; It's also reasonably important that the actual fcsel comes before the
+    ; function call since bl may corrupt NZCV. We were doing the right thing anyway,
+    ; but just as well test it while we're here.
+; CHECK: fcsel {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, ne
+; CHECK: bl foo
+
+    ret double %retval
+}
diff --git a/test/CodeGen/AArch64/regress-tail-livereg.ll b/test/CodeGen/AArch64/regress-tail-livereg.ll
new file mode 100644
index 0000000..8d5485c
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-tail-livereg.ll
@@ -0,0 +1,19 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+@var = global void()* zeroinitializer
+
+declare void @bar()
+
+define void @foo() {
+; CHECK: foo:
+       %func = load void()** @var
+
+       ; Calling a function encourages @foo to use a callee-saved register,
+       ; which makes it a natural choice for the tail call itself. But we don't
+       ; want that: the final "br xN" has to use a temporary or argument
+       ; register (x0-x7, x9-x18). The pattern is anchored so x19+ can't match by prefix.
+       call void @bar()
+
+       tail call void %func()
+; CHECK: br {{x([0-79]|1[0-8])$}}
+       ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/regress-tblgen-chains.ll b/test/CodeGen/AArch64/regress-tblgen-chains.ll
new file mode 100644
index 0000000..e54552f
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-tblgen-chains.ll
@@ -0,0 +1,36 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; When generating DAG selection tables, TableGen used to only flag an
+; instruction as needing a chain on its own account if it had a built-in pattern
+; which used the chain. This meant that the AArch64 load/stores weren't
+; recognised and so both loads from %locvar below were coalesced into a single
+; LS8_LDR instruction (same operands other than the non-existent chain) and the
+; increment was lost at return.
+
+; This was obviously a Bad Thing.
+
+declare void @bar(i8*)
+
+define i64 @test_chains() {
+; CHECK: test_chains:
+
+  %locvar = alloca i8
+
+  call void @bar(i8* %locvar)
+; CHECK: bl bar
+
+  %inc.1 = load i8* %locvar
+  %inc.2 = zext i8 %inc.1 to i64
+  %inc.3 = add i64 %inc.2, 1
+  %inc.4 = trunc i64 %inc.3 to i8
+  store i8 %inc.4, i8* %locvar
+; CHECK: ldrb {{w[0-9]+}}, [sp, [[LOCADDR:#[0-9]+]]]
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #1
+; CHECK: strb {{w[0-9]+}}, [sp, [[LOCADDR]]]
+; CHECK: ldrb {{w[0-9]+}}, [sp, [[LOCADDR]]]
+
+  %ret.1 = load i8* %locvar
+  %ret.2 = zext i8 %ret.1 to i64
+  ret i64 %ret.2
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
new file mode 100644
index 0000000..5c97a02
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s
+@var = global i32 0
+
+declare void @bar()
+
+define void @test_w29_reserved() {  ; built with -disable-fp-elim: x29 is the frame pointer, so w29 must stay unallocated
+; CHECK: test_w29_reserved:
+; CHECK: add x29, sp, #{{[0-9]+}}
+
+  %val1 = load volatile i32* @var
+  %val2 = load volatile i32* @var
+  %val3 = load volatile i32* @var
+  %val4 = load volatile i32* @var
+  %val5 = load volatile i32* @var
+  %val6 = load volatile i32* @var
+  %val7 = load volatile i32* @var
+  %val8 = load volatile i32* @var
+  %val9 = load volatile i32* @var
+
+; CHECK-NOT: ldr w29,
+
+  ; Call to prevent fp-elim that occurs regardless in leaf functions.
+  call void @bar()
+
+  store volatile i32 %val1,  i32* @var
+  store volatile i32 %val2,  i32* @var
+  store volatile i32 %val3,  i32* @var
+  store volatile i32 %val4,  i32* @var
+  store volatile i32 %val5,  i32* @var
+  store volatile i32 %val6,  i32* @var
+  store volatile i32 %val7,  i32* @var
+  store volatile i32 %val8,  i32* @var
+  store volatile i32 %val9,  i32* @var
+
+  ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/regress-wzr-allocatable.ll b/test/CodeGen/AArch64/regress-wzr-allocatable.ll
new file mode 100644
index 0000000..764d2bc
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-wzr-allocatable.ll
@@ -0,0 +1,41 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0
+
+; When WZR wasn't marked as reserved, this function tried to allocate
+; it at O0 and then generated an internal fault (mostly incidentally)
+; when it discovered that it was already in use for a multiplication.
+
+; I'm not really convinced this is a good test since it could easily
+; stop testing what it does now with no-one any the wiser. However, I
+; can't think of a better way to force the allocator to use WZR
+; specifically.
+
+define void @test() nounwind {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  br i1 undef, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  br label %for.cond6
+
+for.cond6:                                        ; preds = %for.body9, %for.end
+  br i1 undef, label %for.body9, label %while.cond30
+
+for.body9:                                        ; preds = %for.cond6
+  store i16 0, i16* undef, align 2
+  %0 = load i32* undef, align 4
+  %1 = load i32* undef, align 4
+  %mul15 = mul i32 %0, %1
+  %add16 = add i32 %mul15, 32768
+  %div = udiv i32 %add16, 65535
+  %add17 = add i32 %div, 1
+  store i32 %add17, i32* undef, align 4
+  br label %for.cond6
+
+while.cond30:                                     ; preds = %for.cond6
+  ret void
+}
diff --git a/test/CodeGen/AArch64/setcc-takes-i32.ll b/test/CodeGen/AArch64/setcc-takes-i32.ll
new file mode 100644
index 0000000..d2eb77a
--- /dev/null
+++ b/test/CodeGen/AArch64/setcc-takes-i32.ll
@@ -0,0 +1,22 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; Most important point here is that the promotion of the i1 works
+; correctly. Previously LLVM thought that i64 was the appropriate SetCC output,
+; which meant it proceeded in two steps and produced an i64 -> i64 any_ext which
+; couldn't be selected and faulted.
+
+; It was expecting the smallest legal promotion of i1 to be the preferred SetCC
+; type, so we'll satisfy it (this actually arguably gives better code anyway,
+; with flag-manipulation operations allowed to use W-registers).
+
+declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64)
+
+define i64 @test_select(i64 %lhs, i64 %rhs) {
+; CHECK: test_select:
+
+  %res = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %lhs, i64 %rhs)
+  %flag = extractvalue {i64, i1} %res, 1
+  %retval = select i1 %flag, i64 %lhs, i64 %rhs
+  ret i64 %retval
+; CHECK: ret
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/sibling-call.ll b/test/CodeGen/AArch64/sibling-call.ll
new file mode 100644
index 0000000..a1ec618
--- /dev/null
+++ b/test/CodeGen/AArch64/sibling-call.ll
@@ -0,0 +1,97 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+declare void @callee_stack0()
+declare void @callee_stack8([8 x i32], i64)
+declare void @callee_stack16([8 x i32], i64, i64)
+
+define void @caller_to0_from0() nounwind {
+; CHECK: caller_to0_from0:
+; CHECK-NEXT: // BB
+  tail call void @callee_stack0()
+  ret void
+; CHECK-NEXT: b callee_stack0
+}
+
+define void @caller_to0_from8([8 x i32], i64) nounwind{
+; CHECK: caller_to0_from8:
+; CHECK-NEXT: // BB
+
+  tail call void @callee_stack0()
+  ret void
+; CHECK-NEXT: b callee_stack0
+}
+
+define void @caller_to8_from0() {
+; CHECK: caller_to8_from0:
+
+; Caller isn't going to clean up any extra stack we allocate, so it
+; can't be a tail call.
+  tail call void @callee_stack8([8 x i32] undef, i64 42)
+  ret void
+; CHECK: bl callee_stack8
+}
+
+define void @caller_to8_from8([8 x i32], i64 %a) {
+; CHECK: caller_to8_from8:
+; CHECK-NOT: sub sp, sp,
+
+; This should reuse our stack area for the 42
+  tail call void @callee_stack8([8 x i32] undef, i64 42)
+  ret void
+; CHECK: str {{x[0-9]+}}, [sp]
+; CHECK-NEXT: b callee_stack8
+}
+
+define void @caller_to16_from8([8 x i32], i64 %a) {
+; CHECK: caller_to16_from8:
+
+; Shouldn't be a tail call: we can't use SP+8 because our caller might
+; have something there. This may sound obvious but implementation does
+; some funky aligning.
+  tail call void @callee_stack16([8 x i32] undef, i64 undef, i64 undef)
+; CHECK: bl callee_stack16
+  ret void
+}
+
+define void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) {
+; CHECK: caller_to8_from24:
+; CHECK-NOT: sub sp, sp
+
+; Reuse our area, putting "42" at incoming sp
+  tail call void @callee_stack8([8 x i32] undef, i64 42)
+  ret void
+; CHECK: str {{x[0-9]+}}, [sp]
+; CHECK-NEXT: b callee_stack8
+}
+
+define void @caller_to16_from16([8 x i32], i64 %a, i64 %b) {
+; CHECK: caller_to16_from16:
+; CHECK-NOT: sub sp, sp,
+
+; Here we want to make sure that both loads happen before the stores:
+; otherwise either %a or %b will be wrongly clobbered.
+  tail call void @callee_stack16([8 x i32] undef, i64 %b, i64 %a)
+  ret void
+
+; CHECK: ldr x0,
+; CHECK: ldr x1,
+; CHECK: str x1,
+; CHECK: str x0,
+
+; CHECK-NOT: add sp, sp,
+; CHECK: b callee_stack16
+}
+
+@func = global void(i32)* null
+
+define void @indirect_tail() {
+; CHECK: indirect_tail:
+; CHECK-NOT: sub sp, sp
+
+  %fptr = load void(i32)** @func
+  tail call void %fptr(i32 42)  ; w0 carries the argument, so the target may be any X register except x0
+  ret void
+; CHECK: movz w0, #42
+; CHECK: ldr [[FPTR:x[1-9][0-9]*]], [{{x[0-9]+}}, #:lo12:func]
+; CHECK: br [[FPTR]]
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/tail-call.ll b/test/CodeGen/AArch64/tail-call.ll
new file mode 100644
index 0000000..f323b15
--- /dev/null
+++ b/test/CodeGen/AArch64/tail-call.ll
@@ -0,0 +1,94 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s
+
+declare fastcc void @callee_stack0()
+declare fastcc void @callee_stack8([8 x i32], i64)
+declare fastcc void @callee_stack16([8 x i32], i64, i64)
+
+define fastcc void @caller_to0_from0() nounwind {
+; CHECK: caller_to0_from0:
+; CHECK-NEXT: // BB
+  tail call fastcc void @callee_stack0()
+  ret void
+; CHECK-NEXT: b callee_stack0
+}
+
+define fastcc void @caller_to0_from8([8 x i32], i64) {
+; CHECK: caller_to0_from8:
+
+  tail call fastcc void @callee_stack0()
+  ret void
+; CHECK: add sp, sp, #16
+; CHECK-NEXT: b callee_stack0
+}
+
+define fastcc void @caller_to8_from0() {
+; CHECK: caller_to8_from0:
+; CHECK: sub sp, sp, #32
+
+; Key point is that the "42" should go #16 below incoming stack
+; pointer (we didn't have arg space to reuse).
+  tail call fastcc void @callee_stack8([8 x i32] undef, i64 42)
+  ret void
+; CHECK: str {{x[0-9]+}}, [sp, #16]
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: b callee_stack8
+}
+
+define fastcc void @caller_to8_from8([8 x i32], i64 %a) {
+; CHECK: caller_to8_from8:
+; CHECK: sub sp, sp, #16
+
+; Key point is that the "42" should go where "%a" was: at SP on entry.
+  tail call fastcc void @callee_stack8([8 x i32] undef, i64 42)
+  ret void
+; CHECK: str {{x[0-9]+}}, [sp, #16]
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: b callee_stack8
+}
+
+define fastcc void @caller_to16_from8([8 x i32], i64 %a) {
+; CHECK: caller_to16_from8:
+; CHECK: sub sp, sp, #16
+
+; Important point is that the call reuses the "dead" argument space
+; above %a on the stack. If it tries to go below incoming-SP then the
+; callee will not deallocate the space, even in fastcc.
+  tail call fastcc void @callee_stack16([8 x i32] undef, i64 42, i64 2)
+; CHECK: str {{x[0-9]+}}, [sp, #24]
+; CHECK: str {{x[0-9]+}}, [sp, #16]
+; CHECK: add sp, sp, #16
+; CHECK: b callee_stack16
+  ret void
+}
+
+
+define fastcc void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) {
+; CHECK: caller_to8_from24:
+; CHECK: sub sp, sp, #16
+
+; Key point is that the "42" should go at #16 above SP on entry.
+  tail call fastcc void @callee_stack8([8 x i32] undef, i64 42)
+  ret void
+; CHECK: str {{x[0-9]+}}, [sp, #32]
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: b callee_stack8
+}
+
+
+define fastcc void @caller_to16_from16([8 x i32], i64 %a, i64 %b) {
+; CHECK: caller_to16_from16:
+; CHECK: sub sp, sp, #16
+
+; Here we want to make sure that both loads happen before the stores:
+; otherwise either %a or %b will be wrongly clobbered.
+  tail call fastcc void @callee_stack16([8 x i32] undef, i64 %b, i64 %a)
+  ret void
+
+; CHECK: ldr x0,
+; CHECK: ldr x1,
+; CHECK: str x1,
+; CHECK: str x0,
+
+; CHECK: add sp, sp, #16
+; CHECK: b callee_stack16
+}
diff --git a/test/CodeGen/AArch64/tls-dynamic-together.ll b/test/CodeGen/AArch64/tls-dynamic-together.ll
new file mode 100644
index 0000000..bad2298
--- /dev/null
+++ b/test/CodeGen/AArch64/tls-dynamic-together.ll
@@ -0,0 +1,18 @@
+; RUN: llc -O0 -mtriple=aarch64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s
+
+; If the .tlsdesccall and blr parts are emitted completely separately (even with
+; glue) then LLVM will separate them quite happily (with a spill at O0, hence
+; the option). This is definitely wrong, so we make sure they are emitted
+; together.
+
+@general_dynamic_var = external thread_local global i32
+
+define i32 @test_generaldynamic() {
+; CHECK: test_generaldynamic:
+
+  %val = load i32* @general_dynamic_var
+  ret i32 %val
+
+; CHECK: .tlsdesccall general_dynamic_var
+; CHECK-NEXT: blr {{x[0-9]+}}
+}
diff --git a/test/CodeGen/AArch64/tls-dynamics.ll b/test/CodeGen/AArch64/tls-dynamics.ll
new file mode 100644
index 0000000..cdfd117
--- /dev/null
+++ b/test/CodeGen/AArch64/tls-dynamics.ll
@@ -0,0 +1,121 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
+
+@general_dynamic_var = external thread_local global i32
+
+define i32 @test_generaldynamic() {
+; CHECK: test_generaldynamic:
+
+  %val = load i32* @general_dynamic_var
+  ret i32 %val
+
+; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
+; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var
+; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var]
+; CHECK: .tlsdesccall general_dynamic_var
+; CHECK-NEXT: blr [[CALLEE]]
+
+; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0
+; CHECK: ldr w0, [x[[TP]], x0]
+
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+
+}
+
+define i32* @test_generaldynamic_addr() {
+; CHECK: test_generaldynamic_addr:
+
+  ret i32* @general_dynamic_var
+
+; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
+; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var
+; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var]
+; CHECK: .tlsdesccall general_dynamic_var
+; CHECK-NEXT: blr [[CALLEE]]
+
+; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0
+; CHECK: add x0, [[TP]], x0
+
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+
+}
+
+@local_dynamic_var = external thread_local(localdynamic) global i32
+
+define i32 @test_localdynamic() {
+; CHECK: test_localdynamic:
+
+  %val = load i32* @local_dynamic_var
+  ret i32 %val
+
+; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
+; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK-NEXT: blr [[CALLEE]]
+
+; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var
+; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var
+
+; CHECK: ldr w0, [x0, [[DTP_OFFSET]]]
+
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+
+}
+
+define i32* @test_localdynamic_addr() {
+; CHECK: test_localdynamic_addr:
+
+  ret i32* @local_dynamic_var
+
+; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
+; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK-NEXT: blr [[CALLEE]]
+
+; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var
+; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var
+
+; CHECK: add x0, x0, [[DTP_OFFSET]]
+
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+
+}
+
+; The entire point of the local-dynamic access model is to have a single call to
+; the expensive resolver. Make sure we achieve that goal.
+
+@local_dynamic_var2 = external thread_local(localdynamic) global i32
+
+define i32 @test_localdynamic_deduplicate() {
+; CHECK: test_localdynamic_deduplicate:
+
+  %val = load i32* @local_dynamic_var
+  %val2 = load i32* @local_dynamic_var2
+
+  %sum = add i32 %val, %val2
+  ret i32 %sum
+
+; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
+; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK-NEXT: blr [[CALLEE]]
+
+; CHECK-NOT: _TLS_MODULE_BASE_
+
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/tls-execs.ll b/test/CodeGen/AArch64/tls-execs.ll
new file mode 100644
index 0000000..a665884
--- /dev/null
+++ b/test/CodeGen/AArch64/tls-execs.ll
@@ -0,0 +1,63 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
+
+@initial_exec_var = external thread_local(initialexec) global i32
+
+define i32 @test_initial_exec() {
+; CHECK: test_initial_exec:
+  %val = load i32* @initial_exec_var
+
+; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var
+; CHECK: ldr x[[TP_OFFSET:[0-9]+]], [x[[GOTADDR]], #:gottprel_lo12:initial_exec_var]
+; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0
+; CHECK: ldr w0, [x[[TP]], x[[TP_OFFSET]]]
+
+; CHECK-RELOC: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21
+; CHECK-RELOC: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC
+
+  ret i32 %val
+}
+
+define i32* @test_initial_exec_addr() {
+; CHECK: test_initial_exec_addr:
+  ret i32* @initial_exec_var
+
+; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var
+; CHECK: ldr [[TP_OFFSET:x[0-9]+]], [x[[GOTADDR]], #:gottprel_lo12:initial_exec_var]
+; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0
+; CHECK: add x0, [[TP]], [[TP_OFFSET]]
+
+; CHECK-RELOC: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21
+; CHECK-RELOC: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC
+
+}
+
+@local_exec_var = thread_local(initialexec) global i32 0
+
+define i32 @test_local_exec() {
+; CHECK: test_local_exec:
+  %val = load i32* @local_exec_var
+
+; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var
+; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
+; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0
+; CHECK: ldr w0, [x[[TP]], [[TP_OFFSET]]]
+
+; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
+; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+
+  ret i32 %val
+}
+
+define i32* @test_local_exec_addr() {
+; CHECK: test_local_exec_addr:
+  ret i32* @local_exec_var
+
+; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var
+; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
+; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0
+; CHECK: add x0, [[TP]], [[TP_OFFSET]]
+
+; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
+; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+}
diff --git a/test/CodeGen/AArch64/tst-br.ll b/test/CodeGen/AArch64/tst-br.ll
new file mode 100644
index 0000000..65c1fda
--- /dev/null
+++ b/test/CodeGen/AArch64/tst-br.ll
@@ -0,0 +1,48 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+; We've got the usual issues with LLVM reordering blocks here. The
+; tests are correct for the current order, but who knows when that
+; will change. Beware!
+@var32 = global i32 0
+@var64 = global i64 0
+
+define i32 @test_tbz() {
+; CHECK: test_tbz:
+
+  %val = load i32* @var32
+  %val64 = load i64* @var64
+
+  %tbit0 = and i32 %val, 32768
+  %tst0 = icmp ne i32 %tbit0, 0
+  br i1 %tst0, label %test1, label %end1
+; CHECK: tbz {{w[0-9]+}}, #15, [[LBL_end1:.LBB0_[0-9]+]]
+
+test1:
+  %tbit1 = and i32 %val, 4096
+  %tst1 = icmp ne i32 %tbit1, 0
+  br i1 %tst1, label %test2, label %end1
+; CHECK: tbz {{w[0-9]+}}, #12, [[LBL_end1]]
+
+test2:
+  %tbit2 = and i64 %val64, 32768
+  %tst2 = icmp ne i64 %tbit2, 0
+  br i1 %tst2, label %test3, label %end1
+; CHECK: tbz {{x[0-9]+}}, #15, [[LBL_end1]]
+
+test3:
+  %tbit3 = and i64 %val64, 4096
+  %tst3 = icmp ne i64 %tbit3, 0
+  br i1 %tst3, label %end2, label %end1
+; CHECK: tbz {{x[0-9]+}}, #12, [[LBL_end1]]
+
+end2:
+; CHECK: movz x0, #1
+; CHECK-NEXT: ret
+  ret i32 1
+
+end1:
+; CHECK: [[LBL_end1]]:
+; CHECK-NEXT: mov x0, xzr
+; CHECK-NEXT: ret
+  ret i32 0
+}
diff --git a/test/CodeGen/AArch64/variadic.ll b/test/CodeGen/AArch64/variadic.ll
new file mode 100644
index 0000000..c5d319e
--- /dev/null
+++ b/test/CodeGen/AArch64/variadic.ll
@@ -0,0 +1,144 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+%va_list = type {i8*, i8*, i8*, i32, i32}
+
+@var = global %va_list zeroinitializer
+
+declare void @llvm.va_start(i8*)
+
+define void @test_simple(i32 %n, ...) {
+; CHECK: test_simple:
+; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
+; CHECK: mov x[[FPRBASE:[0-9]+]], sp
+; CHECK: str q7, [x[[FPRBASE]], #112]
+; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]]
+; CHECK: str x7, [x[[GPRBASE]], #48]
+
+; Omit the middle ones
+
+; CHECK: str q0, [sp]
+; CHECK: str x1, [sp, #[[GPRFROMSP]]]
+
+  %addr = bitcast %va_list* @var to i8*
+  call void @llvm.va_start(i8* %addr)
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
+; CHECK: movn [[VR_OFFS:w[0-9]+]], #127
+; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
+; CHECK: movn [[GR_OFFS:w[0-9]+]], #55
+; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
+; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #128
+; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
+; CHECK: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #56
+; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
+; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]]
+; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
+
+  ret void
+}
+
+define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
+; CHECK: test_fewargs:
+; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
+; CHECK: mov x[[FPRBASE:[0-9]+]], sp
+; CHECK: str q7, [x[[FPRBASE]], #96]
+; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]]
+; CHECK: str x7, [x[[GPRBASE]], #32]
+
+; Omit the middle ones
+
+; CHECK: str q1, [sp]
+; CHECK: str x3, [sp, #[[GPRFROMSP]]]
+
+  %addr = bitcast %va_list* @var to i8*
+  call void @llvm.va_start(i8* %addr)
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
+; CHECK: movn [[VR_OFFS:w[0-9]+]], #111
+; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
+; CHECK: movn [[GR_OFFS:w[0-9]+]], #39
+; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
+; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #112
+; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
+; CHECK: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #40
+; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
+; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]]
+; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
+
+  ret void
+}
+
+define void @test_nospare([8 x i64], [8 x float], ...) {
+; CHECK: test_nospare:
+
+  %addr = bitcast %va_list* @var to i8*
+  call void @llvm.va_start(i8* %addr)
+; CHECK-NOT: sub sp, sp
+; CHECK: mov [[STACK:x[0-9]+]], sp
+; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
+
+  ret void
+}
+
+; If there are non-variadic arguments on the stack (here two i64s) then the
+; __stack field should point just past them.
+define void @test_offsetstack([10 x i64], [3 x float], ...) {
+; CHECK: test_offsetstack:
+; CHECK: sub sp, sp, #80
+; CHECK: mov x[[FPRBASE:[0-9]+]], sp
+; CHECK: str q7, [x[[FPRBASE]], #64]
+
+; CHECK-NOT: str x{{[0-9]+}},
+; Omit the middle ones
+
+; CHECK: str q3, [sp]
+
+  %addr = bitcast %va_list* @var to i8*
+  call void @llvm.va_start(i8* %addr)
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
+; CHECK: movn [[VR_OFFS:w[0-9]+]], #79
+; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
+; CHECK: str wzr, [x[[VA_LIST]], #24]
+; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #80
+; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
+; CHECK: add [[STACK:x[0-9]+]], sp, #96
+; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
+
+  ret void
+}
+
+declare void @llvm.va_end(i8*)
+
+define void @test_va_end() nounwind {
+; CHECK: test_va_end:
+; CHECK-NEXT: BB#0
+
+  %addr = bitcast %va_list* @var to i8*
+  call void @llvm.va_end(i8* %addr)
+
+  ret void
+; CHECK-NEXT: ret
+}
+
+declare void @llvm.va_copy(i8* %dest, i8* %src)
+
+@second_list = global %va_list zeroinitializer
+
+define void @test_va_copy() {
+; CHECK: test_va_copy:
+  %srcaddr = bitcast %va_list* @var to i8*
+  %dstaddr = bitcast %va_list* @second_list to i8*
+  call void @llvm.va_copy(i8* %dstaddr, i8* %srcaddr)
+
+; Check beginning and end again:
+
+; CHECK: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
+; CHECK: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list]
+
+; CHECK: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list
+; CHECK: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
+
+; CHECK: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24]
+; CHECK: str [[BLOCK]], [x[[DEST_LIST]], #24]
+
+  ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/zero-reg.ll b/test/CodeGen/AArch64/zero-reg.ll
new file mode 100644
index 0000000..fef0437
--- /dev/null
+++ b/test/CodeGen/AArch64/zero-reg.ll
@@ -0,0 +1,31 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_zr() {
+; CHECK: test_zr:
+
+  store i32 0, i32* @var32
+; CHECK: str wzr, [{{x[0-9]+}}, #:lo12:var32]
+  store i64 0, i64* @var64
+; CHECK: str xzr, [{{x[0-9]+}}, #:lo12:var64]
+
+  ret void
+; CHECK: ret
+}
+
+define void @test_sp(i32 %val) {
+; CHECK: test_sp:
+
+; Important correctness point here is that LLVM doesn't try to use xzr
+; as an addressing register: "str w0, [xzr]" is not a valid A64
+; instruction (0b11111 in the Rn field would mean "sp").
+  %addr = getelementptr i32* null, i64 0
+  store i32 %val, i32* %addr
+; CHECK: mov x[[NULL:[0-9]+]], xzr
+; CHECK: str {{w[0-9]+}}, [x[[NULL]]]
+
+  ret void
+; CHECK: ret
+}
+\ No newline at end of file
diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
index f9ede74..0d0d03b 100644
--- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
+++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
@@ -8,7 +8,7 @@ define void @test_sqrt(<4 x float>* %X) nounwind {
 
 ; CHECK:      movw    r1, :lower16:{{.*}}
 ; CHECK:      movt    r1, :upper16:{{.*}}
-; CHECK:      vld1.64 {{.*}}, [r1, :128]
+; CHECK:      vld1.64 {{.*}}, [r1:128]
 ; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
 ; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
 ; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
@@ -252,7 +252,7 @@ define void @test_powi(<4 x float>* %X) nounwind {
 
 ; CHECK:       movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
 ; CHECK:       movt  [[reg0]], :upper16:{{.*}}
-; CHECK:       vld1.64 {{.*}}, :128
+; CHECK:       vld1.64 {{.*}}:128
 ; CHECK:       vmul.f32 {{.*}}
 
 ; CHECK:      vst1.64
diff --git a/test/CodeGen/ARM/2012-08-09-neon-extload.ll b/test/CodeGen/ARM/2012-08-09-neon-extload.ll
index b55f1ca..764c58f 100644
--- a/test/CodeGen/ARM/2012-08-09-neon-extload.ll
+++ b/test/CodeGen/ARM/2012-08-09-neon-extload.ll
@@ -18,7 +18,7 @@ define void @test_v2i8tov2i32() {
 
   %i32val = sext <2 x i8> %i8val to <2 x i32>
   store <2 x i32> %i32val, <2 x i32>* @var_v2i32
-; CHECK: vld1.16 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :16]
+; CHECK: vld1.16 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}:16]
 ; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
 ; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}}
 
@@ -32,7 +32,7 @@ define void @test_v2i8tov2i64() {
 
   %i64val = sext <2 x i8> %i8val to <2 x i64>
   store <2 x i64> %i64val, <2 x i64>* @var_v2i64
-; CHECK: vld1.16 {d{{[0-9]+}}[0]}, [{{r[0-9]+}}, :16]
+; CHECK: vld1.16 {d{{[0-9]+}}[0]}, [{{r[0-9]+}}:16]
 ; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
 ; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}}
 ; CHECK: vmovl.s32 {{q[0-9]+}}, {{d[0-9]+}}
@@ -50,7 +50,7 @@ define void @test_v4i8tov4i16() {
 
   %i16val = sext <4 x i8> %i8val to <4 x i16>
   store <4 x i16> %i16val, <4 x i16>* @var_v4i16
-; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}:32]
 ; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
 ; CHECK-NOT: vmovl.s16
 
@@ -65,7 +65,7 @@ define void @test_v4i8tov4i32() {
 
   %i16val = sext <4 x i8> %i8val to <4 x i32>
   store <4 x i32> %i16val, <4 x i32>* @var_v4i32
-; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}:32]
 ; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
 ; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}}
 
@@ -79,7 +79,7 @@ define void @test_v2i16tov2i32() {
 
   %i32val = sext <2 x i16> %i16val to <2 x i32>
   store <2 x i32> %i32val, <2 x i32>* @var_v2i32
-; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}:32]
 ; CHECK: vmovl.s16 {{q[0-9]+}}, d[[LOAD]]
 ; CHECK-NOT: vmovl
 
@@ -94,7 +94,7 @@ define void @test_v2i16tov2i64() {
 
   %i64val = sext <2 x i16> %i16val to <2 x i64>
   store <2 x i64> %i64val, <2 x i64>* @var_v2i64
-; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}:32]
 ; CHECK: vmovl.s16 {{q[0-9]+}}, d[[LOAD]]
 ; CHECK: vmovl.s32 {{q[0-9]+}}, d[[LOAD]]
 
diff --git a/test/CodeGen/ARM/2013-01-21-PR14992.ll b/test/CodeGen/ARM/2013-01-21-PR14992.ll
new file mode 100644
index 0000000..38b9e0e
--- /dev/null
+++ b/test/CodeGen/ARM/2013-01-21-PR14992.ll
@@ -0,0 +1,28 @@
+;PR14992 - Tablegen incorrectly converts ARM tLDMIA_UPD pseudo to tLDMIA
+;RUN: llc -mtriple=thumbv7 < %s  | FileCheck -check-prefix=EXPECTED %s
+;RUN: llc -mtriple=thumbv7 < %s  | FileCheck %s
+
+;EXPECTED: foo:
+;CHECK: foo:
+define i32 @foo(i32* %a) nounwind optsize {
+entry:
+  %0 = load i32* %a, align 4, !tbaa !0
+  %arrayidx1 = getelementptr inbounds i32* %a, i32 1
+  %1 = load i32* %arrayidx1, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds i32* %a, i32 2
+  %2 = load i32* %arrayidx2, align 4, !tbaa !0
+  %add.ptr = getelementptr inbounds i32* %a, i32 3
+;Make sure we do not have a duplicated register in the front of the reg list
+;EXPECTED:  ldm [[BASE:r[0-9]+]]!, {[[REG:r[0-9]+]], {{r[0-9]+}},
+;CHECK-NOT: ldm [[BASE:r[0-9]+]]!, {[[REG:r[0-9]+]], [[REG]],
+  tail call void @bar(i32* %add.ptr) nounwind optsize
+  %add = add nsw i32 %1, %0
+  %add3 = add nsw i32 %add, %2
+  ret i32 %add3
+}
+
+declare void @bar(i32*) optsize
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/ARM/arm-modifier.ll b/test/CodeGen/ARM/arm-modifier.ll
index 5e12d8e..c747016 100644
--- a/test/CodeGen/ARM/arm-modifier.ll
+++ b/test/CodeGen/ARM/arm-modifier.ll
@@ -61,8 +61,7 @@ ret void
 define i64 @f4(i64* %val) nounwind {
 entry:
   ;CHECK: f4
-  ;CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r0]
-  ;CHECK: mov r0, [[REG1]]
+  ;CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
   %0 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [$1]", "=&r,r,*Qo"(i64* %val, i64* %val) nounwind
   ret i64 %0
 }
diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll
index 69da622..f2c7305 100644
--- a/test/CodeGen/ARM/atomic-64bit.ll
+++ b/test/CodeGen/ARM/atomic-64bit.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB
 
 define i64 @test1(i64* %ptr, i64 %val) {
 ; CHECK: test1:
@@ -10,6 +11,17 @@ define i64 @test1(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test1:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: adds.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB: adc.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = atomicrmw add i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -24,6 +36,17 @@ define i64 @test2(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test2:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: subs.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB: sbc.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = atomicrmw sub i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -38,6 +61,17 @@ define i64 @test3(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test3:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: and.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB: and.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = atomicrmw and i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -52,6 +86,17 @@ define i64 @test4(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test4:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: orr.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB: orr.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = atomicrmw or i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -66,6 +111,17 @@ define i64 @test5(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test5:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: eor.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB: eor.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = atomicrmw xor i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -78,6 +134,15 @@ define i64 @test6(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test6:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = atomicrmw xchg i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -93,6 +158,19 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test7:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: cmp [[REG1]]
+; CHECK-THUMB: it eq
+; CHECK-THUMB: cmpeq [[REG2]]
+; CHECK-THUMB: bne
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst
   ret i64 %r
 }
@@ -109,6 +187,18 @@ define i64 @test8(i64* %ptr) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test8:
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: cmp [[REG1]]
+; CHECK-THUMB: it eq
+; CHECK-THUMB: cmpeq [[REG2]]
+; CHECK-THUMB: bne
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = load atomic i64* %ptr seq_cst, align 8
   ret i64 %r
 }
@@ -123,6 +213,15 @@ define void @test9(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test9:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   store atomic i64 %val, i64* %ptr seq_cst, align 8
   ret void
 }
@@ -133,11 +232,23 @@ define i64 @test10(i64* %ptr, i64 %val) {
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
 ; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
 ; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
-; CHECK: ble
+; CHECK: blt
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test10:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
+; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
+; CHECK-THUMB: blt
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = atomicrmw min i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -148,11 +259,24 @@ define i64 @test11(i64* %ptr, i64 %val) {
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
 ; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
 ; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
-; CHECK: bls
+; CHECK: blo
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+
+; CHECK-THUMB: test11:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
+; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
+; CHECK-THUMB: blo
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = atomicrmw umin i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -168,6 +292,18 @@ define i64 @test12(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test12:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
+; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
+; CHECK-THUMB: bge
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = atomicrmw max i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -183,6 +319,17 @@ define i64 @test13(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test13:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
+; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
+; CHECK-THUMB: bhs
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
   %r = atomicrmw umax i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
diff --git a/test/CodeGen/ARM/ehabi-mc-cantunwind.ll b/test/CodeGen/ARM/ehabi-mc-cantunwind.ll
new file mode 100644
index 0000000..698d76e
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-mc-cantunwind.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s
+
+define void @test() nounwind {
+entry:
+  ret void
+}
+
+; CHECK: section .text
+; CHECK: section .ARM.exidx
+; CHECK-NEXT: 0000 00000000 01000000
diff --git a/test/CodeGen/ARM/ehabi-mc-section-group.ll b/test/CodeGen/ARM/ehabi-mc-section-group.ll
new file mode 100644
index 0000000..5e4b509
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-mc-section-group.ll
@@ -0,0 +1,79 @@
+; Test section group of the function with linkonce_odr
+
+; The instantiation of C++ function template will come with linkonce_odr,
+; which indicates that the linker can remove the duplicated instantiation.
+; However, to make this feature work, we have to group the section properly.
+; .text, .ARM.extab, and .ARM.exidx should be grouped together.
+
+; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=obj -o - %s \
+; RUN:   | elf-dump --dump-section-data \
+; RUN:   | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv4t--linux-gnueabi"
+
+define void @_Z11instantiatev() {
+entry:
+  tail call void @_Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_(i32 1, i32 2, i32 3, i32 4, i32 5, double 1.000000e-01, double 2.000000e-01, double 3.000000e-01, double 4.000000e-01, double 5.000000e-01)
+  ret void
+}
+
+define linkonce_odr void @_Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) {
+entry:
+  invoke void @_Z5printiiiii(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5)
+          to label %try.cont unwind label %lpad
+
+lpad:                                             ; preds = %entry
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  %1 = extractvalue { i8*, i32 } %0, 0
+  %2 = tail call i8* @__cxa_begin_catch(i8* %1) nounwind
+  invoke void @_Z5printddddd(double %v1, double %v2, double %v3, double %v4, double %v5)
+          to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2:                                     ; preds = %lpad
+  tail call void @__cxa_end_catch()
+  br label %try.cont
+
+try.cont:                                         ; preds = %entry, %invoke.cont2
+  ret void
+
+lpad1:                                            ; preds = %lpad
+  %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  invoke void @__cxa_end_catch()
+          to label %eh.resume unwind label %terminate.lpad
+
+eh.resume:                                        ; preds = %lpad1
+  resume { i8*, i32 } %3
+
+terminate.lpad:                                   ; preds = %lpad1
+  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  tail call void @_ZSt9terminatev() noreturn nounwind
+  unreachable
+}
+
+declare void @_Z5printiiiii(i32, i32, i32, i32, i32)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @_Z5printddddd(double, double, double, double, double)
+
+declare void @__cxa_end_catch()
+
+declare void @_ZSt9terminatev()
+
+; CHECK:      # Section 1
+; CHECK-NEXT: (('sh_name', 0x0000002f) # '.group'
+; CHECK:       ('_section_data', '01000000 0a000000 0c000000 0e000000')
+; CHECK:      # Section 10
+; CHECK-NEXT: (('sh_name', 0x000000e1) # '.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_'
+; CHECK:      # Section 12
+; CHECK-NEXT: (('sh_name', 0x000000d7) # '.ARM.extab.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_'
+; CHECK:      # Section 14
+; CHECK-NEXT: (('sh_name', 0x00000065) # '.ARM.exidx.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_'
diff --git a/test/CodeGen/ARM/ehabi-mc-section.ll b/test/CodeGen/ARM/ehabi-mc-section.ll
new file mode 100644
index 0000000..fc51b24
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-mc-section.ll
@@ -0,0 +1,59 @@
+; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s
+
+define void @_Z4testiiiiiddddd(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) section ".test_section" {
+entry:
+  invoke void @_Z5printiiiii(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5)
+          to label %try.cont unwind label %lpad
+
+lpad:                                             ; preds = %entry
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  %1 = extractvalue { i8*, i32 } %0, 0
+  %2 = tail call i8* @__cxa_begin_catch(i8* %1) nounwind
+  invoke void @_Z5printddddd(double %v1, double %v2, double %v3, double %v4, double %v5)
+          to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2:                                     ; preds = %lpad
+  tail call void @__cxa_end_catch()
+  br label %try.cont
+
+try.cont:                                         ; preds = %entry, %invoke.cont2
+  ret void
+
+lpad1:                                            ; preds = %lpad
+  %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  invoke void @__cxa_end_catch()
+          to label %eh.resume unwind label %terminate.lpad
+
+eh.resume:                                        ; preds = %lpad1
+  resume { i8*, i32 } %3
+
+terminate.lpad:                                   ; preds = %lpad1
+  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  tail call void @_ZSt9terminatev() noreturn nounwind
+  unreachable
+}
+
+declare void @_Z5printiiiii(i32, i32, i32, i32, i32)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @_Z5printddddd(double, double, double, double, double)
+
+declare void @__cxa_end_catch()
+
+declare void @_ZSt9terminatev()
+
+; CHECK: section .test_section
+; CHECK: section .ARM.extab.test_section
+; CHECK-NEXT: 0000 00000000 b0b0b000
+; CHECK: section .ARM.exidx.test_section
+; CHECK-NEXT: 0000 00000000 00000000
diff --git a/test/CodeGen/ARM/ehabi-mc-sh_link.ll b/test/CodeGen/ARM/ehabi-mc-sh_link.ll
new file mode 100644
index 0000000..f90e5f3
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-mc-sh_link.ll
@@ -0,0 +1,47 @@
+; Test the sh_link in Elf32_Shdr.
+
+; The .ARM.exidx section should be linked with corresponding text section.
+; The sh_link in Elf32_Shdr should be filled with the section index of
+; the text section.
+
+; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=obj -o - %s \
+; RUN:   | elf-dump --dump-section-data \
+; RUN:   | FileCheck %s
+
+define void @test1() nounwind {
+entry:
+  ret void
+}
+
+define void @test2() nounwind section ".test_section" {
+entry:
+  ret void
+}
+
+; CHECK: # Section 1
+; CHECK-NEXT: (('sh_name', 0x00000010) # '.text'
+
+; CHECK:      (('sh_name', 0x00000005) # '.ARM.exidx'
+; CHECK-NEXT:  ('sh_type', 0x70000001)
+; CHECK-NEXT:  ('sh_flags', 0x00000082)
+; CHECK-NEXT:  ('sh_addr', 0x00000000)
+; CHECK-NEXT:  ('sh_offset', 0x0000005c)
+; CHECK-NEXT:  ('sh_size', 0x00000008)
+; CHECK-NEXT:  ('sh_link',  0x00000001)
+; CHECK-NEXT:  ('sh_info',  0x00000000)
+; CHECK-NEXT:  ('sh_addralign',  0x00000004)
+
+; CHECK: # Section 7
+; CHECK-NEXT: (('sh_name', 0x00000039) # '.test_section'
+
+; CHECK:      (('sh_name', 0x0000002f) # '.ARM.exidx.test_section'
+; CHECK-NEXT:  ('sh_type', 0x70000001)
+; CHECK-NEXT:  ('sh_flags', 0x00000082)
+; CHECK-NEXT:  ('sh_addr', 0x00000000)
+; CHECK-NEXT:  ('sh_offset', 0x00000068)
+; CHECK-NEXT:  ('sh_size', 0x00000008)
+; CHECK-NEXT:  ('sh_link',  0x00000007)
+; CHECK-NEXT:  ('sh_info',  0x00000000)
+; CHECK-NEXT:  ('sh_addralign',  0x00000004)
diff --git a/test/CodeGen/ARM/ehabi-mc.ll b/test/CodeGen/ARM/ehabi-mc.ll
new file mode 100644
index 0000000..0dc2ef7
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-mc.ll
@@ -0,0 +1,59 @@
+; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s
+
+define void @_Z4testiiiiiddddd(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) {
+entry:
+  invoke void @_Z5printiiiii(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5)
+          to label %try.cont unwind label %lpad
+
+lpad:                                             ; preds = %entry
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  %1 = extractvalue { i8*, i32 } %0, 0
+  %2 = tail call i8* @__cxa_begin_catch(i8* %1) nounwind
+  invoke void @_Z5printddddd(double %v1, double %v2, double %v3, double %v4, double %v5)
+          to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2:                                     ; preds = %lpad
+  tail call void @__cxa_end_catch()
+  br label %try.cont
+
+try.cont:                                         ; preds = %entry, %invoke.cont2
+  ret void
+
+lpad1:                                            ; preds = %lpad
+  %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  invoke void @__cxa_end_catch()
+          to label %eh.resume unwind label %terminate.lpad
+
+eh.resume:                                        ; preds = %lpad1
+  resume { i8*, i32 } %3
+
+terminate.lpad:                                   ; preds = %lpad1
+  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  tail call void @_ZSt9terminatev() noreturn nounwind
+  unreachable
+}
+
+declare void @_Z5printiiiii(i32, i32, i32, i32, i32)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @_Z5printddddd(double, double, double, double, double)
+
+declare void @__cxa_end_catch()
+
+declare void @_ZSt9terminatev()
+
+; CHECK: section .text
+; CHECK: section .ARM.extab
+; CHECK-NEXT: 0000 00000000 b0b0b000
+; CHECK: section .ARM.exidx
+; CHECK-NEXT: 0000 00000000 00000000
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
index 46c2f1c..c3e00ce 100644
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@@ -14,12 +14,12 @@ entry:
 declare float @fabsf(float)
 
 ; VFP2: test:
-; VFP2: 	vabs.f32	s2, s2
+; VFP2: 	vabs.f32	s
 
 ; NFP1: test:
-; NFP1: 	vabs.f32	d1, d1
+; NFP1: 	vabs.f32	d
 ; NFP0: test:
-; NFP0: 	vabs.f32	s2, s2
+; NFP0: 	vabs.f32	s
 
 ; CORTEXA8: test:
 ; CORTEXA8:     vadd.f32        [[D1:d[0-9]+]]
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll
index 7d38cc2..4108978 100644
--- a/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -231,3 +231,10 @@ define void @t6() nounwind ssp {
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
   ret void
 }
+
+; rdar://13202135
+define void @t7() nounwind ssp {
+; Just make sure this doesn't assert when we have an odd length and an alignment of 2.
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 3, i32 2, i1 false)
+  ret void
+}
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll
index 8fab002..8f13f39 100644
--- a/test/CodeGen/ARM/fdivs.ll
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -10,14 +10,14 @@ entry:
 }
 
 ; VFP2: test:
-; VFP2: 	vdiv.f32	s0, s2, s0
+; VFP2: 	vdiv.f32	s{{.}}, s{{.}}, s{{.}}
 
 ; NFP1: test:
-; NFP1: 	vdiv.f32	s0, s2, s0
+; NFP1: 	vdiv.f32	s{{.}}, s{{.}}, s{{.}}
 ; NFP0: test:
-; NFP0: 	vdiv.f32	s0, s2, s0
+; NFP0: 	vdiv.f32	s{{.}}, s{{.}}, s{{.}}
 
 ; CORTEXA8: test:
-; CORTEXA8: 	vdiv.f32	s0, s2, s0
+; CORTEXA8: 	vdiv.f32	s{{.}}, s{{.}}, s{{.}}
 ; CORTEXA9: test:
 ; CORTEXA9: 	vdiv.f32	s{{.}}, s{{.}}, s{{.}}
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index 6081712..9ce9b7a 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -46,8 +46,8 @@ entry:
 ; NEON: vnmla.f64
 
 ; A8: t3:
-; A8: vnmul.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
-; A8: vsub.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
+; A8: vnmul.f64 d
+; A8: vsub.f64 d
 	%0 = fmul double %a, %b
 	%1 = fsub double -0.0, %0
         %2 = fsub double %1, %acc
@@ -63,8 +63,8 @@ entry:
 ; NEON: vnmla.f64
 
 ; A8: t4:
-; A8: vnmul.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
-; A8: vsub.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
+; A8: vnmul.f64 d
+; A8: vsub.f64 d
 	%0 = fmul double %a, %b
 	%1 = fmul double -1.0, %0
         %2 = fsub double %1, %acc
diff --git a/test/CodeGen/ARM/fp128.ll b/test/CodeGen/ARM/fp128.ll
deleted file mode 100644
index bdeb547..0000000
--- a/test/CodeGen/ARM/fp128.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc -mtriple=arm-none-linux < %s | FileCheck --check-prefix=LITTLEENDIAN %s
-
-@var = global fp128 0xL00000000000000008000000000000000
-
-; CHECK-LITTLEENDIAN: var:
-; CHECK-LITTLEENDIAN-NEXT: .long   0                       @ fp128 -0
-; CHECK-LITTLEENDIAN-NEXT: .long   0
-; CHECK-LITTLEENDIAN-NEXT: .long   0
-; CHECK-LITTLEENDIAN-NEXT: .long   2147483648
-
diff --git a/test/CodeGen/ARM/inlineasm-64bit.ll b/test/CodeGen/ARM/inlineasm-64bit.ll
new file mode 100644
index 0000000..be5eb81
--- /dev/null
+++ b/test/CodeGen/ARM/inlineasm-64bit.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -O3  -mtriple=arm-linux-gnueabi | FileCheck %s
+
+; check that registers are passed correctly
+define void @i64_write(i64* %p, i64 %val) nounwind {
+; CHECK: i64_write:
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd [[REG1]], {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
+  %1 = tail call i64 asm sideeffect "1: ldrexd $0, ${0:H}, [$2]\0A strexd $0, $3, ${3:H}, [$2]\0A teq $0, #0\0A bne 1b", "=&r,=*Qo,r,r,~{cc}"(i64* %p, i64* %p, i64 %val) nounwind
+  ret void
+}
+
+; check if register allocation can reuse the registers
+define void @multi_writes(i64* %p, i64 %val1, i64 %val2, i64 %val3, i64 %val4, i64 %val5, i64 %val6) nounwind {
+entry:
+; CHECK: multi_writes:
+; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+
+; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+
+; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+
+  tail call void asm sideeffect " strexd $1, ${1:H}, [$0]\0A strexd $2, ${2:H}, [$0]\0A strexd $3, ${3:H}, [$0]\0A strexd $4, ${4:H}, [$0]\0A strexd $5, ${5:H}, [$0]\0A strexd $6, ${6:H}, [$0]\0A", "r,r,r,r,r,r,r"(i64* %p, i64 %val1, i64 %val2, i64 %val3, i64 %val4, i64 %val5, i64 %val6) nounwind
+  %incdec.ptr = getelementptr inbounds i64* %p, i32 1
+  tail call void asm sideeffect " strexd $1, ${1:H}, [$0]\0A strexd $2, ${2:H}, [$0]\0A strexd $3, ${3:H}, [$0]\0A strexd $4, ${4:H}, [$0]\0A strexd $5, ${5:H}, [$0]\0A strexd $6, ${6:H}, [$0]\0A", "r,r,r,r,r,r,r"(i64* %incdec.ptr, i64 %val1, i64 %val2, i64 %val3, i64 %val4, i64 %val5, i64 %val6) nounwind
+  tail call void asm sideeffect " strexd $1, ${1:H}, [$0]\0A strexd $2, ${2:H}, [$0]\0A strexd $3, ${3:H}, [$0]\0A strexd $4, ${4:H}, [$0]\0A strexd $5, ${5:H}, [$0]\0A strexd $6, ${6:H}, [$0]\0A", "r,r,r,r,r,r,r"(i64* %incdec.ptr, i64 %val1, i64 %val2, i64 %val3, i64 %val4, i64 %val5, i64 %val6) nounwind
+  ret void
+}
+
+
+; check if callee-saved registers used by inline asm are saved/restored
+define void @foo(i64* %p, i64 %i) nounwind {
+; CHECK:foo:
+; CHECK: push {{{r[4-9]|r10|r11}}
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd [[REG1]], {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
+; CHECK: pop {{{r[4-9]|r10|r11}}
+  %1 = tail call { i64, i64 } asm sideeffect "@ atomic64_set\0A1: ldrexd $0, ${0:H}, [$3]\0Aldrexd $1, ${1:H}, [$3]\0A strexd $0, $4, ${4:H}, [$3]\0A teq $0, #0\0A bne 1b", "=&r,=&r,=*Qo,r,r,~{cc}"(i64* %p, i64* %p, i64 %i) nounwind
+  ret void
+}
diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll
index 2fcc45f..390a44e 100644
--- a/test/CodeGen/ARM/inlineasm3.ll
+++ b/test/CodeGen/ARM/inlineasm3.ll
@@ -30,7 +30,7 @@ entry:
 
 define hidden void @conv4_8_E() nounwind {
 entry:
-%asmtmp31 = call %0 asm "vld1.u8  {$0}, [$1, :128]!\0A", "=w,=r,1"(<16 x i8>* undef) nounwind
+%asmtmp31 = call %0 asm "vld1.u8  {$0}, [$1:128]!\0A", "=w,=r,1"(<16 x i8>* undef) nounwind
 unreachable
 }
 
diff --git a/test/CodeGen/ARM/neon_cmp.ll b/test/CodeGen/ARM/neon_cmp.ll
new file mode 100644
index 0000000..046b5da
--- /dev/null
+++ b/test/CodeGen/ARM/neon_cmp.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+; bug 15283
+; radar://13191881
+; CHECK: vfcmp
+define void @vfcmp(<2 x double>* %a, <2 x double>* %b) {
+  %wide.load = load <2 x double>* %a, align 4
+  %wide.load2 = load <2 x double>* %b, align 4
+; CHECK-NOT: vdup.32
+; CHECK-NOT: vmovn.i64
+  %v1 = fcmp olt <2 x double> %wide.load, %wide.load2
+  %v2 = zext <2 x i1> %v1 to <2 x i32>
+  %v3 = sitofp <2 x i32> %v2 to <2 x double>
+  store <2 x double> %v3, <2 x double>* %b, align 4
+  ret void
+}
diff --git a/test/CodeGen/ARM/neon_fpconv.ll b/test/CodeGen/ARM/neon_fpconv.ll
index 1948ad8..149f4c7 100644
--- a/test/CodeGen/ARM/neon_fpconv.ll
+++ b/test/CodeGen/ARM/neon_fpconv.ll
@@ -15,3 +15,28 @@ define <2 x double> @vextend(<2 x float> %a) {
   ret <2 x double> %ve
 }
 
+; We used to generate vmovs between scalar and vfp/neon registers.
+; CHECK: vsitofp_double
+define void @vsitofp_double(<2 x i32>* %loadaddr,
+                            <2 x double>* %storeaddr) {
+  %v0 = load <2 x i32>* %loadaddr
+; CHECK:      vldr
+; CHECK-NEXT:	vcvt.f64.s32
+; CHECK-NEXT:	vcvt.f64.s32
+; CHECK-NEXT:	vst
+  %r = sitofp <2 x i32> %v0 to <2 x double>
+  store <2 x double> %r, <2 x double>* %storeaddr
+  ret void
+}
+; CHECK: vuitofp_double
+define void @vuitofp_double(<2 x i32>* %loadaddr,
+                            <2 x double>* %storeaddr) {
+  %v0 = load <2 x i32>* %loadaddr
+; CHECK:      vldr
+; CHECK-NEXT:	vcvt.f64.u32
+; CHECK-NEXT:	vcvt.f64.u32
+; CHECK-NEXT:	vst
+  %r = uitofp <2 x i32> %v0 to <2 x double>
+  store <2 x double> %r, <2 x double>* %storeaddr
+  ret void
+}
diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll
index 497619e..25a670b 100644
--- a/test/CodeGen/ARM/neon_ld2.ll
+++ b/test/CodeGen/ARM/neon_ld2.ll
@@ -7,10 +7,10 @@
 ; CHECK: vadd.i64 q
 ; CHECK: vst1.64
 ; SWIFT: t1
-; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
-; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}}
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}}
 ; SWIFT: vadd.i64 q
-; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
+; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}}
 define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
 entry:
 	%0 = load <2 x i64>* %a, align 16		; <<2 x i64>> [#uses=1]
@@ -28,8 +28,8 @@ entry:
 ; CHECK: vmov r0, r1, d
 ; CHECK: vmov r2, r3, d
 ; SWIFT: t2
-; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
-; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}}
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}}
 ; SWIFT: vsub.i64 q
 ; SWIFT: vmov r0, r1, d
 ; SWIFT: vmov r2, r3, d
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 6d6586e..fd2083c 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -242,8 +242,8 @@ define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
 ; CHECK:        vldr
 ; CHECK-NOT:    vmov d{{.*}}, d16
 ; CHECK:        vmov.i32 d17
-; CHECK-NEXT:   vst1.64 {d16, d17}, [r0, :128]
-; CHECK-NEXT:   vst1.64 {d16, d17}, [r0, :128]
+; CHECK-NEXT:   vst1.64 {d16, d17}, [r0:128]
+; CHECK-NEXT:   vst1.64 {d16, d17}, [r0:128]
   %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
   %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
   store <4 x float> %4, <4 x float>* undef, align 16
diff --git a/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll b/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
new file mode 100644
index 0000000..d8241d0
--- /dev/null
+++ b/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -O1 -mtriple thumbv7-apple-ios6
+; Just make sure no one tries to make the assumption that the normal edge of an
+; invoke is never a critical edge.  Previously, this code would assert.
+
+%struct.__CFString = type opaque
+
+declare void @bar(%struct.__CFString*, %struct.__CFString*)
+
+define noalias i8* @foo(i8* nocapture %inRefURL) noreturn ssp {
+entry:
+  %call = tail call %struct.__CFString* @bar3()
+  %call2 = invoke i8* @bar2()
+          to label %for.cond unwind label %lpad  ; normal edge is critical: %for.cond has two preds
+
+for.cond:                                         ; preds = %entry, %for.cond
+  invoke void @bar(%struct.__CFString* undef, %struct.__CFString* null)
+          to label %for.cond unwind label %lpad5  ; normal edge loops back to this block
+
+lpad:                                             ; preds = %entry
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %1 = extractvalue { i8*, i32 } %0, 0
+  %2 = extractvalue { i8*, i32 } %0, 1
+  br label %ehcleanup
+
+lpad5:                                            ; preds = %for.cond
+  %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %4 = extractvalue { i8*, i32 } %3, 0
+  %5 = extractvalue { i8*, i32 } %3, 1
+  invoke void @release(i8* %call2)
+          to label %ehcleanup unwind label %terminate.lpad.i.i16
+
+terminate.lpad.i.i16:                             ; preds = %lpad5
+  %6 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          catch i8* null
+  tail call void @terminatev() noreturn nounwind
+  unreachable
+
+ehcleanup:                                        ; preds = %lpad5, %lpad
+  %exn.slot.0 = phi i8* [ %1, %lpad ], [ %4, %lpad5 ]
+  %ehselector.slot.0 = phi i32 [ %2, %lpad ], [ %5, %lpad5 ]
+  %7 = bitcast %struct.__CFString* %call to i8*
+  invoke void @release(i8* %7)
+          to label %_ZN5SmartIPK10__CFStringED1Ev.exit unwind label %terminate.lpad.i.i
+
+terminate.lpad.i.i:                               ; preds = %ehcleanup
+  %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          catch i8* null
+  tail call void @terminatev() noreturn nounwind
+  unreachable
+
+_ZN5SmartIPK10__CFStringED1Ev.exit:               ; preds = %ehcleanup
+  %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0
+  %lpad.val12 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1
+  resume { i8*, i32 } %lpad.val12  ; re-raise the exception merged in %ehcleanup
+}
+
+declare %struct.__CFString* @bar3()
+
+declare i8* @bar2()
+
+declare i32 @__gxx_personality_sj0(...)
+
+declare void @release(i8*)
+
+declare void @terminatev()
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index 057ea11..e93cdbc 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -12,8 +12,8 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
 define void @aaa(%quuz* %this, i8* %block) {
 ; CHECK: aaa:
 ; CHECK: bic {{.*}}, #15
-; CHECK: vst1.64 {{.*}}sp, :128
-; CHECK: vld1.64 {{.*}}sp, :128
+; CHECK: vst1.64 {{.*}}sp:128
+; CHECK: vld1.64 {{.*}}sp:128
 entry:
   %aligned_vec = alloca <4 x float>, align 16
   %"alloca point" = bitcast i32 0 to i32
diff --git a/test/CodeGen/ARM/trap.ll b/test/CodeGen/ARM/trap.ll
index 21865f8..a4e3c3c 100644
--- a/test/CodeGen/ARM/trap.ll
+++ b/test/CodeGen/ARM/trap.ll
@@ -1,5 +1,23 @@
 ; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=INSTR
 ; RUN: llc < %s -mtriple=arm-apple-darwin -trap-func=_trap | FileCheck %s -check-prefix=FUNC
+; RUN: llc -mtriple=armv7-unknown-nacl -filetype=obj %s -o - \
+; RUN:  | llvm-objdump -disassemble -triple armv7-unknown-nacl - \
+; RUN:  | FileCheck %s -check-prefix=ENCODING-NACL
+; RUN: llc -mtriple=armv7-unknown-nacl -filetype=obj %s -o - \
+; RUN:  | llvm-objdump -disassemble -triple armv7 -mattr=+nacl-trap - \
+; RUN:  | FileCheck %s -check-prefix=ENCODING-NACL
+; RUN: llc -mtriple=armv7 -mattr=+nacl-trap -filetype=obj %s -o - \
+; RUN:  | llvm-objdump -disassemble -triple armv7 -mattr=+nacl-trap - \
+; RUN:  | FileCheck %s -check-prefix=ENCODING-NACL
+; RUN: llc -fast-isel -mtriple=armv7-unknown-nacl -filetype=obj %s -o - \
+; RUN:  | llvm-objdump -disassemble -triple armv7-unknown-nacl - \
+; RUN:  | FileCheck %s -check-prefix=ENCODING-NACL
+; RUN: llc -mtriple=armv7 -filetype=obj %s -o - \
+; RUN:  | llvm-objdump -disassemble -triple armv7 - \
+; RUN:  | FileCheck %s -check-prefix=ENCODING-ALL
+; RUN: llc -fast-isel -mtriple=armv7 -filetype=obj %s -o - \
+; RUN:  | llvm-objdump -disassemble -triple armv7 - \
+; RUN:  | FileCheck %s -check-prefix=ENCODING-ALL
 ; rdar://7961298
 ; rdar://9249183
 
@@ -10,6 +28,11 @@ entry:
 
 ; FUNC: t:
 ; FUNC: bl __trap
+
+; ENCODING-NACL: f0 de fe e7
+
+; ENCODING-ALL: fe de ff e7
+
   call void @llvm.trap()
   unreachable
 }
@@ -21,6 +44,11 @@ entry:
 
 ; FUNC: t2:
 ; FUNC: bl __trap
+
+; ENCODING-NACL: f0 de fe e7
+
+; ENCODING-ALL: fe de ff e7
+
   call void @llvm.debugtrap()
   unreachable
 }
diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll
index a38a0fe..42964de 100644
--- a/test/CodeGen/ARM/vector-DAGCombine.ll
+++ b/test/CodeGen/ARM/vector-DAGCombine.ll
@@ -133,3 +133,30 @@ define i16 @foldBuildVectors() {
   %3 = extractelement <8 x i16> %2, i32 0
   ret i16 %3
 }
+
+; Test that a full lane reversal of a v8i16 vector is lowered to vrev64.16
+; followed by vext.16.
+; CHECK: reverse_v8i16
+define void @reverse_v8i16(<8 x i16>* %loadaddr, <8 x i16>* %storeaddr) {
+  %v0 = load <8 x i16>* %loadaddr
+  ; CHECK: vrev64.16
+  ; CHECK: vext.16
+  %v1 = shufflevector <8 x i16> %v0, <8 x i16> undef,
+              <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  store <8 x i16> %v1, <8 x i16>* %storeaddr
+  ret void
+}
+
+; Test that a full lane reversal of a v16i8 vector is lowered to vrev64.8
+; followed by vext.8.
+; CHECK: reverse_v16i8
+define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) {
+  %v0 = load <16 x i8>* %loadaddr
+  ; CHECK: vrev64.8
+  ; CHECK: vext.8
+  %v1 = shufflevector <16 x i8> %v0, <16 x i8> undef,
+       <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8,
+                   i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  store <16 x i8> %v1, <16 x i8>* %storeaddr
+  ret void
+}
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
index e524395..994f05d 100644
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -4,7 +4,7 @@
 define <8 x i8> @vld1i8(i8* %A) nounwind {
 ;CHECK: vld1i8:
 ;Check the alignment value.  Max for this instruction is 64 bits:
-;CHECK: vld1.8 {d16}, [r0, :64]
+;CHECK: vld1.8 {d16}, [r0:64]
 	%tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 16)
 	ret <8 x i8> %tmp1
 }
@@ -68,7 +68,7 @@ define <1 x i64> @vld1i64(i64* %A) nounwind {
 define <16 x i8> @vld1Qi8(i8* %A) nounwind {
 ;CHECK: vld1Qi8:
 ;Check the alignment value.  Max for this instruction is 128 bits:
-;CHECK: vld1.8 {d16, d17}, [r0, :64]
+;CHECK: vld1.8 {d16, d17}, [r0:64]
 	%tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
 	ret <16 x i8> %tmp1
 }
@@ -76,7 +76,7 @@ define <16 x i8> @vld1Qi8(i8* %A) nounwind {
 ;Check for a post-increment updating load.
 define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
 ;CHECK: vld1Qi8_update:
-;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}, :64]!
+;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}:64]!
 	%A = load i8** %ptr
 	%tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
 	%tmp2 = getelementptr i8* %A, i32 16
@@ -87,7 +87,7 @@ define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
 define <8 x i16> @vld1Qi16(i16* %A) nounwind {
 ;CHECK: vld1Qi16:
 ;Check the alignment value.  Max for this instruction is 128 bits:
-;CHECK: vld1.16 {d16, d17}, [r0, :128]
+;CHECK: vld1.16 {d16, d17}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 32)
 	ret <8 x i16> %tmp1
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll
index 29b3794..caa016e 100644
--- a/test/CodeGen/ARM/vld2.ll
+++ b/test/CodeGen/ARM/vld2.ll
@@ -14,7 +14,7 @@
 define <8 x i8> @vld2i8(i8* %A) nounwind {
 ;CHECK: vld2i8:
 ;Check the alignment value.  Max for this instruction is 128 bits:
-;CHECK: vld2.8 {d16, d17}, [r0, :64]
+;CHECK: vld2.8 {d16, d17}, [r0:64]
 	%tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 8)
         %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1
@@ -25,7 +25,7 @@ define <8 x i8> @vld2i8(i8* %A) nounwind {
 define <4 x i16> @vld2i16(i16* %A) nounwind {
 ;CHECK: vld2i16:
 ;Check the alignment value.  Max for this instruction is 128 bits:
-;CHECK: vld2.16 {d16, d17}, [r0, :128]
+;CHECK: vld2.16 {d16, d17}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 32)
         %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0
@@ -74,7 +74,7 @@ define <2 x float> @vld2f_update(float** %ptr) nounwind {
 define <1 x i64> @vld2i64(i64* %A) nounwind {
 ;CHECK: vld2i64:
 ;Check the alignment value.  Max for this instruction is 128 bits:
-;CHECK: vld1.64 {d16, d17}, [r0, :128]
+;CHECK: vld1.64 {d16, d17}, [r0:128]
 	%tmp0 = bitcast i64* %A to i8*
 	%tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0, i32 32)
         %tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0
@@ -86,7 +86,7 @@ define <1 x i64> @vld2i64(i64* %A) nounwind {
 define <16 x i8> @vld2Qi8(i8* %A) nounwind {
 ;CHECK: vld2Qi8:
 ;Check the alignment value.  Max for this instruction is 256 bits:
-;CHECK: vld2.8 {d16, d17, d18, d19}, [r0, :64]
+;CHECK: vld2.8 {d16, d17, d18, d19}, [r0:64]
 	%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 8)
         %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
@@ -97,7 +97,7 @@ define <16 x i8> @vld2Qi8(i8* %A) nounwind {
 ;Check for a post-increment updating load with register increment.
 define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
 ;CHECK: vld2Qi8_update:
-;CHECK: vld2.8 {d16, d17, d18, d19}, [r2, :128], r1
+;CHECK: vld2.8 {d16, d17, d18, d19}, [r2:128], r1
 	%A = load i8** %ptr
 	%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 16)
         %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
@@ -111,7 +111,7 @@ define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
 define <8 x i16> @vld2Qi16(i16* %A) nounwind {
 ;CHECK: vld2Qi16:
 ;Check the alignment value.  Max for this instruction is 256 bits:
-;CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128]
+;CHECK: vld2.16 {d16, d17, d18, d19}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 16)
         %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0
@@ -123,7 +123,7 @@ define <8 x i16> @vld2Qi16(i16* %A) nounwind {
 define <4 x i32> @vld2Qi32(i32* %A) nounwind {
 ;CHECK: vld2Qi32:
 ;Check the alignment value.  Max for this instruction is 256 bits:
-;CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256]
+;CHECK: vld2.32 {d16, d17, d18, d19}, [r0:256]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 64)
         %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
index b495319..ad63e1f 100644
--- a/test/CodeGen/ARM/vld3.ll
+++ b/test/CodeGen/ARM/vld3.ll
@@ -15,7 +15,7 @@
 define <8 x i8> @vld3i8(i8* %A) nounwind {
 ;CHECK: vld3i8:
 ;Check the alignment value.  Max for this instruction is 64 bits:
-;CHECK: vld3.8 {d16, d17, d18}, [r0, :64]
+;CHECK: vld3.8 {d16, d17, d18}, [r0:64]
 	%tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 32)
         %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2
@@ -74,7 +74,7 @@ define <2 x float> @vld3f(float* %A) nounwind {
 define <1 x i64> @vld3i64(i64* %A) nounwind {
 ;CHECK: vld3i64:
 ;Check the alignment value.  Max for this instruction is 64 bits:
-;CHECK: vld1.64 {d16, d17, d18}, [r0, :64]
+;CHECK: vld1.64 {d16, d17, d18}, [r0:64]
 	%tmp0 = bitcast i64* %A to i8*
 	%tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16)
         %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
@@ -86,8 +86,8 @@ define <1 x i64> @vld3i64(i64* %A) nounwind {
 define <16 x i8> @vld3Qi8(i8* %A) nounwind {
 ;CHECK: vld3Qi8:
 ;Check the alignment value.  Max for this instruction is 64 bits:
-;CHECK: vld3.8 {d16, d18, d20}, [r0, :64]!
-;CHECK: vld3.8 {d17, d19, d21}, [r0, :64]
+;CHECK: vld3.8 {d16, d18, d20}, [r0:64]!
+;CHECK: vld3.8 {d17, d19, d21}, [r0:64]
 	%tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 32)
         %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll
index 59a73db..9ee5fe4 100644
--- a/test/CodeGen/ARM/vld4.ll
+++ b/test/CodeGen/ARM/vld4.ll
@@ -14,7 +14,7 @@
 define <8 x i8> @vld4i8(i8* %A) nounwind {
 ;CHECK: vld4i8:
 ;Check the alignment value.  Max for this instruction is 256 bits:
-;CHECK: vld4.8 {d16, d17, d18, d19}, [r0, :64]
+;CHECK: vld4.8 {d16, d17, d18, d19}, [r0:64]
 	%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 8)
         %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
@@ -25,7 +25,7 @@ define <8 x i8> @vld4i8(i8* %A) nounwind {
 ;Check for a post-increment updating load with register increment.
 define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {
 ;CHECK: vld4i8_update:
-;CHECK: vld4.8 {d16, d17, d18, d19}, [r2, :128], r1
+;CHECK: vld4.8 {d16, d17, d18, d19}, [r2:128], r1
 	%A = load i8** %ptr
 	%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 16)
 	%tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
@@ -39,7 +39,7 @@ define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {
 define <4 x i16> @vld4i16(i16* %A) nounwind {
 ;CHECK: vld4i16:
 ;Check the alignment value.  Max for this instruction is 256 bits:
-;CHECK: vld4.16 {d16, d17, d18, d19}, [r0, :128]
+;CHECK: vld4.16 {d16, d17, d18, d19}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0, i32 16)
         %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0
@@ -51,7 +51,7 @@ define <4 x i16> @vld4i16(i16* %A) nounwind {
 define <2 x i32> @vld4i32(i32* %A) nounwind {
 ;CHECK: vld4i32:
 ;Check the alignment value.  Max for this instruction is 256 bits:
-;CHECK: vld4.32 {d16, d17, d18, d19}, [r0, :256]
+;CHECK: vld4.32 {d16, d17, d18, d19}, [r0:256]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0, i32 32)
         %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
@@ -74,7 +74,7 @@ define <2 x float> @vld4f(float* %A) nounwind {
 define <1 x i64> @vld4i64(i64* %A) nounwind {
 ;CHECK: vld4i64:
 ;Check the alignment value.  Max for this instruction is 256 bits:
-;CHECK: vld1.64 {d16, d17, d18, d19}, [r0, :256]
+;CHECK: vld1.64 {d16, d17, d18, d19}, [r0:256]
 	%tmp0 = bitcast i64* %A to i8*
 	%tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64)
         %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
@@ -86,8 +86,8 @@ define <1 x i64> @vld4i64(i64* %A) nounwind {
 define <16 x i8> @vld4Qi8(i8* %A) nounwind {
 ;CHECK: vld4Qi8:
 ;Check the alignment value.  Max for this instruction is 256 bits:
-;CHECK: vld4.8 {d16, d18, d20, d22}, [r0, :256]!
-;CHECK: vld4.8 {d17, d19, d21, d23}, [r0, :256]
+;CHECK: vld4.8 {d16, d18, d20, d22}, [r0:256]!
+;CHECK: vld4.8 {d17, d19, d21, d23}, [r0:256]
 	%tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A, i32 64)
         %tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2
@@ -111,8 +111,8 @@ define <8 x i16> @vld4Qi16(i16* %A) nounwind {
 ;Check for a post-increment updating load. 
 define <8 x i16> @vld4Qi16_update(i16** %ptr) nounwind {
 ;CHECK: vld4Qi16_update:
-;CHECK: vld4.16 {d16, d18, d20, d22}, [r1, :64]!
-;CHECK: vld4.16 {d17, d19, d21, d23}, [r1, :64]!
+;CHECK: vld4.16 {d16, d18, d20, d22}, [r1:64]!
+;CHECK: vld4.16 {d17, d19, d21, d23}, [r1:64]!
 	%A = load i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 8)
diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll
index c69473f..7c7319c 100644
--- a/test/CodeGen/ARM/vlddup.ll
+++ b/test/CodeGen/ARM/vlddup.ll
@@ -13,7 +13,7 @@ define <8 x i8> @vld1dupi8(i8* %A) nounwind {
 define <4 x i16> @vld1dupi16(i16* %A) nounwind {
 ;CHECK: vld1dupi16:
 ;Check the alignment value.  Max for this instruction is 16 bits:
-;CHECK: vld1.16 {d16[]}, [r0, :16]
+;CHECK: vld1.16 {d16[]}, [r0:16]
 	%tmp1 = load i16* %A, align 8
 	%tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0
 	%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -23,7 +23,7 @@ define <4 x i16> @vld1dupi16(i16* %A) nounwind {
 define <2 x i32> @vld1dupi32(i32* %A) nounwind {
 ;CHECK: vld1dupi32:
 ;Check the alignment value.  Max for this instruction is 32 bits:
-;CHECK: vld1.32 {d16[]}, [r0, :32]
+;CHECK: vld1.32 {d16[]}, [r0:32]
 	%tmp1 = load i32* %A, align 8
 	%tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
 	%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -32,7 +32,7 @@ define <2 x i32> @vld1dupi32(i32* %A) nounwind {
 
 define <2 x float> @vld1dupf(float* %A) nounwind {
 ;CHECK: vld1dupf:
-;CHECK: vld1.32 {d16[]}, [r0, :32]
+;CHECK: vld1.32 {d16[]}, [r0:32]
 	%tmp0 = load float* %A
         %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
         %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
@@ -51,7 +51,7 @@ define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
 
 define <4 x float> @vld1dupQf(float* %A) nounwind {
 ;CHECK: vld1dupQf:
-;CHECK: vld1.32 {d16[], d17[]}, [r0, :32]
+;CHECK: vld1.32 {d16[], d17[]}, [r0:32]
         %tmp0 = load float* %A
         %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
         %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
@@ -109,7 +109,7 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
 define <2 x i32> @vld2dupi32(i8* %A) nounwind {
 ;CHECK: vld2dupi32:
 ;Check the alignment value.  Max for this instruction is 64 bits:
-;CHECK: vld2.32 {d16[], d17[]}, [r0, :64]
+;CHECK: vld2.32 {d16[], d17[]}, [r0:64]
 	%tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
 	%tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0
 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -194,7 +194,7 @@ define <2 x i32> @vld4dupi32(i8* %A) nounwind {
 ;CHECK: vld4dupi32:
 ;Check the alignment value.  An 8-byte alignment is allowed here even though
 ;it is smaller than the total size of the memory being loaded.
-;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0, :64]
+;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0:64]
 	%tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)
 	%tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0
 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index 7bd0cbd..f35fa92 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -14,7 +14,7 @@ define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
 define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vld1lanei16:
 ;Check the alignment value.  Max for this instruction is 16 bits:
-;CHECK: vld1.16 {d16[2]}, [r0, :16]
+;CHECK: vld1.16 {d16[2]}, [r0:16]
 	%tmp1 = load <4 x i16>* %B
 	%tmp2 = load i16* %A, align 8
 	%tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2
@@ -24,7 +24,7 @@ define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
 define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vld1lanei32:
 ;Check the alignment value.  Max for this instruction is 32 bits:
-;CHECK: vld1.32 {d16[1]}, [r0, :32]
+;CHECK: vld1.32 {d16[1]}, [r0:32]
 	%tmp1 = load <2 x i32>* %B
 	%tmp2 = load i32* %A, align 8
 	%tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
@@ -34,7 +34,7 @@ define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
 define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vld1lanei32a32:
 ;Check the alignment value.  Legal values are none or :32.
-;CHECK: vld1.32 {d16[1]}, [r0, :32]
+;CHECK: vld1.32 {d16[1]}, [r0:32]
 	%tmp1 = load <2 x i32>* %B
 	%tmp2 = load i32* %A, align 4
 	%tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
@@ -43,7 +43,7 @@ define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind {
 
 define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
 ;CHECK: vld1lanef:
-;CHECK: vld1.32 {d16[1]}, [r0, :32]
+;CHECK: vld1.32 {d16[1]}, [r0:32]
 	%tmp1 = load <2 x float>* %B
 	%tmp2 = load float* %A, align 4
 	%tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1
@@ -61,7 +61,7 @@ define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
 
 define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vld1laneQi16:
-;CHECK: vld1.16 {d17[1]}, [r0, :16]
+;CHECK: vld1.16 {d17[1]}, [r0:16]
 	%tmp1 = load <8 x i16>* %B
 	%tmp2 = load i16* %A, align 8
 	%tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5
@@ -70,7 +70,7 @@ define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 
 define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vld1laneQi32:
-;CHECK: vld1.32 {d17[1]}, [r0, :32]
+;CHECK: vld1.32 {d17[1]}, [r0:32]
 	%tmp1 = load <4 x i32>* %B
 	%tmp2 = load i32* %A, align 8
 	%tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3
@@ -79,7 +79,7 @@ define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 
 define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
 ;CHECK: vld1laneQf:
-;CHECK: vld1.32 {d16[0]}, [r0, :32]
+;CHECK: vld1.32 {d16[0]}, [r0:32]
 	%tmp1 = load <4 x float>* %B
 	%tmp2 = load float* %A
 	%tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0
@@ -98,7 +98,7 @@ define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
 define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vld2lanei8:
 ;Check the alignment value.  Max for this instruction is 16 bits:
-;CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16]
+;CHECK: vld2.8 {d16[1], d17[1]}, [r0:16]
 	%tmp1 = load <8 x i8>* %B
 	%tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
         %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
@@ -110,7 +110,7 @@ define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
 define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vld2lanei16:
 ;Check the alignment value.  Max for this instruction is 32 bits:
-;CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32]
+;CHECK: vld2.16 {d16[1], d17[1]}, [r0:32]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <4 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
@@ -176,7 +176,7 @@ define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vld2laneQi32:
 ;Check the alignment value.  Max for this instruction is 64 bits:
-;CHECK: vld2.32 {d17[0], d19[0]}, [{{r[0-9]+}}, :64]
+;CHECK: vld2.32 {d17[0], d19[0]}, [{{r[0-9]+}}:64]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <4 x i32>* %B
 	%tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
@@ -354,7 +354,7 @@ declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x flo
 define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vld4lanei8:
 ;Check the alignment value.  Max for this instruction is 32 bits:
-;CHECK: vld4.8 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}, :32]
+;CHECK: vld4.8 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}:32]
 	%tmp1 = load <8 x i8>* %B
 	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
         %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
@@ -370,7 +370,7 @@ define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;Check for a post-increment updating load.
 define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
 ;CHECK: vld4lanei8_update:
-;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}, :32]!
+;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:32]!
 	%A = load i8** %ptr
 	%tmp1 = load <8 x i8>* %B
 	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
@@ -408,7 +408,7 @@ define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vld4lanei32:
 ;Check the alignment value.  An 8-byte alignment is allowed here even though
 ;it is smaller than the total size of the memory being loaded.
-;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}, :64]
+;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:64]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <2 x i32>* %B
 	%tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8)
@@ -441,7 +441,7 @@ define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
 define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vld4laneQi16:
 ;Check the alignment value.  Max for this instruction is 64 bits:
-;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [{{r[0-9]+}}, :64]
+;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [{{r[0-9]+}}:64]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll
index 364d44b..e1f3e88 100644
--- a/test/CodeGen/ARM/vst1.ll
+++ b/test/CodeGen/ARM/vst1.ll
@@ -3,7 +3,7 @@
 define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vst1i8:
 ;Check the alignment value.  Max for this instruction is 64 bits:
-;CHECK: vst1.8 {d16}, [r0, :64]
+;CHECK: vst1.8 {d16}, [r0:64]
 	%tmp1 = load <8 x i8>* %B
 	call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 16)
 	ret void
@@ -61,7 +61,7 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
 define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
 ;CHECK: vst1Qi8:
 ;Check the alignment value.  Max for this instruction is 128 bits:
-;CHECK: vst1.8 {d16, d17}, [r0, :64]
+;CHECK: vst1.8 {d16, d17}, [r0:64]
 	%tmp1 = load <16 x i8>* %B
 	call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 8)
 	ret void
@@ -70,7 +70,7 @@ define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
 define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vst1Qi16:
 ;Check the alignment value.  Max for this instruction is 128 bits:
-;CHECK: vst1.16 {d16, d17}, [r0, :128]
+;CHECK: vst1.16 {d16, d17}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
 	call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32)
@@ -80,7 +80,7 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
 ;Check for a post-increment updating store with register increment.
 define void @vst1Qi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
 ;CHECK: vst1Qi16_update:
-;CHECK: vst1.16 {d16, d17}, [r1, :64], r2
+;CHECK: vst1.16 {d16, d17}, [r1:64], r2
 	%A = load i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
index fb05a20..a31f863 100644
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -3,7 +3,7 @@
 define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vst2i8:
 ;Check the alignment value.  Max for this instruction is 128 bits:
-;CHECK: vst2.8 {d16, d17}, [r0, :64]
+;CHECK: vst2.8 {d16, d17}, [r0:64]
 	%tmp1 = load <8 x i8>* %B
 	call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
 	ret void
@@ -24,7 +24,7 @@ define void @vst2i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
 define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vst2i16:
 ;Check the alignment value.  Max for this instruction is 128 bits:
-;CHECK: vst2.16 {d16, d17}, [r0, :128]
+;CHECK: vst2.16 {d16, d17}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <4 x i16>* %B
 	call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 32)
@@ -52,7 +52,7 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind {
 define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
 ;CHECK: vst2i64:
 ;Check the alignment value.  Max for this instruction is 128 bits:
-;CHECK: vst1.64 {d16, d17}, [r0, :128]
+;CHECK: vst1.64 {d16, d17}, [r0:128]
 	%tmp0 = bitcast i64* %A to i8*
 	%tmp1 = load <1 x i64>* %B
 	call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 32)
@@ -62,7 +62,7 @@ define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
 ;Check for a post-increment updating store.
 define void @vst2i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
 ;CHECK: vst2i64_update:
-;CHECK: vst1.64 {d16, d17}, [r1, :64]!
+;CHECK: vst1.64 {d16, d17}, [r1:64]!
 	%A = load i64** %ptr
 	%tmp0 = bitcast i64* %A to i8*
 	%tmp1 = load <1 x i64>* %B
@@ -75,7 +75,7 @@ define void @vst2i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
 define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
 ;CHECK: vst2Qi8:
 ;Check the alignment value.  Max for this instruction is 256 bits:
-;CHECK: vst2.8 {d16, d17, d18, d19}, [r0, :64]
+;CHECK: vst2.8 {d16, d17, d18, d19}, [r0:64]
 	%tmp1 = load <16 x i8>* %B
 	call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 8)
 	ret void
@@ -84,7 +84,7 @@ define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
 define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vst2Qi16:
 ;Check the alignment value.  Max for this instruction is 256 bits:
-;CHECK: vst2.16 {d16, d17, d18, d19}, [r0, :128]
+;CHECK: vst2.16 {d16, d17, d18, d19}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
 	call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 16)
@@ -94,7 +94,7 @@ define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
 define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vst2Qi32:
 ;Check the alignment value.  Max for this instruction is 256 bits:
-;CHECK: vst2.32 {d16, d17, d18, d19}, [r0, :256]
+;CHECK: vst2.32 {d16, d17, d18, d19}, [r0:256]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <4 x i32>* %B
 	call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 64)
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
index f117ab2..281bb73 100644
--- a/test/CodeGen/ARM/vst3.ll
+++ b/test/CodeGen/ARM/vst3.ll
@@ -4,7 +4,7 @@ define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vst3i8:
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;This test runs at -O0 so do not check for specific register numbers.
-;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]
+;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
 	%tmp1 = load <8 x i8>* %B
 	call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 32)
 	ret void
@@ -54,7 +54,7 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
 ;CHECK: vst3i64:
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;This test runs at -O0 so do not check for specific register numbers.
-;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]
+;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
 	%tmp0 = bitcast i64* %A to i8*
 	%tmp1 = load <1 x i64>* %B
 	call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 16)
@@ -65,8 +65,8 @@ define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
 ;CHECK: vst3Qi8:
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;This test runs at -O0 so do not check for specific register numbers.
-;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]!
-;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]
+;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]!
+;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
 	%tmp1 = load <16 x i8>* %B
 	call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 32)
 	ret void
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
index e94acb6..7dedb2f 100644
--- a/test/CodeGen/ARM/vst4.ll
+++ b/test/CodeGen/ARM/vst4.ll
@@ -3,7 +3,7 @@
 define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vst4i8:
 ;Check the alignment value.  Max for this instruction is 256 bits:
-;CHECK: vst4.8 {d16, d17, d18, d19}, [r0, :64]
+;CHECK: vst4.8 {d16, d17, d18, d19}, [r0:64]
 	%tmp1 = load <8 x i8>* %B
 	call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
 	ret void
@@ -12,7 +12,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
 ;Check for a post-increment updating store with register increment.
 define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
 ;CHECK: vst4i8_update:
-;CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :128], r2
+;CHECK: vst4.8 {d16, d17, d18, d19}, [r1:128], r2
 	%A = load i8** %ptr
 	%tmp1 = load <8 x i8>* %B
 	call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 16)
@@ -24,7 +24,7 @@ define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
 define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vst4i16:
 ;Check the alignment value.  Max for this instruction is 256 bits:
-;CHECK: vst4.16 {d16, d17, d18, d19}, [r0, :128]
+;CHECK: vst4.16 {d16, d17, d18, d19}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <4 x i16>* %B
 	call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 16)
@@ -34,7 +34,7 @@ define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
 define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vst4i32:
 ;Check the alignment value.  Max for this instruction is 256 bits:
-;CHECK: vst4.32 {d16, d17, d18, d19}, [r0, :256]
+;CHECK: vst4.32 {d16, d17, d18, d19}, [r0:256]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <2 x i32>* %B
 	call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 32)
@@ -53,7 +53,7 @@ define void @vst4f(float* %A, <2 x float>* %B) nounwind {
 define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
 ;CHECK: vst4i64:
 ;Check the alignment value.  Max for this instruction is 256 bits:
-;CHECK: vst1.64 {d16, d17, d18, d19}, [r0, :256]
+;CHECK: vst1.64 {d16, d17, d18, d19}, [r0:256]
 	%tmp0 = bitcast i64* %A to i8*
 	%tmp1 = load <1 x i64>* %B
 	call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 64)
@@ -63,8 +63,8 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
 define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
 ;CHECK: vst4Qi8:
 ;Check the alignment value.  Max for this instruction is 256 bits:
-;CHECK: vst4.8 {d16, d18, d20, d22}, [r0, :256]!
-;CHECK: vst4.8 {d17, d19, d21, d23}, [r0, :256]
+;CHECK: vst4.8 {d16, d18, d20, d22}, [r0:256]!
+;CHECK: vst4.8 {d17, d19, d21, d23}, [r0:256]
 	%tmp1 = load <16 x i8>* %B
 	call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 64)
 	ret void
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
index 758b355..67f251f 100644
--- a/test/CodeGen/ARM/vstlane.ll
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -26,7 +26,7 @@ define void @vst1lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
 define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vst1lanei16:
 ;Check the alignment value.  Max for this instruction is 16 bits:
-;CHECK: vst1.16 {d16[2]}, [r0, :16]
+;CHECK: vst1.16 {d16[2]}, [r0:16]
 	%tmp1 = load <4 x i16>* %B
         %tmp2 = extractelement <4 x i16> %tmp1, i32 2
         store i16 %tmp2, i16* %A, align 8
@@ -36,7 +36,7 @@ define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
 define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vst1lanei32:
 ;Check the alignment value.  Max for this instruction is 32 bits:
-;CHECK: vst1.32 {d16[1]}, [r0, :32]
+;CHECK: vst1.32 {d16[1]}, [r0:32]
 	%tmp1 = load <2 x i32>* %B
         %tmp2 = extractelement <2 x i32> %tmp1, i32 1
         store i32 %tmp2, i32* %A, align 8
@@ -45,7 +45,7 @@ define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
 
 define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
 ;CHECK: vst1lanef:
-;CHECK: vst1.32 {d16[1]}, [r0, :32]
+;CHECK: vst1.32 {d16[1]}, [r0:32]
 	%tmp1 = load <2 x float>* %B
         %tmp2 = extractelement <2 x float> %tmp1, i32 1
         store float %tmp2, float* %A
@@ -64,7 +64,7 @@ define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
 
 define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vst1laneQi16:
-;CHECK: vst1.16 {d17[1]}, [r0, :16]
+;CHECK: vst1.16 {d17[1]}, [r0:16]
 	%tmp1 = load <8 x i16>* %B
         %tmp2 = extractelement <8 x i16> %tmp1, i32 5
         store i16 %tmp2, i16* %A, align 8
@@ -74,7 +74,7 @@ define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vst1laneQi32:
 ; // Can use scalar load. No need to use vectors.
-; // CHE-CK: vst1.32 {d17[1]}, [r0, :32]
+; // CHE-CK: vst1.32 {d17[1]}, [r0:32]
 	%tmp1 = load <4 x i32>* %B
         %tmp2 = extractelement <4 x i32> %tmp1, i32 3
         store i32 %tmp2, i32* %A, align 8
@@ -85,7 +85,7 @@ define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
 ;CHECK: vst1laneQi32_update:
 ; // Can use scalar load. No need to use vectors.
-; // CHE-CK: vst1.32 {d17[1]}, [r1, :32]!
+; // CHE-CK: vst1.32 {d17[1]}, [r1:32]!
 	%A = load i32** %ptr
 	%tmp1 = load <4 x i32>* %B
 	%tmp2 = extractelement <4 x i32> %tmp1, i32 3
@@ -108,7 +108,7 @@ define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind {
 define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vst2lanei8:
 ;Check the alignment value.  Max for this instruction is 16 bits:
-;CHECK: vst2.8 {d16[1], d17[1]}, [r0, :16]
+;CHECK: vst2.8 {d16[1], d17[1]}, [r0:16]
 	%tmp1 = load <8 x i8>* %B
 	call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
 	ret void
@@ -117,7 +117,7 @@ define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
 define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vst2lanei16:
 ;Check the alignment value.  Max for this instruction is 32 bits:
-;CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32]
+;CHECK: vst2.16 {d16[1], d17[1]}, [r0:32]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <4 x i16>* %B
 	call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
@@ -168,7 +168,7 @@ define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vst2laneQi32:
 ;Check the alignment value.  Max for this instruction is 64 bits:
-;CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64]
+;CHECK: vst2.32 {d17[0], d19[0]}, [r0:64]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <4 x i32>* %B
 	call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
@@ -283,7 +283,7 @@ declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x f
 define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vst4lanei8:
 ;Check the alignment value.  Max for this instruction is 32 bits:
-;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32]
 	%tmp1 = load <8 x i8>* %B
 	call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
 	ret void
@@ -292,7 +292,7 @@ define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;Check for a post-increment updating store.
 define void @vst4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
 ;CHECK: vst4lanei8_update:
-;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
+;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32]!
 	%A = load i8** %ptr
 	%tmp1 = load <8 x i8>* %B
 	call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
@@ -313,7 +313,7 @@ define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
 define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vst4lanei32:
 ;Check the alignment value.  Max for this instruction is 128 bits:
-;CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
+;CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0:128]
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <2 x i32>* %B
 	call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16)
@@ -332,7 +332,7 @@ define void @vst4lanef(float* %A, <2 x float>* %B) nounwind {
 define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vst4laneQi16:
 ;Check the alignment value.  Max for this instruction is 64 bits:
-;CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
+;CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0:64]
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <8 x i16>* %B
 	call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16)
diff --git a/test/CodeGen/Generic/inline-asm-mem-clobber.ll b/test/CodeGen/Generic/inline-asm-mem-clobber.ll
new file mode 100644
index 0000000..e523d03
--- /dev/null
+++ b/test/CodeGen/Generic/inline-asm-mem-clobber.ll
@@ -0,0 +1,21 @@
+; RUN: llc -O2 < %s | FileCheck %s
+
+@G = common global i32 0, align 4
+
+define i32 @foo(i8* %p) nounwind uwtable {
+entry:
+  %p.addr = alloca i8*, align 8
+  %rv = alloca i32, align 4
+  store i8* %p, i8** %p.addr, align 8
+  store i32 0, i32* @G, align 4
+  %0 = load i8** %p.addr, align 8
+; CHECK: blah
+  %1 = call i32 asm "blah", "=r,r,~{memory}"(i8* %0) nounwind
+; CHECK: @G
+  store i32 %1, i32* %rv, align 4
+  %2 = load i32* %rv, align 4
+  %3 = load i32* @G, align 4
+  %add = add nsw i32 %2, %3
+  ret i32 %add
+}
+
diff --git a/test/CodeGen/Hexagon/absaddr-store.ll b/test/CodeGen/Hexagon/absaddr-store.ll
new file mode 100644
index 0000000..5c2554d
--- /dev/null
+++ b/test/CodeGen/Hexagon/absaddr-store.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate store instructions with absolute addressing mode.
+
+@a = external global i32
+@b = external global i8
+@c = external global i16
+@d = external global i64
+
+define zeroext i8 @absStoreByte() nounwind {
+; CHECK: memb(##b){{ *}}={{ *}}r{{[0-9]+}}
+entry:
+  %0 = load i8* @b, align 1
+  %conv = zext i8 %0 to i32
+  %mul = mul nsw i32 100, %conv
+  %conv1 = trunc i32 %mul to i8
+  store i8 %conv1, i8* @b, align 1
+  ret i8 %conv1
+}
+
+define signext i16 @absStoreHalf() nounwind {
+; CHECK: memh(##c){{ *}}={{ *}}r{{[0-9]+}}
+entry:
+  %0 = load i16* @c, align 2
+  %conv = sext i16 %0 to i32
+  %mul = mul nsw i32 100, %conv
+  %conv1 = trunc i32 %mul to i16
+  store i16 %conv1, i16* @c, align 2
+  ret i16 %conv1
+}
+
+define i32 @absStoreWord() nounwind {
+; CHECK: memw(##a){{ *}}={{ *}}r{{[0-9]+}}
+entry:
+  %0 = load i32* @a, align 4
+  %mul = mul nsw i32 100, %0
+  store i32 %mul, i32* @a, align 4
+  ret i32 %mul
+}
+
+define void @absStoreDouble() nounwind {
+; CHECK: memd(##d){{ *}}={{ *}}r{{[0-9]+}}:{{[0-9]+}}
+entry:
+  store i64 100, i64* @d, align 8
+  ret void
+}
+
diff --git a/test/CodeGen/Hexagon/cext-check.ll b/test/CodeGen/Hexagon/cext-check.ll
new file mode 100644
index 0000000..7c4b19e
--- /dev/null
+++ b/test/CodeGen/Hexagon/cext-check.ll
@@ -0,0 +1,57 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate constant-extended instructions only when necessary.
+
+define i32 @cext_test1(i32* %a) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}+##8000)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##300000)
+; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}+##4092)
+; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##300)
+entry:
+  %0 = load i32* %a, align 4
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:
+  %arrayidx1 = getelementptr inbounds i32* %a, i32 2000
+  %1 = load i32* %arrayidx1, align 4
+  %add = add nsw i32 %1, 300000
+  br label %return
+
+if.end:
+  %arrayidx2 = getelementptr inbounds i32* %a, i32 1023
+  %2 = load i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %2, 300
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ %add, %if.then ], [ %add3, %if.end ]
+  ret i32 %retval.0
+}
+
+define i32 @cext_test2(i8* %a) nounwind {
+; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}+##1023)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##300000)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}+##1024)
+; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##6000)
+entry:
+  %tobool = icmp ne i8* %a, null
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:
+  %arrayidx = getelementptr inbounds i8* %a, i32 1023
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 300000
+  br label %return
+
+if.end:
+  %arrayidx1 = getelementptr inbounds i8* %a, i32 1024
+  %1 = load i8* %arrayidx1, align 1
+  %conv2 = zext i8 %1 to i32
+  %add3 = add nsw i32 %conv2, 6000
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ %add, %if.then ], [ %add3, %if.end ]
+  ret i32 %retval.0
+}
diff --git a/test/CodeGen/Hexagon/cmp-to-genreg.ll b/test/CodeGen/Hexagon/cmp-to-genreg.ll
new file mode 100644
index 0000000..97cf51c
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmp-to-genreg.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate compare to general register.
+
+define i32 @compare1(i32 %a) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}cmp.eq(r{{[0-9]+}},{{ *}}#120)
+entry:
+  %cmp = icmp eq i32 %a, 120
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @compare2(i32 %a) nounwind readnone {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}!cmp.eq(r{{[0-9]+}},{{ *}}#120)
+entry:
+  %cmp = icmp ne i32 %a, 120
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @compare3(i32 %a, i32 %b) nounwind readnone {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}cmp.eq(r{{[0-9]+}},{{ *}}r{{[0-9]+}})
+entry:
+  %cmp = icmp eq i32 %a, %b
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @compare4(i32 %a, i32 %b) nounwind readnone {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}!cmp.eq(r{{[0-9]+}},{{ *}}r{{[0-9]+}})
+entry:
+  %cmp = icmp ne i32 %a, %b
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
diff --git a/test/CodeGen/Hexagon/cmp-to-predreg.ll b/test/CodeGen/Hexagon/cmp-to-predreg.ll
new file mode 100644
index 0000000..d430b90
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmp-to-predreg.ll
@@ -0,0 +1,43 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate compare to predicate register.
+
+define i32 @compare1(i32 %a, i32 %b) nounwind {
+; CHECK: p{{[0-3]}}{{ *}}={{ *}}!cmp.eq(r{{[0-9]+}},{{ *}}r{{[0-9]+}})
+entry:
+  %cmp = icmp ne i32 %a, %b
+  %add = add nsw i32 %a, %b
+  %sub = sub nsw i32 %a, %b
+  %add.sub = select i1 %cmp, i32 %add, i32 %sub
+  ret i32 %add.sub
+}
+
+define i32 @compare2(i32 %a) nounwind {
+; CHECK: p{{[0-3]}}{{ *}}={{ *}}!cmp.eq(r{{[0-9]+}},{{ *}}#10)
+entry:
+  %cmp = icmp ne i32 %a, 10
+  %add = add nsw i32 %a, 10
+  %sub = sub nsw i32 %a, 10
+  %add.sub = select i1 %cmp, i32 %add, i32 %sub
+  ret i32 %add.sub
+}
+
+define i32 @compare3(i32 %a, i32 %b) nounwind {
+; CHECK: p{{[0-3]}}{{ *}}={{ *}}cmp.gt(r{{[0-9]+}},{{ *}}r{{[0-9]+}})
+entry:
+  %cmp = icmp sgt i32 %a, %b
+  %sub = sub nsw i32 %a, %b
+  %add = add nsw i32 %a, %b
+  %sub.add = select i1 %cmp, i32 %sub, i32 %add
+  ret i32 %sub.add
+}
+
+define i32 @compare4(i32 %a) nounwind {
+; CHECK: p{{[0-3]}}{{ *}}={{ *}}cmp.gt(r{{[0-9]+}},{{ *}}#10)
+entry:
+  %cmp = icmp sgt i32 %a, 10
+  %sub = sub nsw i32 %a, 10
+  %add = add nsw i32 %a, 10
+  %sub.add = select i1 %cmp, i32 %sub, i32 %add
+  ret i32 %sub.add
+}
+
diff --git a/test/CodeGen/Hexagon/cmp_pred.ll b/test/CodeGen/Hexagon/cmp_pred.ll
new file mode 100644
index 0000000..37db3b4
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmp_pred.ll
@@ -0,0 +1,115 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Generate various cmpb instructions followed by if (p0) .. if (!p0)...
+target triple = "hexagon"
+
+define i32 @Func_3Ugt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ugt i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Uge(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp uge i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Ult(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ult i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Ule(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ule i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Ueq(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp eq i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Une(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ne i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3UneC(i32 %Enum_Par_Val) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ne i32 %Enum_Par_Val, 122
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3gt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK: mux
+  %cmp = icmp sgt i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3ge(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp sge i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3lt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp slt i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3le(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp sle i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3eq(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp eq i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3ne(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ne i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3neC(i32 %Enum_Par_Val) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ne i32 %Enum_Par_Val, 122
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
diff --git a/test/CodeGen/Hexagon/cmp_pred_reg.ll b/test/CodeGen/Hexagon/cmp_pred_reg.ll
new file mode 100644
index 0000000..37db3b4
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmp_pred_reg.ll
@@ -0,0 +1,115 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Generate various cmpb instructions followed by if (p0) .. if (!p0)...
+target triple = "hexagon"
+
+define i32 @Func_3Ugt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ugt i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Uge(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp uge i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Ult(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ult i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Ule(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ule i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Ueq(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp eq i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Une(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ne i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3UneC(i32 %Enum_Par_Val) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ne i32 %Enum_Par_Val, 122
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3gt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK: mux
+  %cmp = icmp sgt i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3ge(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp sge i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3lt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp slt i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3le(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp sle i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3eq(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp eq i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3ne(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ne i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3neC(i32 %Enum_Par_Val) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ne i32 %Enum_Par_Val, 122
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
diff --git a/test/CodeGen/Hexagon/cmpb_pred.ll b/test/CodeGen/Hexagon/cmpb_pred.ll
new file mode 100644
index 0000000..1e61447
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmpb_pred.ll
@@ -0,0 +1,92 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Generate various cmpb instructions followed by if (p0) .. if (!p0)...
+target triple = "hexagon"
+
+@Enum_global = external global i8
+
+define i32 @Func_3(i32) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %conv = and i32 %0, 255
+  %cmp = icmp eq i32 %conv, 2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3b(i32) nounwind readonly {
+entry:
+; CHECK-NOT: mux
+  %1 = load i8* @Enum_global, align 1, !tbaa !0
+  %2 = trunc i32 %0 to i8
+  %cmp = icmp ne i8 %1, %2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3c(i32) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %conv = and i32 %0, 255
+  %cmp = icmp eq i32 %conv, 2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3d(i32) nounwind readonly {
+entry:
+; CHECK-NOT: mux
+  %1 = load i8* @Enum_global, align 1, !tbaa !0
+  %2 = trunc i32 %0 to i8
+  %cmp = icmp eq i8 %1, %2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3e(i32) nounwind readonly {
+entry:
+; CHECK-NOT: mux
+  %1 = load i8* @Enum_global, align 1, !tbaa !0
+  %2 = trunc i32 %0 to i8
+  %cmp = icmp eq i8 %1, %2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3f(i32) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %conv = and i32 %0, 255
+  %cmp = icmp ugt i32 %conv, 2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3g(i32) nounwind readnone {
+entry:
+; CHECK: mux
+  %conv = and i32 %0, 255
+  %cmp = icmp ult i32 %conv, 3
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3h(i32) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %conv = and i32 %0, 254
+  %cmp = icmp ult i32 %conv, 2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3i(i32) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %conv = and i32 %0, 254
+  %cmp = icmp ugt i32 %conv, 1
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/combine_ir.ll b/test/CodeGen/Hexagon/combine_ir.ll
new file mode 100644
index 0000000..921ce99
--- /dev/null
+++ b/test/CodeGen/Hexagon/combine_ir.ll
@@ -0,0 +1,55 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: word
+; CHECK: combine(#0
+
+define void @word(i32* nocapture %a) nounwind {
+entry:
+  %0 = load i32* %a, align 4, !tbaa !0
+  %1 = zext i32 %0 to i64
+  %add.ptr = getelementptr inbounds i32* %a, i32 1
+  %2 = load i32* %add.ptr, align 4, !tbaa !0
+  %3 = zext i32 %2 to i64
+  %4 = shl nuw i64 %3, 32
+  %ins = or i64 %4, %1
+  tail call void @bar(i64 %ins) nounwind
+  ret void
+}
+
+declare void @bar(i64)
+
+; CHECK: halfword
+; CHECK: combine(#0
+
+define void @halfword(i16* nocapture %a) nounwind {
+entry:
+  %0 = load i16* %a, align 2, !tbaa !3
+  %1 = zext i16 %0 to i64
+  %add.ptr = getelementptr inbounds i16* %a, i32 1
+  %2 = load i16* %add.ptr, align 2, !tbaa !3
+  %3 = zext i16 %2 to i64
+  %4 = shl nuw nsw i64 %3, 16
+  %ins = or i64 %4, %1
+  tail call void @bar(i64 %ins) nounwind
+  ret void
+}
+
+; CHECK: byte
+; CHECK: combine(#0
+
+define void @byte(i8* nocapture %a) nounwind {
+entry:
+  %0 = load i8* %a, align 1, !tbaa !1
+  %1 = zext i8 %0 to i64
+  %add.ptr = getelementptr inbounds i8* %a, i32 1
+  %2 = load i8* %add.ptr, align 1, !tbaa !1
+  %3 = zext i8 %2 to i64
+  %4 = shl nuw nsw i64 %3, 8
+  %ins = or i64 %4, %1
+  tail call void @bar(i64 %ins) nounwind
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"short", metadata !1}
diff --git a/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll b/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll
new file mode 100644
index 0000000..e942f8d
--- /dev/null
+++ b/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} |= lsr(r{{[0-9]+}}:{{[0-9]+}}, #4)
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} &= lsr(r{{[0-9]+}}:{{[0-9]+}}, #2)
+; CHECK: r{{[0-9]+}} += lsr(r{{[0-9]+}}, #4)
+
+define i32 @foo(i64 %a, i32 %b) nounwind  {
+entry:
+        %tmp0 = tail call i64 @llvm.ctlz.i64( i64 %a, i1 true )
+        %tmp1 = tail call i64 @llvm.cttz.i64( i64 %a, i1 true )
+        %tmp2 = tail call i32 @llvm.ctlz.i32( i32 %b, i1 true )
+        %tmp3 = tail call i32 @llvm.cttz.i32( i32 %b, i1 true )
+        %tmp4 = tail call i64 @llvm.ctpop.i64( i64 %a )
+        %tmp5 = tail call i32 @llvm.ctpop.i32( i32 %b )
+
+
+        %tmp6 = trunc i64 %tmp0 to i32
+        %tmp7 = trunc i64 %tmp1 to i32
+        %tmp8 = trunc i64 %tmp4 to i32
+        %tmp9 = add i32 %tmp6, %tmp7
+        %tmp10 = add i32 %tmp9, %tmp8
+        %tmp11 = add i32 %tmp10, %tmp2
+        %tmp12 = add i32 %tmp11, %tmp3
+        %tmp13 = add i32 %tmp12, %tmp5
+
+        ret i32 %tmp13
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
diff --git a/test/CodeGen/Hexagon/dualstore.ll b/test/CodeGen/Hexagon/dualstore.ll
index 0674995..3bf6019 100644
--- a/test/CodeGen/Hexagon/dualstore.ll
+++ b/test/CodeGen/Hexagon/dualstore.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; Check that we generate dual stores in one packet in V4
 
-; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}={{ *}}#100000
-; CHECK-NEXT: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}={{ *}}#500000
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}={{ *}}##100000
+; CHECK-NEXT: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}={{ *}}##500000
 ; CHECK-NEXT: }
 
 @Reg = global i32 0, align 4
diff --git a/test/CodeGen/Hexagon/gp-plus-offset-load.ll b/test/CodeGen/Hexagon/gp-plus-offset-load.ll
new file mode 100644
index 0000000..a1b80a6
--- /dev/null
+++ b/test/CodeGen/Hexagon/gp-plus-offset-load.ll
@@ -0,0 +1,51 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate load instructions with global + offset
+
+%struct.struc = type { i8, i8, i16, i32 }
+
+@foo = common global %struct.struc zeroinitializer, align 4
+; @loadWord: if %val1 > %val2 (signed), copies the i32 field (index 3) of @foo to *%ival; expected as memw(##foo + 4).
+define void @loadWord(i32 %val1, i32 %val2, i32* nocapture %ival) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(##foo{{ *}}+{{ *}}4)
+entry:
+  %cmp = icmp sgt i32 %val1, %val2
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %0 = load i32* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 3), align 4
+  store i32 %0, i32* %ival, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+; @loadByte: if %val1 > %val2 (signed), copies the i8 field (index 1) of @foo to *%ival; expected as memb(##foo + 1).
+define void @loadByte(i32 %val1, i32 %val2, i8* nocapture %ival) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memb(##foo{{ *}}+{{ *}}1)
+entry:
+  %cmp = icmp sgt i32 %val1, %val2
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %0 = load i8* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 1), align 1
+  store i8 %0, i8* %ival, align 1
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+; @loadHWord: if %val1 > %val2 (signed), copies the i16 field (index 2) of @foo to *%ival; expected as memh(##foo + 2).
+define void @loadHWord(i32 %val1, i32 %val2, i16* %ival) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memh(##foo{{ *}}+{{ *}}2)
+entry:
+  %cmp = icmp sgt i32 %val1, %val2
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %0 = load i16* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 2), align 2
+  store i16 %0, i16* %ival, align 2
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
diff --git a/test/CodeGen/Hexagon/gp-plus-offset-store.ll b/test/CodeGen/Hexagon/gp-plus-offset-store.ll
new file mode 100644
index 0000000..c782b30
--- /dev/null
+++ b/test/CodeGen/Hexagon/gp-plus-offset-store.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate store instructions with global + offset
+
+%struct.struc = type { i8, i8, i16, i32 }
+
+@foo = common global %struct.struc zeroinitializer, align 4
+; @storeByte: if %val1 > %val2 (signed), stores %ival into the i8 field (index 1) of @foo; expected as memb(##foo + 1) = rN.
+define void @storeByte(i32 %val1, i32 %val2, i8 zeroext %ival) nounwind {
+; CHECK: memb(##foo{{ *}}+{{ *}}1){{ *}}={{ *}}r{{[0-9]+}}
+entry:
+  %cmp = icmp sgt i32 %val1, %val2
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  store i8 %ival, i8* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 1), align 1
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+; @storeHW: if %val1 > %val2 (signed), stores %ival into the i16 field (index 2) of @foo; expected as memh(##foo + 2) = rN.
+define void @storeHW(i32 %val1, i32 %val2, i16 signext %ival) nounwind {
+; CHECK: memh(##foo{{ *}}+{{ *}}2){{ *}}={{ *}}r{{[0-9]+}}
+entry:
+  %cmp = icmp sgt i32 %val1, %val2
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  store i16 %ival, i16* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 2), align 2
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
diff --git a/test/CodeGen/Hexagon/hwloop-cleanup.ll b/test/CodeGen/Hexagon/hwloop-cleanup.ll
new file mode 100644
index 0000000..6456ebf
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-cleanup.ll
@@ -0,0 +1,86 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we remove the compare and induction variable instructions
+; after generating hardware loops.
+; Bug 6685.
+
+; CHECK: loop0
+; CHECK-NOT: r{{[0-9]+}}{{.}}={{.}}add(r{{[0-9]+}},{{.}}#-1)
+; CHECK-NOT: cmp.eq
+; CHECK: endloop0
+; @test1: returns the sum of the first %n i32 values at %b (0 when %n <= 0); the trip count is the run-time value %n.
+define i32 @test1(i32* nocapture %b, i32 %n) nounwind readonly {
+entry:
+  %cmp1 = icmp sgt i32 %n, 0
+  br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %sum.03 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx.phi = phi i32* [ %arrayidx.inc, %for.body ], [ %b, %for.body.preheader ]
+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %0 = load i32* %arrayidx.phi, align 4
+  %add = add nsw i32 %0, %sum.03
+  %inc = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %inc, %n
+  %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.end.loopexit ]
+  ret i32 %sum.0.lcssa
+}
+
+; This test checks that the initial loop count value is removed.
+; CHECK-NOT: ={{.}}#40
+; CHECK: loop0
+; CHECK-NOT: r{{[0-9]+}}{{.}}={{.}}add(r{{[0-9]+}},{{.}}#-1)
+; CHECK-NOT: cmp.eq
+; CHECK: endloop0
+; @test2: returns the sum of the first 40 i32 values at %b; the trip count is the constant 40.
+define i32 @test2(i32* nocapture %b) nounwind readonly {
+entry:
+  br label %for.body
+
+for.body:
+  %sum.02 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx.phi = phi i32* [ %b, %entry ], [ %arrayidx.inc, %for.body ]
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %0 = load i32* %arrayidx.phi, align 4
+  %add = add nsw i32 %0, %sum.02
+  %inc = add nsw i32 %i.01, 1
+  %exitcond = icmp eq i32 %inc, 40
+  %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %add
+}
+
+; This test checks that we don't remove the induction variable since it's used.
+; CHECK: loop0
+; CHECK: r{{[0-9]+}}{{.}}={{.}}add(r{{[0-9]+}},{{.}}#1)
+; CHECK-NOT: cmp.eq
+; CHECK: endloop0
+define i32 @test3(i32* nocapture %b) nounwind {
+entry:
+  br label %for.body
+; Loop: stores i into b[i] for i = 0..39, so the induction variable %i.01 is itself the stored value.
+for.body:
+  %arrayidx.phi = phi i32* [ %b, %entry ], [ %arrayidx.inc, %for.body ]
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  store i32 %i.01, i32* %arrayidx.phi, align 4
+  %inc = add nsw i32 %i.01, 1
+  %exitcond = icmp eq i32 %inc, 40
+  %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 0
+}
+
+
diff --git a/test/CodeGen/Hexagon/hwloop-const.ll b/test/CodeGen/Hexagon/hwloop-const.ll
new file mode 100644
index 0000000..a621c58
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-const.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O2 < %s | FileCheck %s
+; ModuleID = 'hwloop-const.c'
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon-unknown-linux-gnu"
+
+@b = common global [25000 x i32] zeroinitializer, align 8
+@a = common global [25000 x i32] zeroinitializer, align 8
+@c = common global [25000 x i32] zeroinitializer, align 8
+; @hwloop_bug: stores i into both @b[i] and @a[i] for i = 0..24999 (constant trip count 25000), then returns 0.
+define i32 @hwloop_bug() nounwind {
+entry:
+  br label %for.body
+
+; CHECK: endloop
+for.body:                                         ; preds = %for.body, %entry
+  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [25000 x i32]* @b, i32 0, i32 %i.02
+  store i32 %i.02, i32* %arrayidx, align 4, !tbaa !0
+  %arrayidx1 = getelementptr inbounds [25000 x i32]* @a, i32 0, i32 %i.02
+  store i32 %i.02, i32* %arrayidx1, align 4, !tbaa !0
+  %inc = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %inc, 25000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/hwloop-dbg.ll b/test/CodeGen/Hexagon/hwloop-dbg.ll
new file mode 100644
index 0000000..eaffa07
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-dbg.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -march=hexagon -mcpu=hexagonv4 -O2 -disable-lsr | FileCheck %s
+; ModuleID = 'hwloop-dbg.o'
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+; @foo: copies 10 i32 values from %b to %a; llvm.dbg.value calls sit in the entry block and inside the loop body.
+define void @foo(i32* nocapture %a, i32* nocapture %b) nounwind {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32* %a}, i64 0, metadata !13), !dbg !17
+  tail call void @llvm.dbg.value(metadata !{i32* %b}, i64 0, metadata !14), !dbg !18
+  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !15), !dbg !19
+  br label %for.body, !dbg !19
+
+for.body:                                         ; preds = %for.body, %entry
+; CHECK:     loop0(
+; CHECK-NOT: add({{r[0-9]*}}, #
+; CHECK:     endloop0
+  %arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
+  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %b.addr.01 = phi i32* [ %b, %entry ], [ %incdec.ptr, %for.body ]
+  %incdec.ptr = getelementptr inbounds i32* %b.addr.01, i32 1, !dbg !21
+  tail call void @llvm.dbg.value(metadata !{i32* %incdec.ptr}, i64 0, metadata !14), !dbg !21
+  %0 = load i32* %b.addr.01, align 4, !dbg !21, !tbaa !23
+  store i32 %0, i32* %arrayidx.phi, align 4, !dbg !21, !tbaa !23
+  %inc = add nsw i32 %i.02, 1, !dbg !26
+  tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !15), !dbg !26
+  %exitcond = icmp eq i32 %inc, 10, !dbg !19
+  %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+  br i1 %exitcond, label %for.end, label %for.body, !dbg !19
+
+for.end:                                          ; preds = %for.body
+  ret void, !dbg !27
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"hwloop-dbg.c", metadata !"/usr2/kparzysz/s.hex/t", metadata !"QuIC LLVM Hexagon Clang version 6.1-pre-unknown, (git://git-hexagon-aus.quicinc.com/llvm/clang-mainline.git e9382867661454cdf44addb39430741578e9765c) (llvm/llvm-mainline.git 36412bb1fcf03ed426d4437b41198bae066675ac)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c] [DW_LANG_C99]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32*)* @foo, null, null, metadata !11, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!6 = metadata !{i32 786473, metadata !"hwloop-dbg.c", metadata !"/usr2/kparzysz/s.hex/t", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{null, metadata !9, metadata !9}
+!9 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from int]
+!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!11 = metadata !{metadata !12}
+!12 = metadata !{metadata !13, metadata !14, metadata !15}
+!13 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 1]
+!14 = metadata !{i32 786689, metadata !5, metadata !"b", metadata !6, i32 33554433, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 1]
+!15 = metadata !{i32 786688, metadata !16, metadata !"i", metadata !6, i32 2, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 2]
+!16 = metadata !{i32 786443, metadata !5, i32 1, i32 26, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
+!17 = metadata !{i32 1, i32 15, metadata !5, null}
+!18 = metadata !{i32 1, i32 23, metadata !5, null}
+!19 = metadata !{i32 3, i32 8, metadata !20, null}
+!20 = metadata !{i32 786443, metadata !16, i32 3, i32 3, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
+!21 = metadata !{i32 4, i32 5, metadata !22, null}
+!22 = metadata !{i32 786443, metadata !20, i32 3, i32 28, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
+!23 = metadata !{metadata !"int", metadata !24}
+!24 = metadata !{metadata !"omnipotent char", metadata !25}
+!25 = metadata !{metadata !"Simple C/C++ TBAA"}
+!26 = metadata !{i32 3, i32 23, metadata !20, null}
+!27 = metadata !{i32 6, i32 1, metadata !16, null}
diff --git a/test/CodeGen/Hexagon/hwloop-le.ll b/test/CodeGen/Hexagon/hwloop-le.ll
new file mode 100644
index 0000000..9c8cec7
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-le.ll
@@ -0,0 +1,438 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
+
+
+; CHECK: test_pos1_ir_sle
+; CHECK: loop0
+; for (i = 28395 (immediate); i <= %b (signed); i += 1) adds 1 to the byte p[i]. %a is unused.
+define void @test_pos1_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 28395, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 28395, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_ir_sle
+; CHECK: loop0
+; for (i = 9073 (immediate); i <= %b (signed); i += 2) adds 1 to the byte p[i]. %a is unused.
+define void @test_pos2_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 9073, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 9073, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_ir_sle
+; CHECK: loop0
+; for (i = 21956 (immediate); i <= %b (signed); i += 4) adds 1 to the byte p[i]. %a is unused.
+define void @test_pos4_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 21956, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 21956, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_ir_sle
+; CHECK: loop0
+; for (i = 16782 (immediate); i <= %b (signed); i += 8) adds 1 to the byte p[i]. %a is unused.
+define void @test_pos8_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 16782, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 16782, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_ir_sle
+; CHECK: loop0
+; for (i = 19097 (immediate); i <= %b (signed); i += 16) adds 1 to the byte p[i]. %a is unused.
+define void @test_pos16_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 19097, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 19097, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos1_ri_sle
+; CHECK: loop0
+; for (i = %a; i <= 14040 (signed, immediate bound); i += 1) adds 1 to the byte p[i]. %b is unused.
+define void @test_pos1_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, 14040
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1
+  %cmp = icmp sle i32 %inc, 14040
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_ri_sle
+; CHECK: loop0
+; for (i = %a; i <= 13710 (signed, immediate bound); i += 2) adds 1 to the byte p[i]. %b is unused.
+define void @test_pos2_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, 13710
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2
+  %cmp = icmp sle i32 %inc, 13710
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_ri_sle
+; CHECK: loop0
+; for (i = %a; i <= 9920 (signed, immediate bound); i += 4) adds 1 to the byte p[i]. %b is unused.
+define void @test_pos4_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, 9920
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4
+  %cmp = icmp sle i32 %inc, 9920
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_ri_sle
+; CHECK: loop0
+; for (i = %a; i <= 18924 (signed, immediate bound); i += 8) adds 1 to the byte p[i]. %b is unused.
+define void @test_pos8_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, 18924
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8
+  %cmp = icmp sle i32 %inc, 18924
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_ri_sle
+; CHECK: loop0
+; for (i = %a; i <= 11812 (signed, immediate bound); i += 16) adds 1 to the byte p[i]. %b is unused.
+define void @test_pos16_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, 11812
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16
+  %cmp = icmp sle i32 %inc, 11812
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos1_rr_sle
+; CHECK: loop0
+; for (i = %a; i <= %b (signed, both bounds in registers); i += 1) adds 1 to the byte p[i].
+define void @test_pos1_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_rr_sle
+; CHECK: loop0
+; for (i = %a; i <= %b (signed, both bounds in registers); i += 2) adds 1 to the byte p[i].
+define void @test_pos2_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_rr_sle
+; CHECK: loop0
+; for (i = %a; i <= %b (signed, both bounds in registers); i += 4) adds 1 to the byte p[i].
+define void @test_pos4_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_rr_sle
+; CHECK: loop0
+; for (i = %a; i <= %b (signed, both bounds in registers); i += 8) adds 1 to the byte p[i].
+define void @test_pos8_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_rr_sle
+; CHECK: loop0
+; for (i = %a; i <= %b (signed, both bounds in registers); i += 16) adds 1 to the byte p[i].
+define void @test_pos16_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
diff --git a/test/CodeGen/Hexagon/hwloop-lt.ll b/test/CodeGen/Hexagon/hwloop-lt.ll
new file mode 100644
index 0000000..7e43733
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-lt.ll
@@ -0,0 +1,438 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
+
+
+; CHECK: test_pos1_ir_slt
+; CHECK: loop0
+; i = 8531; if (i < b) do { p[i]++; i += 1; } while (i < b);  (%a is unused)
+define void @test_pos1_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 8531, %b  ; entry guard
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 8531, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1  ; i += 1
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop while i < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_ir_slt
+; CHECK: loop0
+; i = 9152; if (i < b) do { p[i]++; i += 2; } while (i < b);  (%a is unused)
+define void @test_pos2_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 9152, %b  ; entry guard
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 9152, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2  ; i += 2
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop while i < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_ir_slt
+; CHECK: loop0
+; i = 18851; if (i < b) do { p[i]++; i += 4; } while (i < b);  (%a is unused)
+define void @test_pos4_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 18851, %b  ; entry guard
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 18851, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4  ; i += 4
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop while i < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_ir_slt
+; CHECK: loop0
+; i = 25466; if (i < b) do { p[i]++; i += 8; } while (i < b);  (%a is unused)
+define void @test_pos8_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 25466, %b  ; entry guard
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 25466, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8  ; i += 8
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop while i < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_ir_slt
+; CHECK: loop0
+; i = 9295; if (i < b) do { p[i]++; i += 16; } while (i < b);  (%a is unused)
+define void @test_pos16_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 9295, %b  ; entry guard
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 9295, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16  ; i += 16
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop while i < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos1_ri_slt
+; CHECK: loop0
+; i = a; if (i < 31236) do { p[i]++; i += 1; } while (i < 31236);  (%b is unused)
+define void @test_pos1_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 31236  ; entry guard
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1  ; i += 1
+  %cmp = icmp slt i32 %inc, 31236  ; latch: loop while i < 31236
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_ri_slt
+; CHECK: loop0
+; i = a; if (i < 22653) do { p[i]++; i += 2; } while (i < 22653);  (%b is unused)
+define void @test_pos2_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 22653  ; entry guard
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2  ; i += 2
+  %cmp = icmp slt i32 %inc, 22653  ; latch: loop while i < 22653
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_ri_slt
+; CHECK: loop0
+; i = a; if (i < 1431) do { p[i]++; i += 4; } while (i < 1431);  (%b is unused)
+define void @test_pos4_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 1431  ; entry guard
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4  ; i += 4
+  %cmp = icmp slt i32 %inc, 1431  ; latch: loop while i < 1431
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_ri_slt
+; CHECK: loop0
+; i = a; if (i < 22403) do { p[i]++; i += 8; } while (i < 22403);  (%b is unused)
+define void @test_pos8_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 22403  ; entry guard
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8  ; i += 8
+  %cmp = icmp slt i32 %inc, 22403  ; latch: loop while i < 22403
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_ri_slt
+; CHECK: loop0
+; i = a; if (i < 21715) do { p[i]++; i += 16; } while (i < 21715);  (%b is unused)
+define void @test_pos16_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 21715  ; entry guard
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16  ; i += 16
+  %cmp = icmp slt i32 %inc, 21715  ; latch: loop while i < 21715
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos1_rr_slt
+; CHECK: loop0
+; i = a; if (i < b) do { p[i]++; i += 1; } while (i < b);
+define void @test_pos1_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; entry guard
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1  ; i += 1
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop while i < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_rr_slt
+; CHECK: loop0
+; i = a; if (i < b) do { p[i]++; i += 2; } while (i < b);
+define void @test_pos2_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; entry guard
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2  ; i += 2
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop while i < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_rr_slt
+; CHECK: loop0
+; i = a; if (i < b) do { p[i]++; i += 4; } while (i < b);
+define void @test_pos4_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; entry guard
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4  ; i += 4
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop while i < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_rr_slt
+; CHECK: loop0
+; i = a; if (i < b) do { p[i]++; i += 8; } while (i < b);
+define void @test_pos8_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; entry guard
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8  ; i += 8
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop while i < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_rr_slt
+; CHECK: loop0
+; i = a; if (i < b) do { p[i]++; i += 16; } while (i < b);
+define void @test_pos16_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; entry guard
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16  ; i += 16
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop while i < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
diff --git a/test/CodeGen/Hexagon/hwloop-lt1.ll b/test/CodeGen/Hexagon/hwloop-lt1.ll
new file mode 100644
index 0000000..cf58740
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-lt1.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate a hardware loop instruction.
+; CHECK: endloop0
+
+@A = common global [400 x i8] zeroinitializer, align 8  ; the only array @run writes to
+@B = common global [400 x i8] zeroinitializer, align 8  ; not referenced by @run
+@C = common global [400 x i8] zeroinitializer, align 8  ; not referenced by @run
+
+define void @run() nounwind {
+entry:
+  br label %polly.loop_body
+
+polly.loop_after:                                 ; preds = %polly.loop_body
+  ret void
+
+polly.loop_body:                                  ; preds = %entry, %polly.loop_body
+  %polly.loopiv16 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ]  ; iv starts at 0
+  %polly.next_loopiv = add i32 %polly.loopiv16, 4  ; iv += 4
+  %p_vector_iv14 = or i32 %polly.loopiv16, 1  ; iv | 1
+  %p_vector_iv3 = add i32 %p_vector_iv14, 1  ; (iv | 1) + 1
+  %p_vector_iv415 = or i32 %polly.loopiv16, 3  ; iv | 3
+  %p_arrayidx = getelementptr [400 x i8]* @A, i32 0, i32 %polly.loopiv16
+  %p_arrayidx5 = getelementptr [400 x i8]* @A, i32 0, i32 %p_vector_iv14
+  %p_arrayidx6 = getelementptr [400 x i8]* @A, i32 0, i32 %p_vector_iv3
+  %p_arrayidx7 = getelementptr [400 x i8]* @A, i32 0, i32 %p_vector_iv415
+  store i8 123, i8* %p_arrayidx, align 1  ; four byte stores of 123 into @A per iteration
+  store i8 123, i8* %p_arrayidx5, align 1
+  store i8 123, i8* %p_arrayidx6, align 1
+  store i8 123, i8* %p_arrayidx7, align 1
+  %0 = icmp slt i32 %polly.next_loopiv, 400  ; latch: loop while next iv < 400 (iv = 0, 4, ..., 396)
+  br i1 %0, label %polly.loop_body, label %polly.loop_after
+}
diff --git a/test/CodeGen/Hexagon/hwloop-ne.ll b/test/CodeGen/Hexagon/hwloop-ne.ll
new file mode 100644
index 0000000..bceef2a
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-ne.ll
@@ -0,0 +1,438 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
+
+
+; CHECK: test_pos1_ir_ne
+; CHECK: loop0
+; i = 32623; if (i < b) do { p[i]++; i += 1; } while (i != b);  (%a is unused)
+define void @test_pos1_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 32623, %b  ; entry guard (signed <, unlike the != latch)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 32623, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1  ; i += 1
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop while i != b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_ir_ne
+; CHECK: loop0
+; i = 29554; if (i < b) do { p[i]++; i += 2; } while (i != b);  (%a is unused)
+define void @test_pos2_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 29554, %b  ; entry guard (signed <, unlike the != latch)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 29554, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2  ; i += 2
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop while i != b (i hits b only if b - 29554 is a multiple of 2)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_ir_ne
+; CHECK: loop0
+; i = 15692; if (i < b) do { p[i]++; i += 4; } while (i != b);  (%a is unused)
+define void @test_pos4_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 15692, %b  ; entry guard (signed <, unlike the != latch)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 15692, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4  ; i += 4
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop while i != b (i hits b only if b - 15692 is a multiple of 4)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_ir_ne
+; CHECK: loop0
+; i = 10449; if (i < b) do { p[i]++; i += 8; } while (i != b);  (%a is unused)
+define void @test_pos8_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 10449, %b  ; entry guard (signed <, unlike the != latch)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 10449, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8  ; i += 8
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop while i != b (i hits b only if b - 10449 is a multiple of 8)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_ir_ne
+; CHECK: loop0
+; i = 32087; if (i < b) do { p[i]++; i += 16; } while (i != b);  (%a is unused)
+define void @test_pos16_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 32087, %b  ; entry guard (signed <, unlike the != latch)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 32087, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16  ; i += 16
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop while i != b (i hits b only if b - 32087 is a multiple of 16)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos1_ri_ne
+; CHECK: loop0
+; i = a; if (i < 3472) do { p[i]++; i += 1; } while (i != 3472);  (%b is unused)
+define void @test_pos1_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 3472  ; entry guard (signed <, unlike the != latch)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1  ; i += 1
+  %cmp = icmp ne i32 %inc, 3472  ; latch: loop while i != 3472
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_ri_ne
+; CHECK: loop0
+; i = a; if (i < 8730) do { p[i]++; i += 2; } while (i != 8730);  (%b is unused)
+define void @test_pos2_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 8730  ; entry guard (signed <, unlike the != latch)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2  ; i += 2
+  %cmp = icmp ne i32 %inc, 8730  ; latch: loop while i != 8730 (i hits 8730 only if 8730 - a is a multiple of 2)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_ri_ne
+; CHECK: loop0
+; i = a; if (i < 1493) do { p[i]++; i += 4; } while (i != 1493);  (%b is unused)
+define void @test_pos4_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 1493  ; entry guard (signed <, unlike the != latch)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4  ; i += 4
+  %cmp = icmp ne i32 %inc, 1493  ; latch: loop while i != 1493 (i hits 1493 only if 1493 - a is a multiple of 4)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_ri_ne
+; CHECK: loop0
+; i = a; if (i < 1706) do { p[i]++; i += 8; } while (i != 1706);  (%b is unused)
+define void @test_pos8_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 1706  ; entry guard (signed <, unlike the != latch)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8  ; i += 8
+  %cmp = icmp ne i32 %inc, 1706  ; latch: loop while i != 1706 (i hits 1706 only if 1706 - a is a multiple of 8)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_ri_ne
+; CHECK: loop0
+; i = a; if (i < 1886) do { p[i]++; i += 16; } while (i != 1886);  (%b is unused)
+define void @test_pos16_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 1886  ; entry guard (signed <, unlike the != latch)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16  ; i += 16
+  %cmp = icmp ne i32 %inc, 1886  ; latch: loop while i != 1886 (i hits 1886 only if 1886 - a is a multiple of 16)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos1_rr_ne
+; CHECK: loop0
+; i = a; if (i < b) do { p[i]++; i += 1; } while (i != b);
+define void @test_pos1_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; entry guard (signed <, unlike the != latch)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1  ; i += 1
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop while i != b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_rr_ne
+; CHECK: loop0
+; i = a; if (i < b) do { p[i]++; i += 2; } while (i != b);
+define void @test_pos2_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; entry guard (signed <, unlike the != latch)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2  ; i += 2
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop while i != b (i hits b only if b - a is a multiple of 2)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_rr_ne
+; CHECK: loop0
+; i = a; if (i < b) do { p[i]++; i += 4; } while (i != b);
+define void @test_pos4_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; entry guard (signed <, unlike the != latch)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4  ; i += 4
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop while i != b (i hits b only if b - a is a multiple of 4)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_rr_ne
+; CHECK: loop0
+; i = a; if (i < b) do { p[i]++; i += 8; } while (i != b);
+define void @test_pos8_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; entry guard (signed <, unlike the != latch)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8  ; i += 8
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop while i != b (i hits b only if b - a is a multiple of 8)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_rr_ne
+; CHECK: loop0
+; i = a; if (i < b) do { p[i]++; i += 16; } while (i != b);
+define void @test_pos16_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; entry guard (signed <, unlike the != latch)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16  ; i += 16
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop while i != b (i hits b only if b - a is a multiple of 16)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
diff --git a/test/CodeGen/Hexagon/idxload-with-zero-offset.ll b/test/CodeGen/Hexagon/idxload-with-zero-offset.ll
new file mode 100644
index 0000000..ca6df88
--- /dev/null
+++ b/test/CodeGen/Hexagon/idxload-with-zero-offset.ll
@@ -0,0 +1,70 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate load instruction with (base + register offset << 0)
+
+; load word: returns the i32 at a[n << 4]; the pattern expects memw with a register offset scaled by <<#0
+
+define i32 @load_w(i32* nocapture %a, i32 %n) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+entry:
+  %tmp = shl i32 %n, 4  ; element index = n << 4
+  %scevgep9 = getelementptr i32* %a, i32 %tmp  ; &a[n << 4]
+  %val = load i32* %scevgep9, align 4
+  ret i32 %val
+}
+
+; load unsigned half word: returns the i16 at a[n << 4] unextended; the pattern expects memuh with <<#0
+
+define i16 @load_uh(i16* nocapture %a, i32 %n) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memuh(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+entry:
+  %tmp = shl i32 %n, 4  ; element index = n << 4
+  %scevgep9 = getelementptr i16* %a, i32 %tmp  ; &a[n << 4]
+  %val = load i16* %scevgep9, align 2
+  ret i16 %val
+}
+
+; load signed half word: returns a[n << 4] sign-extended from i16 to i32; the pattern expects memh with <<#0
+
+define i32 @load_h(i16* nocapture %a, i32 %n) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memh(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+entry:
+  %tmp = shl i32 %n, 4  ; element index = n << 4
+  %scevgep9 = getelementptr i16* %a, i32 %tmp  ; &a[n << 4]
+  %val = load i16* %scevgep9, align 2
+  %conv = sext i16 %val to i32  ; the sign extension is what makes this the signed (memh) variant
+  ret i32 %conv
+}
+
+; load unsigned byte: returns the i8 at a[n << 4] unextended; the pattern expects memub with <<#0
+
+define i8 @load_ub(i8* nocapture %a, i32 %n) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+entry:
+  %tmp = shl i32 %n, 4  ; element index = n << 4
+  %scevgep9 = getelementptr i8* %a, i32 %tmp  ; &a[n << 4]
+  %val = load i8* %scevgep9, align 1
+  ret i8 %val
+}
+
+; load signed byte: returns a[n << 4] sign-extended from i8 to i32 (memb, <<#0). NOTE(review): siblings are named load_*; load_b would match
+
+define i32 @foo_2(i8* nocapture %a, i32 %n) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memb(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+entry:
+  %tmp = shl i32 %n, 4  ; element index = n << 4
+  %scevgep9 = getelementptr i8* %a, i32 %tmp  ; &a[n << 4]
+  %val = load i8* %scevgep9, align 1
+  %conv = sext i8 %val to i32  ; the sign extension is what makes this the signed (memb) variant
+  ret i32 %conv
+}
+
+; load doubleword: returns the i64 at a[n << 4]; the pattern expects memd into a register pair with <<#0
+
+define i64 @load_d(i64* nocapture %a, i32 %n) nounwind {
+; CHECK: r{{[0-9]+}}:{{[0-9]+}}{{ *}}={{ *}}memd(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+entry:
+  %tmp = shl i32 %n, 4  ; element index = n << 4
+  %scevgep9 = getelementptr i64* %a, i32 %tmp  ; &a[n << 4]
+  %val = load i64* %scevgep9, align 8
+  ret i64 %val
+}
diff --git a/test/CodeGen/Hexagon/postinc-store.ll b/test/CodeGen/Hexagon/postinc-store.ll
new file mode 100644
index 0000000..99a3a58
--- /dev/null
+++ b/test/CodeGen/Hexagon/postinc-store.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+; Check that post-increment store instructions are being generated.
+; CHECK: memw(r{{[0-9]+}}{{ *}}++{{ *}}#4{{ *}}){{ *}}={{ *}}r{{[0-9]+}}
+
+define i32 @sum(i32* nocapture %a, i16* nocapture %b, i32 %n) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 10, %entry ]
+  %arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
+  %arrayidx1.phi = phi i16* [ %b, %entry ], [ %arrayidx1.inc, %for.body ]
+  %0 = load i32* %arrayidx.phi, align 4
+  %1 = load i16* %arrayidx1.phi, align 2
+  %conv = sext i16 %1 to i32
+  %factor = mul i32 %0, 2
+  %add3 = add i32 %factor, %conv
+  store i32 %add3, i32* %arrayidx.phi, align 4
+
+  %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+  %arrayidx1.inc = getelementptr i16* %arrayidx1.phi, i32 1
+  %lsr.iv.next = add i32 %lsr.iv, -1
+  %exitcond = icmp eq i32 %lsr.iv.next, 0
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+}
diff --git a/test/CodeGen/Hexagon/pred-absolute-store.ll b/test/CodeGen/Hexagon/pred-absolute-store.ll
new file mode 100644
index 0000000..b1b09f4
--- /dev/null
+++ b/test/CodeGen/Hexagon/pred-absolute-store.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we are able to predicate instructions with absolute
+; addressing mode.
+
+; CHECK: if{{ *}}(p{{[0-3]+}}){{ *}}memw(##gvar){{ *}}={{ *}}r{{[0-9]+}}
+
+@gvar = external global i32
+define i32 @test2(i32 %a, i32 %b) nounwind {
+entry:
+  %cmp = icmp eq i32 %a, %b
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 %a, i32* @gvar, align 4
+  br label %if.end
+
+if.end:
+  ret i32 %b
+}
diff --git a/test/CodeGen/Hexagon/predicate-copy.ll b/test/CodeGen/Hexagon/predicate-copy.ll
new file mode 100644
index 0000000..552b687
--- /dev/null
+++ b/test/CodeGen/Hexagon/predicate-copy.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
+
+; CHECK: r{{[0-9]+}} = p{{[0-9]+}}
+define i1 @foo() {
+entry:
+  ret i1 false
+}
+
diff --git a/test/CodeGen/Hexagon/struct_args.ll b/test/CodeGen/Hexagon/struct_args.ll
index e488f33..f91300b 100644
--- a/test/CodeGen/Hexagon/struct_args.ll
+++ b/test/CodeGen/Hexagon/struct_args.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
-; CHECK: r{{[0-9]}}:{{[0-9]}} = combine(r{{[0-9]}}, r{{[0-9]}})
+; CHECK: r{{[0-9]}}:{{[0-9]}} = combine({{r[0-9]|#0}}, r{{[0-9]}})
 ; CHECK: r{{[0-9]}}:{{[0-9]}} |= asl(r{{[0-9]}}:{{[0-9]}}, #32)
 
 %struct.small = type { i32, i32 }
diff --git a/test/CodeGen/Hexagon/validate-offset.ll b/test/CodeGen/Hexagon/validate-offset.ll
new file mode 100644
index 0000000..9e7d0aa
--- /dev/null
+++ b/test/CodeGen/Hexagon/validate-offset.ll
@@ -0,0 +1,36 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s -O0
+
+; This is a regression test which makes sure that the offset check
+; is available for STRiw_indexed instruction. This is required
+; by 'Hexagon Expand Predicate Spill Code' pass.
+
+define i32 @f(i32 %a, i32 %b) nounwind {
+entry:
+  %retval = alloca i32, align 4
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 %b, i32* %b.addr, align 4
+  %0 = load i32* %a.addr, align 4
+  %1 = load i32* %b.addr, align 4
+  %cmp = icmp sgt i32 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %2 = load i32* %a.addr, align 4
+  %3 = load i32* %b.addr, align 4
+  %add = add nsw i32 %2, %3
+  store i32 %add, i32* %retval
+  br label %return
+
+if.else:
+  %4 = load i32* %a.addr, align 4
+  %5 = load i32* %b.addr, align 4
+  %sub = sub nsw i32 %4, %5
+  store i32 %sub, i32* %retval
+  br label %return
+
+return:
+  %6 = load i32* %retval
+  ret i32 %6
+}
diff --git a/test/CodeGen/Mips/addi.ll b/test/CodeGen/Mips/addi.ll
new file mode 100644
index 0000000..8f70a46
--- /dev/null
+++ b/test/CodeGen/Mips/addi.ll
@@ -0,0 +1,30 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 6, align 4
+@j = global i32 12, align 4
+@k = global i32 15, align 4
+@l = global i32 20, align 4
+@.str = private unnamed_addr constant [13 x i8] c"%i %i %i %i\0A\00", align 1
+
+define void @foo() nounwind {
+entry:
+  %0 = load i32* @i, align 4
+  %add = add nsw i32 %0, 5
+  store i32 %add, i32* @i, align 4
+  %1 = load i32* @j, align 4
+  %sub = sub nsw i32 %1, 5
+  store i32 %sub, i32* @j, align 4
+  %2 = load i32* @k, align 4
+  %add1 = add nsw i32 %2, 10000
+  store i32 %add1, i32* @k, align 4
+  %3 = load i32* @l, align 4
+  %sub2 = sub nsw i32 %3, 10000
+  store i32 %sub2, i32* @l, align 4
+; 16: 	addiu	${{[0-9]+}}, 5	# 16 bit inst
+; 16: 	addiu	${{[0-9]+}}, -5	# 16 bit inst
+; 16: 	addiu	${{[0-9]+}}, 10000
+; 16: 	addiu	${{[0-9]+}}, -10000
+  ret void
+}
+
+
diff --git a/test/CodeGen/Mips/align16.ll b/test/CodeGen/Mips/align16.ll
new file mode 100644
index 0000000..99139ab
--- /dev/null
+++ b/test/CodeGen/Mips/align16.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 25, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define void @p(i32* %i) nounwind {
+entry:
+  ret void
+}
+
+
+define void @foo() nounwind {
+entry:
+  %y = alloca [512 x i32], align 4
+  %x = alloca i32, align 8
+  %zz = alloca i32, align 4
+  %z = alloca i32, align 4
+  %0 = load i32* @i, align 4
+  %arrayidx = getelementptr inbounds [512 x i32]* %y, i32 0, i32 10
+  store i32 %0, i32* %arrayidx, align 4
+  %1 = load i32* @i, align 4
+  store i32 %1, i32* %x, align 8
+  call void @p(i32* %x)
+  %arrayidx1 = getelementptr inbounds [512 x i32]* %y, i32 0, i32 10
+  call void @p(i32* %arrayidx1)
+  ret void
+}
+; 16:	save	$ra, $s0, $s1, 2040
+; 16:	addiu	$sp, -48 # 16 bit inst
+; 16:	addiu	$sp, 48 # 16 bit inst
+; 16:	restore	$ra,  $s0, $s1, 2040
+\ No newline at end of file
diff --git a/test/CodeGen/Mips/br-jmp.ll b/test/CodeGen/Mips/br-jmp.ll
index 1b5513a..9ca8d15 100644
--- a/test/CodeGen/Mips/br-jmp.ll
+++ b/test/CodeGen/Mips/br-jmp.ll
@@ -1,5 +1,7 @@
 ; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC
 ; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC16
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
 
 define void @count(i32 %x, i32 %y, i32 %z) noreturn nounwind readnone {
 entry:
@@ -11,3 +13,6 @@ bosco:                                            ; preds = %bosco, %entry
 
 ; CHECK-PIC: b	$BB0_1
 ; CHECK-STATIC: j	$BB0_1
+; CHECK-PIC16: b	$BB0_1
+; CHECK-STATIC16: b	$BB0_1
+
diff --git a/test/CodeGen/Mips/brdelayslot.ll b/test/CodeGen/Mips/brdelayslot.ll
index 897fc97..c3f483a 100644
--- a/test/CodeGen/Mips/brdelayslot.ll
+++ b/test/CodeGen/Mips/brdelayslot.ll
@@ -67,3 +67,16 @@ if.end:
   ret void
 }
 
+; Check that delay slot filler can place mov.s or mov.d in delay slot.
+;
+; Default:     foo6:
+; Default-NOT: nop
+
+define void @foo6(float %a0, double %a1) nounwind {
+entry:
+  tail call void @foo7(double %a1, float %a0) nounwind
+  ret void
+}
+
+declare void @foo7(double, float)
+
diff --git a/test/CodeGen/Mips/eh-return32.ll b/test/CodeGen/Mips/eh-return32.ll
new file mode 100644
index 0000000..cf18fde
--- /dev/null
+++ b/test/CodeGen/Mips/eh-return32.ll
@@ -0,0 +1,81 @@
+; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck %s
+
+declare void @llvm.eh.return.i32(i32, i8*)
+declare void @foo(...)
+
+define i8* @f1(i32 %offset, i8* %handler) {
+entry:
+  call void (...)* @foo()
+  call void @llvm.eh.return.i32(i32 %offset, i8* %handler)
+  unreachable
+
+; CHECK:        f1
+; CHECK:        addiu   $sp, $sp, -[[spoffset:[0-9]+]]
+
+; check that $a0-$a3 are saved on stack.
+; CHECK:        sw      $4, [[offset0:[0-9]+]]($sp)
+; CHECK:        sw      $5, [[offset1:[0-9]+]]($sp)
+; CHECK:        sw      $6, [[offset2:[0-9]+]]($sp)
+; CHECK:        sw      $7, [[offset3:[0-9]+]]($sp)
+
+; check that .cfi_offset directives are emitted for $a0-$a3.
+; CHECK:        .cfi_offset 4,
+; CHECK:        .cfi_offset 5,
+; CHECK:        .cfi_offset 6,
+; CHECK:        .cfi_offset 7,
+
+; check that stack adjustment and handler are put in $v1 and $v0.
+; CHECK:        or      $[[R0:[a-z0-9]+]], $5, $zero
+; CHECK:        or      $[[R1:[a-z0-9]+]], $4, $zero
+; CHECK:        or      $3, $[[R1]], $zero
+; CHECK:        or      $2, $[[R0]], $zero
+
+; check that $a0-$a3 are restored from stack.
+; CHECK:        lw      $4, [[offset0]]($sp)
+; CHECK:        lw      $5, [[offset1]]($sp)
+; CHECK:        lw      $6, [[offset2]]($sp)
+; CHECK:        lw      $7, [[offset3]]($sp)
+
+; check that stack is adjusted by $v1 and that code returns to address in $v0
+; CHECK:        addiu   $sp, $sp, [[spoffset]]
+; CHECK:        or      $ra, $2, $zero
+; CHECK:        jr      $ra
+; CHECK:        addu    $sp, $sp, $3
+}
+
+define i8* @f2(i32 %offset, i8* %handler) {
+entry:
+  call void @llvm.eh.return.i32(i32 %offset, i8* %handler)
+  unreachable
+
+; CHECK:        f2
+; CHECK:        addiu   $sp, $sp, -[[spoffset:[0-9]+]]
+
+; check that $a0-$a3 are saved on stack.
+; CHECK:        sw      $4, [[offset0:[0-9]+]]($sp)
+; CHECK:        sw      $5, [[offset1:[0-9]+]]($sp)
+; CHECK:        sw      $6, [[offset2:[0-9]+]]($sp)
+; CHECK:        sw      $7, [[offset3:[0-9]+]]($sp)
+
+; check that .cfi_offset directives are emitted for $a0-$a3.
+; CHECK:        .cfi_offset 4,
+; CHECK:        .cfi_offset 5,
+; CHECK:        .cfi_offset 6,
+; CHECK:        .cfi_offset 7,
+
+; check that stack adjustment and handler are put in $v1 and $v0.
+; CHECK:        or      $3, $4, $zero
+; CHECK:        or      $2, $5, $zero
+
+; check that $a0-$a3 are restored from stack.
+; CHECK:        lw      $4, [[offset0]]($sp)
+; CHECK:        lw      $5, [[offset1]]($sp)
+; CHECK:        lw      $6, [[offset2]]($sp)
+; CHECK:        lw      $7, [[offset3]]($sp)
+
+; check that stack is adjusted by $v1 and that code returns to address in $v0
+; CHECK:        addiu   $sp, $sp, [[spoffset]]
+; CHECK:        or      $ra, $2, $zero
+; CHECK:        jr      $ra
+; CHECK:        addu    $sp, $sp, $3
+}
diff --git a/test/CodeGen/Mips/eh-return64.ll b/test/CodeGen/Mips/eh-return64.ll
new file mode 100644
index 0000000..c410e1c
--- /dev/null
+++ b/test/CodeGen/Mips/eh-return64.ll
@@ -0,0 +1,83 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
+declare void @llvm.eh.return.i64(i64, i8*)
+declare void @foo(...)
+
+define void @f1(i64 %offset, i8* %handler) {
+entry:
+  call void (...)* @foo()
+  call void @llvm.eh.return.i64(i64 %offset, i8* %handler)
+  unreachable
+
+; CHECK:        f1
+; CHECK:        daddiu  $sp, $sp, -[[spoffset:[0-9]+]]
+
+; check that $a0-$a3 are saved on stack.
+; CHECK:        sd      $4, [[offset0:[0-9]+]]($sp)
+; CHECK:        sd      $5, [[offset1:[0-9]+]]($sp)
+; CHECK:        sd      $6, [[offset2:[0-9]+]]($sp)
+; CHECK:        sd      $7, [[offset3:[0-9]+]]($sp)
+
+; check that .cfi_offset directives are emitted for $a0-$a3.
+; CHECK:        .cfi_offset 4,
+; CHECK:        .cfi_offset 5,
+; CHECK:        .cfi_offset 6,
+; CHECK:        .cfi_offset 7,
+
+; check that stack adjustment and handler are put in $v1 and $v0.
+; CHECK:        or      $[[R0:[a-z0-9]+]], $5, $zero
+; CHECK:        or      $[[R1:[a-z0-9]+]], $4, $zero
+; CHECK:        or      $3, $[[R1]], $zero
+; CHECK:        or      $2, $[[R0]], $zero
+
+; check that $a0-$a3 are restored from stack.
+; CHECK:        ld      $4, [[offset0]]($sp)
+; CHECK:        ld      $5, [[offset1]]($sp)
+; CHECK:        ld      $6, [[offset2]]($sp)
+; CHECK:        ld      $7, [[offset3]]($sp)
+
+; check that stack is adjusted by $v1 and that code returns to address in $v0
+; CHECK:        daddiu  $sp, $sp, [[spoffset]]
+; CHECK:        or      $ra, $2, $zero
+; CHECK:        jr      $ra
+; CHECK:        daddu   $sp, $sp, $3
+
+}
+
+define void @f2(i64 %offset, i8* %handler) {
+entry:
+  call void @llvm.eh.return.i64(i64 %offset, i8* %handler)
+  unreachable
+
+; CHECK:        f2
+; CHECK:        daddiu  $sp, $sp, -[[spoffset:[0-9]+]]
+
+; check that $a0-$a3 are saved on stack.
+; CHECK:        sd      $4, [[offset0:[0-9]+]]($sp)
+; CHECK:        sd      $5, [[offset1:[0-9]+]]($sp)
+; CHECK:        sd      $6, [[offset2:[0-9]+]]($sp)
+; CHECK:        sd      $7, [[offset3:[0-9]+]]($sp)
+
+; check that .cfi_offset directives are emitted for $a0-$a3.
+; CHECK:        .cfi_offset 4,
+; CHECK:        .cfi_offset 5,
+; CHECK:        .cfi_offset 6,
+; CHECK:        .cfi_offset 7,
+
+; check that stack adjustment and handler are put in $v1 and $v0.
+; CHECK:        or      $3, $4, $zero
+; CHECK:        or      $2, $5, $zero
+
+; check that $a0-$a3 are restored from stack.
+; CHECK:        ld      $4, [[offset0]]($sp)
+; CHECK:        ld      $5, [[offset1]]($sp)
+; CHECK:        ld      $6, [[offset2]]($sp)
+; CHECK:        ld      $7, [[offset3]]($sp)
+
+; check that stack is adjusted by $v1 and that code returns to address in $v0
+; CHECK:        daddiu  $sp, $sp, [[spoffset]]
+; CHECK:        or      $ra, $2, $zero
+; CHECK:        jr      $ra
+; CHECK:        daddu   $sp, $sp, $3
+
+}
diff --git a/test/CodeGen/Mips/fp16static.ll b/test/CodeGen/Mips/fp16static.ll
new file mode 100644
index 0000000..240ec75
--- /dev/null
+++ b/test/CodeGen/Mips/fp16static.ll
@@ -0,0 +1,13 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
+
+@x = common global float 0.000000e+00, align 4
+
+define void @foo() nounwind {
+entry:
+  %0 = load float* @x, align 4
+  %1 = load float* @x, align 4
+  %mul = fmul float %0, %1
+  store float %mul, float* @x, align 4
+; CHECK-STATIC16: jal	__mips16_mulsf3
+  ret void
+}
diff --git a/test/CodeGen/Mips/frame-address.ll b/test/CodeGen/Mips/frame-address.ll
index e64e6d8..9b9ee21 100644
--- a/test/CodeGen/Mips/frame-address.ll
+++ b/test/CodeGen/Mips/frame-address.ll
@@ -7,6 +7,6 @@ entry:
   %0 = call i8* @llvm.frameaddress(i32 0)
   ret i8* %0
 
-; CHECK:   addu    $fp, $sp, $zero
-; CHECK:   or      $2, $fp, $zero
+; CHECK:   move    $fp, $sp
+; CHECK:   or    $2, $fp, $zero
 }
diff --git a/test/CodeGen/Mips/helloworld.ll b/test/CodeGen/Mips/helloworld.ll
index aee58b6..56ee607 100644
--- a/test/CodeGen/Mips/helloworld.ll
+++ b/test/CodeGen/Mips/helloworld.ll
@@ -1,9 +1,11 @@
 ; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C1
 ; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C2
 ; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PE
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s | FileCheck %s -check-prefix=ST1
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s | FileCheck %s -check-prefix=ST2
 ;
-; re-enable this when mips16's jalr is fixed.
-; DISABLED: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR
+; RUN: llc  -march=mipsel -mcpu=mips32  -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR32
 
 
 @.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1
@@ -15,7 +17,15 @@ entry:
 
 ; SR: 	.set	mips16                  # @main
 
-; SR:	save 	$ra, [[FS:[0-9]+]]
+; SR32: .set nomips16
+; SR32: .ent main
+; SR-NOT:  .set noreorder
+; SR-NOT:  .set nomacro
+; SR-NOT:  .set noat
+; SR32:  .set noreorder
+; SR32:  .set nomacro
+; SR32:  .set noat
+; SR:	save 	$ra, $s0, $s1, [[FS:[0-9]+]]
 ; PE:	li	$[[T1:[0-9]+]], %hi(_gp_disp)
 ; PE: 	addiu	$[[T2:[0-9]+]], $pc, %lo(_gp_disp)
 ; PE:	sll	$[[T3:[0-9]+]], $[[T1]], 16
@@ -25,10 +35,23 @@ entry:
 ; C2:	move	$25, ${{[0-9]+}}
 ; C1:	move 	$gp, ${{[0-9]+}}
 ; C1:	jalrc 	${{[0-9]+}}
-; SR:	restore 	$ra, [[FS]]
+; SR:	restore 	$ra, $s0, $s1, [[FS]]
 ; PE:	li	$2, 0
 ; PE:	jrc 	$ra
 
+; ST1:  li	${{[0-9]+}}, %hi($.str)
+; ST1:  sll     ${{[0-9]+}}, ${{[0-9]+}}, 16
+; ST1:	addiu	${{[0-9]+}}, %lo($.str)
+; ST2:  li	${{[0-9]+}}, %hi($.str)
+; ST2:  jal     printf
 }
 
+;  SR-NOT:  .set at
+;  SR-NOT:  .set macro
+;  SR-NOT:  .set reorder
+;  SR32:  .set at
+;  SR32:  .set macro
+;  SR32:  .set reorder
+; SR:   .end main
+; SR32:   .end main
 declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/hf16_1.ll b/test/CodeGen/Mips/hf16_1.ll
new file mode 100644
index 0000000..c7454ee
--- /dev/null
+++ b/test/CodeGen/Mips/hf16_1.ll
@@ -0,0 +1,256 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -soft-float -mips16-hard-float -O3 < %s | FileCheck %s -check-prefix=1
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -soft-float -mips16-hard-float -O3 < %s | FileCheck %s -check-prefix=2
+
+
+@x = common global float 0.000000e+00, align 4
+@xd = common global double 0.000000e+00, align 8
+@y = common global float 0.000000e+00, align 4
+@yd = common global double 0.000000e+00, align 8
+@xy = common global { float, float } zeroinitializer, align 4
+@xyd = common global { double, double } zeroinitializer, align 8
+
+define void @foo() nounwind {
+entry:
+  %0 = load float* @x, align 4
+  call void @v_sf(float %0)
+  %1 = load double* @xd, align 8
+  call void @v_df(double %1)
+  %2 = load float* @x, align 4
+  %3 = load float* @y, align 4
+  call void @v_sf_sf(float %2, float %3)
+  %4 = load double* @xd, align 8
+  %5 = load float* @x, align 4
+  call void @v_df_sf(double %4, float %5)
+  %6 = load double* @xd, align 8
+  %7 = load double* @yd, align 8
+  call void @v_df_df(double %6, double %7)
+  %call = call float @sf_v()
+  %8 = load float* @x, align 4
+  %call1 = call float @sf_sf(float %8)
+  %9 = load double* @xd, align 8
+  %call2 = call float @sf_df(double %9)
+  %10 = load float* @x, align 4
+  %11 = load float* @y, align 4
+  %call3 = call float @sf_sf_sf(float %10, float %11)
+  %12 = load double* @xd, align 8
+  %13 = load float* @x, align 4
+  %call4 = call float @sf_df_sf(double %12, float %13)
+  %14 = load double* @xd, align 8
+  %15 = load double* @yd, align 8
+  %call5 = call float @sf_df_df(double %14, double %15)
+  %call6 = call double @df_v()
+  %16 = load float* @x, align 4
+  %call7 = call double @df_sf(float %16)
+  %17 = load double* @xd, align 8
+  %call8 = call double @df_df(double %17)
+  %18 = load float* @x, align 4
+  %19 = load float* @y, align 4
+  %call9 = call double @df_sf_sf(float %18, float %19)
+  %20 = load double* @xd, align 8
+  %21 = load float* @x, align 4
+  %call10 = call double @df_df_sf(double %20, float %21)
+  %22 = load double* @xd, align 8
+  %23 = load double* @yd, align 8
+  %call11 = call double @df_df_df(double %22, double %23)
+  %call12 = call { float, float } @sc_v()
+  %24 = extractvalue { float, float } %call12, 0
+  %25 = extractvalue { float, float } %call12, 1
+  %26 = load float* @x, align 4
+  %call13 = call { float, float } @sc_sf(float %26)
+  %27 = extractvalue { float, float } %call13, 0
+  %28 = extractvalue { float, float } %call13, 1
+  %29 = load double* @xd, align 8
+  %call14 = call { float, float } @sc_df(double %29)
+  %30 = extractvalue { float, float } %call14, 0
+  %31 = extractvalue { float, float } %call14, 1
+  %32 = load float* @x, align 4
+  %33 = load float* @y, align 4
+  %call15 = call { float, float } @sc_sf_sf(float %32, float %33)
+  %34 = extractvalue { float, float } %call15, 0
+  %35 = extractvalue { float, float } %call15, 1
+  %36 = load double* @xd, align 8
+  %37 = load float* @x, align 4
+  %call16 = call { float, float } @sc_df_sf(double %36, float %37)
+  %38 = extractvalue { float, float } %call16, 0
+  %39 = extractvalue { float, float } %call16, 1
+  %40 = load double* @xd, align 8
+  %41 = load double* @yd, align 8
+  %call17 = call { float, float } @sc_df_df(double %40, double %41)
+  %42 = extractvalue { float, float } %call17, 0
+  %43 = extractvalue { float, float } %call17, 1
+  %call18 = call { double, double } @dc_v()
+  %44 = extractvalue { double, double } %call18, 0
+  %45 = extractvalue { double, double } %call18, 1
+  %46 = load float* @x, align 4
+  %call19 = call { double, double } @dc_sf(float %46)
+  %47 = extractvalue { double, double } %call19, 0
+  %48 = extractvalue { double, double } %call19, 1
+  %49 = load double* @xd, align 8
+  %call20 = call { double, double } @dc_df(double %49)
+  %50 = extractvalue { double, double } %call20, 0
+  %51 = extractvalue { double, double } %call20, 1
+  %52 = load float* @x, align 4
+  %53 = load float* @y, align 4
+  %call21 = call { double, double } @dc_sf_sf(float %52, float %53)
+  %54 = extractvalue { double, double } %call21, 0
+  %55 = extractvalue { double, double } %call21, 1
+  %56 = load double* @xd, align 8
+  %57 = load float* @x, align 4
+  %call22 = call { double, double } @dc_df_sf(double %56, float %57)
+  %58 = extractvalue { double, double } %call22, 0
+  %59 = extractvalue { double, double } %call22, 1
+  %60 = load double* @xd, align 8
+  %61 = load double* @yd, align 8
+  %call23 = call { double, double } @dc_df_df(double %60, double %61)
+  %62 = extractvalue { double, double } %call23, 0
+  %63 = extractvalue { double, double } %call23, 1
+  ret void
+}
+
+declare void @v_sf(float)
+
+declare void @v_df(double)
+
+declare void @v_sf_sf(float, float)
+
+declare void @v_df_sf(double, float)
+
+declare void @v_df_df(double, double)
+
+declare float @sf_v()
+
+declare float @sf_sf(float)
+
+declare float @sf_df(double)
+
+declare float @sf_sf_sf(float, float)
+
+declare float @sf_df_sf(double, float)
+
+declare float @sf_df_df(double, double)
+
+declare double @df_v()
+
+declare double @df_sf(float)
+
+declare double @df_df(double)
+
+declare double @df_sf_sf(float, float)
+
+declare double @df_df_sf(double, float)
+
+declare double @df_df_df(double, double)
+
+declare { float, float } @sc_v()
+
+declare { float, float } @sc_sf(float)
+
+declare { float, float } @sc_df(double)
+
+declare { float, float } @sc_sf_sf(float, float)
+
+declare { float, float } @sc_df_sf(double, float)
+
+declare { float, float } @sc_df_df(double, double)
+
+declare { double, double } @dc_v()
+
+declare { double, double } @dc_sf(float)
+
+declare { double, double } @dc_df(double)
+
+declare { double, double } @dc_sf_sf(float, float)
+
+declare { double, double } @dc_df_sf(double, float)
+
+declare { double, double } @dc_df_df(double, double)
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_1)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(v_sf)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_2)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(v_df)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_5)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(v_sf_sf)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_6)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(v_df_sf)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_10)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(v_df_df)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_0)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(sf_v)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(sf_sf)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_2)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(sf_df)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_5)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(sf_sf_sf)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_6)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(sf_df_sf)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_10)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(sf_df_df)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_0)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(df_v)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_1)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(df_sf)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(df_df)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_5)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(df_sf_sf)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_6)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(df_df_sf)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_10)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(df_df_df)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sc_0)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(sc_v)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sc_1)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(sc_sf)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sc_2)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(sc_df)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sc_5)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(sc_sf_sf)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sc_6)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(sc_df_sf)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sc_10)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(sc_df_df)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_dc_0)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(dc_v)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_dc_1)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(dc_sf)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_dc_2)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(dc_df)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_dc_5)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(dc_sf_sf)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_dc_6)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(dc_df_sf)(${{[0-9]+}})
+
+; 1:	lw	${{[0-9]+}}, %got(__mips16_call_stub_dc_10)(${{[0-9]+}})
+; 2:	lw	${{[0-9]+}}, %call16(dc_df_df)(${{[0-9]+}})
+
+
+
diff --git a/test/CodeGen/Mips/i64arg.ll b/test/CodeGen/Mips/i64arg.ll
index e16e126..2012524 100644
--- a/test/CodeGen/Mips/i64arg.ll
+++ b/test/CodeGen/Mips/i64arg.ll
@@ -17,9 +17,9 @@ entry:
 ; CHECK: jalr $25
   tail call void @ff2(i64 %ll, double 3.000000e+00) nounwind
   %sub = add nsw i32 %i, -1
+; CHECK: lw $25, %call16(ff3)
 ; CHECK: sw $[[R1]], 28($sp)
 ; CHECK: sw $[[R0]], 24($sp)
-; CHECK: lw $25, %call16(ff3)
 ; CHECK: or $6, $[[R2]], $zero
 ; CHECK: or $7, $[[R3]], $zero
 ; CHECK: jalr $25
diff --git a/test/CodeGen/Mips/jtstat.ll b/test/CodeGen/Mips/jtstat.ll
new file mode 100644
index 0000000..6c1eb8d
--- /dev/null
+++ b/test/CodeGen/Mips/jtstat.ll
@@ -0,0 +1,71 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
+
+@s = global i8 115, align 1
+@c = common global i8 0, align 1
+@.str = private unnamed_addr constant [5 x i8] c"%c \0A\00", align 1
+
+define void @test(i32 %i) nounwind {
+entry:
+  %i.addr = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  %0 = load i32* %i.addr, align 4
+  switch i32 %0, label %sw.epilog [
+    i32 115, label %sw.bb
+    i32 105, label %sw.bb1
+    i32 100, label %sw.bb2
+    i32 108, label %sw.bb3
+    i32 99, label %sw.bb4
+    i32 68, label %sw.bb5
+    i32 81, label %sw.bb6
+    i32 76, label %sw.bb7
+  ]
+
+sw.bb:                                            ; preds = %entry
+  store i8 115, i8* @c, align 1
+  br label %sw.epilog
+
+sw.bb1:                                           ; preds = %entry
+  store i8 105, i8* @c, align 1
+  br label %sw.epilog
+
+sw.bb2:                                           ; preds = %entry
+  store i8 100, i8* @c, align 1
+  br label %sw.epilog
+
+sw.bb3:                                           ; preds = %entry
+  store i8 108, i8* @c, align 1
+  br label %sw.epilog
+
+sw.bb4:                                           ; preds = %entry
+  store i8 99, i8* @c, align 1
+  br label %sw.epilog
+
+sw.bb5:                                           ; preds = %entry
+  store i8 68, i8* @c, align 1
+  br label %sw.epilog
+
+sw.bb6:                                           ; preds = %entry
+  store i8 81, i8* @c, align 1
+  br label %sw.epilog
+
+sw.bb7:                                           ; preds = %entry
+  store i8 76, i8* @c, align 1
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %entry, %sw.bb7, %sw.bb6, %sw.bb5, %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
+  ret void
+}
+; Static model: jump-table address is formed via %hi/%lo; the table holds .4byte block entries.
+; CHECK-STATIC16: 	li	${{[0-9]+}}, %hi($JTI{{[0-9]+}}_{{[0-9]+}})
+; CHECK-STATIC16: 	lw	${{[0-9]+}}, %lo($JTI{{[0-9]+}}_{{[0-9]+}})(${{[0-9]+}})
+; CHECK-STATIC16: $JTI{{[0-9]+}}_{{[0-9]+}}:
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
diff --git a/test/CodeGen/Mips/largefr1.ll b/test/CodeGen/Mips/largefr1.ll
new file mode 100644
index 0000000..0fe89f7
--- /dev/null
+++ b/test/CodeGen/Mips/largefr1.ll
@@ -0,0 +1,61 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=1
+
+@i = common global i32 0, align 4
+@j = common global i32 0, align 4
+@.str = private unnamed_addr constant [8 x i8] c"%i %i \0A\00", align 1
+
+define void @foo(i32* %p, i32 %i, i32 %j) nounwind {
+entry:
+  %p.addr = alloca i32*, align 4
+  %i.addr = alloca i32, align 4
+  %j.addr = alloca i32, align 4
+  store i32* %p, i32** %p.addr, align 4
+  store i32 %i, i32* %i.addr, align 4
+  store i32 %j, i32* %j.addr, align 4
+  %0 = load i32* %j.addr, align 4
+  %1 = load i32** %p.addr, align 4
+  %2 = load i32* %i.addr, align 4
+  %add.ptr = getelementptr inbounds i32* %1, i32 %2
+  store i32 %0, i32* %add.ptr, align 4
+  ret void
+}
+
+define i32 @main() nounwind {
+entry:
+; 1: main: 
+; 1: 1: 	.word	-797992
+; 1:            li ${{[0-9]+}}, 12
+; 1:            sll ${{[0-9]+}}, ${{[0-9]+}}, 16
+; 1:            addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+; 2:            move $sp, ${{[0-9]+}}
+; 2:            addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+; 1:            li ${{[0-9]+}}, 6
+; 1:            sll ${{[0-9]+}}, ${{[0-9]+}}, 16
+; 1:            addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+; 2:            move $sp, ${{[0-9]+}}
+; 2:            addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+; 1:          	addiu	${{[0-9]+}}, ${{[0-9]+}}, 6800
+; 1: 	        li	${{[0-9]+}}, 1
+; 1:	        sll	${{[0-9]+}}, ${{[0-9]+}}, 16
+; 2: 	        li	${{[0-9]+}}, 34463
+  %retval = alloca i32, align 4
+  %one = alloca [100000 x i32], align 4
+  %two = alloca [100000 x i32], align 4
+  store i32 0, i32* %retval
+  %arrayidx = getelementptr inbounds [100000 x i32]* %one, i32 0, i32 0
+  call void @foo(i32* %arrayidx, i32 50, i32 9999)
+  %arrayidx1 = getelementptr inbounds [100000 x i32]* %two, i32 0, i32 0
+  call void @foo(i32* %arrayidx1, i32 99999, i32 5555)
+  %arrayidx2 = getelementptr inbounds [100000 x i32]* %one, i32 0, i32 50
+  %0 = load i32* %arrayidx2, align 4
+  store i32 %0, i32* @i, align 4
+  %arrayidx3 = getelementptr inbounds [100000 x i32]* %two, i32 0, i32 99999
+  %1 = load i32* %arrayidx3, align 4
+  store i32 %1, i32* @j, align 4
+  %2 = load i32* @i, align 4
+  %3 = load i32* @j, align 4
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %2, i32 %3)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/mips64-libcall.ll b/test/CodeGen/Mips/mips64-libcall.ll
new file mode 100644
index 0000000..c53ccfd
--- /dev/null
+++ b/test/CodeGen/Mips/mips64-libcall.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=mips64el -mcpu=mips64r2 -O3 < %s |\
+; RUN: FileCheck %s -check-prefix=HARD
+; RUN: llc -march=mips64el -mcpu=mips64r2 -soft-float < %s |\
+; RUN: FileCheck %s -check-prefix=SOFT
+
+; Check that %add is not passed in an integer register.
+; (dmfc1 into $4 would copy the FP value into the first integer argument register.)
+; HARD:     callfloor:
+; HARD-NOT: dmfc1 $4
+
+define double @callfloor(double %d) nounwind readnone {  ; returns floor(%d + 1.0)
+entry:
+  %add = fadd double %d, 1.000000e+00
+  %call = tail call double @floor(double %add) nounwind readnone  ; tail call to the libm floor declared below
+  ret double %call
+}
+
+declare double @floor(double) nounwind readnone
+
+; Check call16.
+;
+; SOFT: f64add:
+; SOFT: ld $25, %call16(__adddf3)
+
+define double @f64add(double %a, double %b) {  ; plain double add; the soft-float run expects a __adddf3 call for it
+entry:
+  %sum = fadd double %a, %b
+  ret double %sum
+}
diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll
index 5558ba6..0a8f85f 100644
--- a/test/CodeGen/Mips/o32_cc_byval.ll
+++ b/test/CodeGen/Mips/o32_cc_byval.ll
@@ -12,20 +12,20 @@ define void @f1() nounwind {
 entry:
 ; CHECK: lw  $[[R1:[0-9]+]], %got(f1.s1)
 ; CHECK: addiu $[[R0:[0-9]+]], $[[R1]], %lo(f1.s1)
+; CHECK: lw  $[[R7:[0-9]+]], 12($[[R0]])
+; CHECK: lw  $[[R3:[0-9]+]], 16($[[R0]])
+; CHECK: lw  $[[R4:[0-9]+]], 20($[[R0]])
+; CHECK: lw  $[[R5:[0-9]+]], 24($[[R0]])
 ; CHECK: lw  $[[R6:[0-9]+]], 28($[[R0]])
 ; CHECK: sw  $[[R6]], 36($sp)
-; CHECK: lw  $[[R5:[0-9]+]], 24($[[R0]])
 ; CHECK: sw  $[[R5]], 32($sp)
-; CHECK: lw  $[[R4:[0-9]+]], 20($[[R0]])
 ; CHECK: sw  $[[R4]], 28($sp)
-; CHECK: lw  $[[R3:[0-9]+]], 16($[[R0]])
 ; CHECK: sw  $[[R3]], 24($sp)
-; CHECK: lw  $[[R7:[0-9]+]], 12($[[R0]])
 ; CHECK: sw  $[[R7]], 20($sp)
 ; CHECK: lw  $[[R2:[0-9]+]], 8($[[R0]])
 ; CHECK: sw  $[[R2]], 16($sp)
-; CHECK: lw  $7, 4($[[R0]])
 ; CHECK: lw  $6, %lo(f1.s1)($[[R1]])
+; CHECK: lw  $7, 4($[[R0]])
   %agg.tmp10 = alloca %struct.S3, align 4
   call void @callee1(float 2.000000e+01, %struct.S1* byval bitcast (%0* @f1.s1 to %struct.S1*)) nounwind
   call void @callee2(%struct.S2* byval @f1.s2) nounwind
diff --git a/test/CodeGen/Mips/selTBteqzCmpi.ll b/test/CodeGen/Mips/selTBteqzCmpi.ll
new file mode 100644
index 0000000..9cb8227
--- /dev/null
+++ b/test/CodeGen/Mips/selTBteqzCmpi.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 1, align 4
+@j = global i32 2, align 4
+@a = global i32 5, align 4
+@.str = private unnamed_addr constant [8 x i8] c"%i = 2\0A\00", align 1
+@k = common global i32 0, align 4
+
+define void @t() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
+entry:
+  %a.val = load i32* @a, align 4
+  %is.ten = icmp eq i32 %a.val, 10  ; compare against an immediate
+  %i.val = load i32* @i, align 4
+  %j.val = load i32* @j, align 4
+  %picked = select i1 %is.ten, i32 %i.val, i32 %j.val
+  store i32 %picked, i32* @i, align 4  ; i = (a == 10) ? i : j
+  ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+
+
+; 16:	cmpi	${{[0-9]+}}, 10
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
+
+
diff --git a/test/CodeGen/Mips/selTBtnezCmpi.ll b/test/CodeGen/Mips/selTBtnezCmpi.ll
new file mode 100644
index 0000000..bd334f5
--- /dev/null
+++ b/test/CodeGen/Mips/selTBtnezCmpi.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 1, align 4
+@j = global i32 2, align 4
+@a = global i32 5, align 4
+@.str = private unnamed_addr constant [8 x i8] c"%i = 1\0A\00", align 1
+@k = common global i32 0, align 4
+
+define void @t() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
+entry:
+  %a.val = load i32* @a, align 4
+  %not.ten = icmp ne i32 %a.val, 10  ; compare against an immediate
+  %i.val = load i32* @i, align 4
+  %j.val = load i32* @j, align 4
+  %picked = select i1 %not.ten, i32 %i.val, i32 %j.val
+  store i32 %picked, i32* @i, align 4  ; i = (a != 10) ? i : j
+  ret void
+}
+
+; 16:	cmpi	${{[0-9]+}}, 10
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+
+
diff --git a/test/CodeGen/Mips/selTBtnezSlti.ll b/test/CodeGen/Mips/selTBtnezSlti.ll
new file mode 100644
index 0000000..593f6f2
--- /dev/null
+++ b/test/CodeGen/Mips/selTBtnezSlti.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 1, align 4
+@j = global i32 2, align 4
+@a = global i32 5, align 4
+@.str = private unnamed_addr constant [9 x i8] c"%i = 2 \0A\00", align 1
+@k = common global i32 0, align 4
+
+define void @t() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
+entry:
+  %a.val = load i32* @a, align 4
+  %below.ten = icmp slt i32 %a.val, 10  ; signed less-than against an immediate
+  %j.val = load i32* @j, align 4
+  %i.val = load i32* @i, align 4
+  %picked = select i1 %below.ten, i32 %j.val, i32 %i.val
+  store i32 %picked, i32* @i, align 4  ; i = (a < 10) ? j : i
+  ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+
+; 16:	slti	${{[0-9]+}}, 10
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+
diff --git a/test/CodeGen/Mips/seleq.ll b/test/CodeGen/Mips/seleq.ll
new file mode 100644
index 0000000..190baad
--- /dev/null
+++ b/test/CodeGen/Mips/seleq.ll
@@ -0,0 +1,95 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@t = global i32 10, align 4
+@f = global i32 199, align 4
+@a = global i32 1, align 4
+@b = global i32 10, align 4
+@c = global i32 1, align 4
+@z1 = common global i32 0, align 4
+@z2 = common global i32 0, align 4
+@z3 = common global i32 0, align 4
+@z4 = common global i32 0, align 4
+
+define void @calc_seleq() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {  ; attrs agree with the -mcpu=mips16 RUN line
+entry:
+  %0 = load i32* @a, align 4
+  %1 = load i32* @b, align 4
+  %cmp = icmp eq i32 %0, %1  ; z1 = (a == b) ? f : t
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %2 = load i32* @f, align 4
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  %3 = load i32* @t, align 4
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ]
+  store i32 %cond, i32* @z1, align 4
+  %4 = load i32* @b, align 4
+  %5 = load i32* @a, align 4
+  %cmp1 = icmp eq i32 %4, %5  ; z2 = (b == a) ? f : t
+  br i1 %cmp1, label %cond.true2, label %cond.false3
+
+cond.true2:                                       ; preds = %cond.end
+  %6 = load i32* @f, align 4
+  br label %cond.end4
+
+cond.false3:                                      ; preds = %cond.end
+  %7 = load i32* @t, align 4
+  br label %cond.end4
+
+cond.end4:                                        ; preds = %cond.false3, %cond.true2
+  %cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ]
+  store i32 %cond5, i32* @z2, align 4
+  %8 = load i32* @c, align 4
+  %9 = load i32* @a, align 4
+  %cmp6 = icmp eq i32 %8, %9  ; z3 = (c == a) ? t : f
+  br i1 %cmp6, label %cond.true7, label %cond.false8
+
+cond.true7:                                       ; preds = %cond.end4
+  %10 = load i32* @t, align 4
+  br label %cond.end9
+
+cond.false8:                                      ; preds = %cond.end4
+  %11 = load i32* @f, align 4
+  br label %cond.end9
+
+cond.end9:                                        ; preds = %cond.false8, %cond.true7
+  %cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ]
+  store i32 %cond10, i32* @z3, align 4
+  %12 = load i32* @a, align 4
+  %13 = load i32* @c, align 4
+  %cmp11 = icmp eq i32 %12, %13  ; z4 = (a == c) ? t : f
+  br i1 %cmp11, label %cond.true12, label %cond.false13
+
+cond.true12:                                      ; preds = %cond.end9
+  %14 = load i32* @t, align 4
+  br label %cond.end14
+
+cond.false13:                                     ; preds = %cond.end9
+  %15 = load i32* @f, align 4
+  br label %cond.end14
+
+cond.end14:                                       ; preds = %cond.false13, %cond.true12
+  %cond15 = phi i32 [ %14, %cond.true12 ], [ %15, %cond.false13 ]
+  store i32 %cond15, i32* @z4, align 4
+  ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+
+; 16:	cmp	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	cmp	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	cmp	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	cmp	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
diff --git a/test/CodeGen/Mips/seleqk.ll b/test/CodeGen/Mips/seleqk.ll
new file mode 100644
index 0000000..3ca622d
--- /dev/null
+++ b/test/CodeGen/Mips/seleqk.ll
@@ -0,0 +1,91 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@t = global i32 10, align 4
+@f = global i32 199, align 4
+@a = global i32 1, align 4
+@b = global i32 1000, align 4
+@z1 = common global i32 0, align 4
+@z2 = common global i32 0, align 4
+@z3 = common global i32 0, align 4
+@z4 = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define void @calc_seleqk() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {  ; four equality-with-constant diamonds, results stored to @z1..@z4
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp eq i32 %0, 1  ; z1 = (a == 1) ? t : f
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %1 = load i32* @t, align 4
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  %2 = load i32* @f, align 4
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ]
+  store i32 %cond, i32* @z1, align 4
+  %3 = load i32* @a, align 4
+  %cmp1 = icmp eq i32 %3, 1000  ; z2 = (a == 1000) ? f : t
+  br i1 %cmp1, label %cond.true2, label %cond.false3
+
+cond.true2:                                       ; preds = %cond.end
+  %4 = load i32* @f, align 4
+  br label %cond.end4
+
+cond.false3:                                      ; preds = %cond.end
+  %5 = load i32* @t, align 4
+  br label %cond.end4
+
+cond.end4:                                        ; preds = %cond.false3, %cond.true2
+  %cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ]
+  store i32 %cond5, i32* @z2, align 4
+  %6 = load i32* @b, align 4
+  %cmp6 = icmp eq i32 %6, 3  ; z3 = (b == 3) ? f : t
+  br i1 %cmp6, label %cond.true7, label %cond.false8
+
+cond.true7:                                       ; preds = %cond.end4
+  %7 = load i32* @f, align 4
+  br label %cond.end9
+
+cond.false8:                                      ; preds = %cond.end4
+  %8 = load i32* @t, align 4
+  br label %cond.end9
+
+cond.end9:                                        ; preds = %cond.false8, %cond.true7
+  %cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ]
+  store i32 %cond10, i32* @z3, align 4
+  %9 = load i32* @b, align 4
+  %cmp11 = icmp eq i32 %9, 1000  ; z4 = (b == 1000) ? t : f
+  br i1 %cmp11, label %cond.true12, label %cond.false13
+
+cond.true12:                                      ; preds = %cond.end9
+  %10 = load i32* @t, align 4
+  br label %cond.end14
+
+cond.false13:                                     ; preds = %cond.end9
+  %11 = load i32* @f, align 4
+  br label %cond.end14
+
+cond.end14:                                       ; preds = %cond.false13, %cond.true12
+  %cond15 = phi i32 [ %10, %cond.true12 ], [ %11, %cond.false13 ]
+  store i32 %cond15, i32* @z4, align 4
+  ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+
+; 16:	cmpi	${{[0-9]+}}, 1 	# 16 bit inst
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	cmpi	${{[0-9]+}}, 1000
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	cmpi	${{[0-9]+}}, 3 	# 16 bit inst
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	cmpi	${{[0-9]+}}, 1000
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
diff --git a/test/CodeGen/Mips/selgek.ll b/test/CodeGen/Mips/selgek.ll
new file mode 100644
index 0000000..8ab4046
--- /dev/null
+++ b/test/CodeGen/Mips/selgek.ll
@@ -0,0 +1,94 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@t = global i32 10, align 4
+@f = global i32 199, align 4
+@a = global i32 2, align 4
+@b = global i32 1000, align 4
+@c = global i32 2, align 4
+@z1 = common global i32 0, align 4
+@z2 = common global i32 0, align 4
+@z3 = common global i32 0, align 4
+@z4 = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define void @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {  ; four signed >= constant diamonds, results stored to @z1..@z4
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp sge i32 %0, 1000  ; z1 = (a >= 1000) ? f : t
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %1 = load i32* @f, align 4
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  %2 = load i32* @t, align 4
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ]
+  store i32 %cond, i32* @z1, align 4
+  %3 = load i32* @b, align 4
+  %cmp1 = icmp sge i32 %3, 1  ; z2 = (b >= 1) ? t : f
+  br i1 %cmp1, label %cond.true2, label %cond.false3
+
+cond.true2:                                       ; preds = %cond.end
+  %4 = load i32* @t, align 4
+  br label %cond.end4
+
+cond.false3:                                      ; preds = %cond.end
+  %5 = load i32* @f, align 4
+  br label %cond.end4
+
+cond.end4:                                        ; preds = %cond.false3, %cond.true2
+  %cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ]
+  store i32 %cond5, i32* @z2, align 4
+  %6 = load i32* @c, align 4
+  %cmp6 = icmp sge i32 %6, 2  ; z3 = (c >= 2) ? t : f
+  br i1 %cmp6, label %cond.true7, label %cond.false8
+
+cond.true7:                                       ; preds = %cond.end4
+  %7 = load i32* @t, align 4
+  br label %cond.end9
+
+cond.false8:                                      ; preds = %cond.end4
+  %8 = load i32* @f, align 4
+  br label %cond.end9
+
+cond.end9:                                        ; preds = %cond.false8, %cond.true7
+  %cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ]
+  store i32 %cond10, i32* @z3, align 4
+  %9 = load i32* @a, align 4
+  %cmp11 = icmp sge i32 %9, 2  ; z4 = (a >= 2) ? t : f
+  br i1 %cmp11, label %cond.true12, label %cond.false13
+
+cond.true12:                                      ; preds = %cond.end9
+  %10 = load i32* @t, align 4
+  br label %cond.end14
+
+cond.false13:                                     ; preds = %cond.end9
+  %11 = load i32* @f, align 4
+  br label %cond.end14
+
+cond.end14:                                       ; preds = %cond.false13, %cond.true12
+  %cond15 = phi i32 [ %10, %cond.true12 ], [ %11, %cond.false13 ]
+  store i32 %cond15, i32* @z4, align 4
+  ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+
+; 16:	slti	${{[0-9]+}}, 1000
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	slti	${{[0-9]+}}, 1 	# 16 bit inst
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	slti	${{[0-9]+}}, 2 	# 16 bit inst
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	slti	${{[0-9]+}}, 2 	# 16 bit inst
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+
diff --git a/test/CodeGen/Mips/selgt.ll b/test/CodeGen/Mips/selgt.ll
new file mode 100644
index 0000000..67b9b49
--- /dev/null
+++ b/test/CodeGen/Mips/selgt.ll
@@ -0,0 +1,98 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@t = global i32 10, align 4
+@f = global i32 199, align 4
+@a = global i32 1, align 4
+@b = global i32 10, align 4
+@c = global i32 1, align 4
+@z1 = common global i32 0, align 4
+@z2 = common global i32 0, align 4
+@z3 = common global i32 0, align 4
+@z4 = common global i32 0, align 4
+@.str = private unnamed_addr constant [9 x i8] c"%i = %i\0A\00", align 1
+
+define i32 @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {  ; four signed > register-register diamonds, results stored to @z1..@z4
+entry:
+  %retval = alloca i32, align 4  ; never stored to anywhere in this function
+  %0 = load i32* @a, align 4
+  %1 = load i32* @b, align 4
+  %cmp = icmp sgt i32 %0, %1  ; z1 = (a > b) ? f : t
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %2 = load i32* @f, align 4
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  %3 = load i32* @t, align 4
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ]
+  store i32 %cond, i32* @z1, align 4
+  %4 = load i32* @b, align 4
+  %5 = load i32* @a, align 4
+  %cmp1 = icmp sgt i32 %4, %5  ; z2 = (b > a) ? t : f
+  br i1 %cmp1, label %cond.true2, label %cond.false3
+
+cond.true2:                                       ; preds = %cond.end
+  %6 = load i32* @t, align 4
+  br label %cond.end4
+
+cond.false3:                                      ; preds = %cond.end
+  %7 = load i32* @f, align 4
+  br label %cond.end4
+
+cond.end4:                                        ; preds = %cond.false3, %cond.true2
+  %cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ]
+  store i32 %cond5, i32* @z2, align 4
+  %8 = load i32* @c, align 4
+  %9 = load i32* @a, align 4
+  %cmp6 = icmp sgt i32 %8, %9  ; z3 = (c > a) ? f : t
+  br i1 %cmp6, label %cond.true7, label %cond.false8
+
+cond.true7:                                       ; preds = %cond.end4
+  %10 = load i32* @f, align 4
+  br label %cond.end9
+
+cond.false8:                                      ; preds = %cond.end4
+  %11 = load i32* @t, align 4
+  br label %cond.end9
+
+cond.end9:                                        ; preds = %cond.false8, %cond.true7
+  %cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ]
+  store i32 %cond10, i32* @z3, align 4
+  %12 = load i32* @a, align 4
+  %13 = load i32* @c, align 4
+  %cmp11 = icmp sgt i32 %12, %13  ; z4 = (a > c) ? f : t
+  br i1 %cmp11, label %cond.true12, label %cond.false13
+
+cond.true12:                                      ; preds = %cond.end9
+  %14 = load i32* @f, align 4
+  br label %cond.end14
+
+cond.false13:                                     ; preds = %cond.end9
+  %15 = load i32* @t, align 4
+  br label %cond.end14
+
+cond.end14:                                       ; preds = %cond.false13, %cond.true12
+  %cond15 = phi i32 [ %14, %cond.true12 ], [ %15, %cond.false13 ]
+  store i32 %cond15, i32* @z4, align 4
+  %16 = load i32* %retval  ; NOTE(review) reads the uninitialized slot, so the returned value is indeterminate; confirm that is intended
+  ret i32 %16
+}
+
+; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" }
diff --git a/test/CodeGen/Mips/selle.ll b/test/CodeGen/Mips/selle.ll
new file mode 100644
index 0000000..b27df45
--- /dev/null
+++ b/test/CodeGen/Mips/selle.ll
@@ -0,0 +1,96 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@t = global i32 10, align 4
+@f = global i32 199, align 4
+@a = global i32 1, align 4
+@b = global i32 10, align 4
+@c = global i32 1, align 4
+@z1 = common global i32 0, align 4
+@z2 = common global i32 0, align 4
+@z3 = common global i32 0, align 4
+@z4 = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define void @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {  ; four signed <= register-register diamonds, results stored to @z1..@z4
+entry:
+  %0 = load i32* @a, align 4
+  %1 = load i32* @b, align 4
+  %cmp = icmp sle i32 %0, %1  ; z1 = (a <= b) ? t : f
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %2 = load i32* @t, align 4
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  %3 = load i32* @f, align 4
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ]
+  store i32 %cond, i32* @z1, align 4
+  %4 = load i32* @b, align 4
+  %5 = load i32* @a, align 4
+  %cmp1 = icmp sle i32 %4, %5  ; z2 = (b <= a) ? f : t
+  br i1 %cmp1, label %cond.true2, label %cond.false3
+
+cond.true2:                                       ; preds = %cond.end
+  %6 = load i32* @f, align 4
+  br label %cond.end4
+
+cond.false3:                                      ; preds = %cond.end
+  %7 = load i32* @t, align 4
+  br label %cond.end4
+
+cond.end4:                                        ; preds = %cond.false3, %cond.true2
+  %cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ]
+  store i32 %cond5, i32* @z2, align 4
+  %8 = load i32* @c, align 4
+  %9 = load i32* @a, align 4
+  %cmp6 = icmp sle i32 %8, %9  ; z3 = (c <= a) ? t : f
+  br i1 %cmp6, label %cond.true7, label %cond.false8
+
+cond.true7:                                       ; preds = %cond.end4
+  %10 = load i32* @t, align 4
+  br label %cond.end9
+
+cond.false8:                                      ; preds = %cond.end4
+  %11 = load i32* @f, align 4
+  br label %cond.end9
+
+cond.end9:                                        ; preds = %cond.false8, %cond.true7
+  %cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ]
+  store i32 %cond10, i32* @z3, align 4
+  %12 = load i32* @a, align 4
+  %13 = load i32* @c, align 4
+  %cmp11 = icmp sle i32 %12, %13  ; z4 = (a <= c) ? t : f
+  br i1 %cmp11, label %cond.true12, label %cond.false13
+
+cond.true12:                                      ; preds = %cond.end9
+  %14 = load i32* @t, align 4
+  br label %cond.end14
+
+cond.false13:                                     ; preds = %cond.end9
+  %15 = load i32* @f, align 4
+  br label %cond.end14
+
+cond.end14:                                       ; preds = %cond.false13, %cond.true12
+  %cond15 = phi i32 [ %14, %cond.true12 ], [ %15, %cond.false13 ]
+  store i32 %cond15, i32* @z4, align 4
+  ret void
+}
+
+; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" }
diff --git a/test/CodeGen/Mips/selltk.ll b/test/CodeGen/Mips/selltk.ll
new file mode 100644
index 0000000..1471b89
--- /dev/null
+++ b/test/CodeGen/Mips/selltk.ll
@@ -0,0 +1,93 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@t = global i32 10, align 4
+@f = global i32 199, align 4
+@a = global i32 2, align 4
+@b = global i32 1000, align 4
+@c = global i32 2, align 4
+@z1 = common global i32 0, align 4
+@z2 = common global i32 0, align 4
+@z3 = common global i32 0, align 4
+@z4 = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define void @calc_selltk() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {  ; two signed < and two signed > constant diamonds, results stored to @z1..@z4
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp slt i32 %0, 1000  ; z1 = (a < 1000) ? t : f
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %1 = load i32* @t, align 4
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  %2 = load i32* @f, align 4
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ]
+  store i32 %cond, i32* @z1, align 4
+  %3 = load i32* @b, align 4
+  %cmp1 = icmp slt i32 %3, 2  ; z2 = (b < 2) ? f : t
+  br i1 %cmp1, label %cond.true2, label %cond.false3
+
+cond.true2:                                       ; preds = %cond.end
+  %4 = load i32* @f, align 4
+  br label %cond.end4
+
+cond.false3:                                      ; preds = %cond.end
+  %5 = load i32* @t, align 4
+  br label %cond.end4
+
+cond.end4:                                        ; preds = %cond.false3, %cond.true2
+  %cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ]
+  store i32 %cond5, i32* @z2, align 4
+  %6 = load i32* @c, align 4
+  %cmp6 = icmp sgt i32 %6, 2  ; z3 = (c > 2) ? f : t
+  br i1 %cmp6, label %cond.true7, label %cond.false8
+
+cond.true7:                                       ; preds = %cond.end4
+  %7 = load i32* @f, align 4
+  br label %cond.end9
+
+cond.false8:                                      ; preds = %cond.end4
+  %8 = load i32* @t, align 4
+  br label %cond.end9
+
+cond.end9:                                        ; preds = %cond.false8, %cond.true7
+  %cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ]
+  store i32 %cond10, i32* @z3, align 4
+  %9 = load i32* @a, align 4
+  %cmp11 = icmp sgt i32 %9, 2  ; z4 = (a > 2) ? f : t
+  br i1 %cmp11, label %cond.true12, label %cond.false13
+
+cond.true12:                                      ; preds = %cond.end9
+  %10 = load i32* @f, align 4
+  br label %cond.end14
+
+cond.false13:                                     ; preds = %cond.end9
+  %11 = load i32* @t, align 4
+  br label %cond.end14
+
+cond.end14:                                       ; preds = %cond.false13, %cond.true12
+  %cond15 = phi i32 [ %10, %cond.true12 ], [ %11, %cond.false13 ]
+  store i32 %cond15, i32* @z4, align 4
+  ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+
+; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	slti	${{[0-9]+}}, 3 	# 16 bit inst
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	slti	${{[0-9]+}}, 3 	# 16 bit inst
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
+
diff --git a/test/CodeGen/Mips/selne.ll b/test/CodeGen/Mips/selne.ll
new file mode 100644
index 0000000..e3d82b8
--- /dev/null
+++ b/test/CodeGen/Mips/selne.ll
@@ -0,0 +1,97 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@t = global i32 10, align 4
+@f = global i32 199, align 4
+@a = global i32 1, align 4
+@b = global i32 10, align 4
+@c = global i32 1, align 4
+@z1 = common global i32 0, align 4
+@z2 = common global i32 0, align 4
+@z3 = common global i32 0, align 4
+@z4 = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define void @calc_seleq() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {  ; four != register-register diamonds; NOTE(review) name says seleq but every compare here is ne
+entry:
+  %0 = load i32* @a, align 4
+  %1 = load i32* @b, align 4
+  %cmp = icmp ne i32 %0, %1  ; z1 = (a != b) ? f : t
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %2 = load i32* @f, align 4
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  %3 = load i32* @t, align 4
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ]
+  store i32 %cond, i32* @z1, align 4
+  %4 = load i32* @b, align 4
+  %5 = load i32* @a, align 4
+  %cmp1 = icmp ne i32 %4, %5  ; z2 = (b != a) ? f : t
+  br i1 %cmp1, label %cond.true2, label %cond.false3
+
+cond.true2:                                       ; preds = %cond.end
+  %6 = load i32* @f, align 4
+  br label %cond.end4
+
+cond.false3:                                      ; preds = %cond.end
+  %7 = load i32* @t, align 4
+  br label %cond.end4
+
+cond.end4:                                        ; preds = %cond.false3, %cond.true2
+  %cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ]
+  store i32 %cond5, i32* @z2, align 4
+  %8 = load i32* @c, align 4
+  %9 = load i32* @a, align 4
+  %cmp6 = icmp ne i32 %8, %9  ; z3 = (c != a) ? t : f
+  br i1 %cmp6, label %cond.true7, label %cond.false8
+
+cond.true7:                                       ; preds = %cond.end4
+  %10 = load i32* @t, align 4
+  br label %cond.end9
+
+cond.false8:                                      ; preds = %cond.end4
+  %11 = load i32* @f, align 4
+  br label %cond.end9
+
+cond.end9:                                        ; preds = %cond.false8, %cond.true7
+  %cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ]
+  store i32 %cond10, i32* @z3, align 4
+  %12 = load i32* @a, align 4
+  %13 = load i32* @c, align 4
+  %cmp11 = icmp ne i32 %12, %13  ; z4 = (a != c) ? t : f
+  br i1 %cmp11, label %cond.true12, label %cond.false13
+
+cond.true12:                                      ; preds = %cond.end9
+  %14 = load i32* @t, align 4
+  br label %cond.end14
+
+cond.false13:                                     ; preds = %cond.end9
+  %15 = load i32* @f, align 4
+  br label %cond.end14
+
+cond.end14:                                       ; preds = %cond.false13, %cond.true12
+  %cond15 = phi i32 [ %14, %cond.true12 ], [ %15, %cond.false13 ]
+  store i32 %cond15, i32* @z4, align 4
+  ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+
+; 16:	cmp	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	cmp	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	cmp	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	cmp	${{[0-9]+}}, ${{[0-9]+}}
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
+
diff --git a/test/CodeGen/Mips/selnek.ll b/test/CodeGen/Mips/selnek.ll
new file mode 100644
index 0000000..2601552
--- /dev/null
+++ b/test/CodeGen/Mips/selnek.ll
@@ -0,0 +1,107 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@t = global i32 10, align 4
+@f = global i32 199, align 4
+@a = global i32 1, align 4
+@b = global i32 1000, align 4
+@z1 = common global i32 0, align 4
+@z2 = common global i32 0, align 4
+@z3 = common global i32 0, align 4
+@z4 = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define void @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {  ; four != constant diamonds, results stored to @z1..@z4
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp ne i32 %0, 1  ; z1 = (a != 1) ? f : t
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %1 = load i32* @f, align 4
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  %2 = load i32* @t, align 4
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ]
+  store i32 %cond, i32* @z1, align 4
+  %3 = load i32* @a, align 4
+  %cmp1 = icmp ne i32 %3, 1000  ; z2 = (a != 1000) ? t : f
+  br i1 %cmp1, label %cond.true2, label %cond.false3
+
+cond.true2:                                       ; preds = %cond.end
+  %4 = load i32* @t, align 4
+  br label %cond.end4
+
+cond.false3:                                      ; preds = %cond.end
+  %5 = load i32* @f, align 4
+  br label %cond.end4
+
+cond.end4:                                        ; preds = %cond.false3, %cond.true2
+  %cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ]
+  store i32 %cond5, i32* @z2, align 4
+  %6 = load i32* @b, align 4
+  %cmp6 = icmp ne i32 %6, 3  ; z3 = (b != 3) ? t : f
+  br i1 %cmp6, label %cond.true7, label %cond.false8
+
+cond.true7:                                       ; preds = %cond.end4
+  %7 = load i32* @t, align 4
+  br label %cond.end9
+
+cond.false8:                                      ; preds = %cond.end4
+  %8 = load i32* @f, align 4
+  br label %cond.end9
+
+cond.end9:                                        ; preds = %cond.false8, %cond.true7
+  %cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ]
+  store i32 %cond10, i32* @z3, align 4
+  %9 = load i32* @b, align 4
+  %cmp11 = icmp ne i32 %9, 1000  ; z4 = (b != 1000) ? f : t
+  br i1 %cmp11, label %cond.true12, label %cond.false13
+
+cond.true12:                                      ; preds = %cond.end9
+  %10 = load i32* @f, align 4
+  br label %cond.end14
+
+cond.false13:                                     ; preds = %cond.end9
+  %11 = load i32* @t, align 4
+  br label %cond.end14
+
+cond.end14:                                       ; preds = %cond.false13, %cond.true12
+  %cond15 = phi i32 [ %10, %cond.true12 ], [ %11, %cond.false13 ]
+  store i32 %cond15, i32* @z4, align 4
+  ret void
+}
+
+define i32 @main() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {  ; runs calc_z, then prints @z1..@z4 one at a time
+entry:
+  call void @calc_z() "target-cpu"="mips16" "target-features"="+mips16,+o32"  ; fills @z1..@z4
+  %0 = load i32* @z1, align 4
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %0) "target-cpu"="mips16" "target-features"="+mips16,+o32"  ; print z1 via @.str
+  %1 = load i32* @z2, align 4
+  %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1) "target-cpu"="mips16" "target-features"="+mips16,+o32"  ; print z2
+  %2 = load i32* @z3, align 4
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %2) "target-cpu"="mips16" "target-features"="+mips16,+o32"  ; print z3
+  %3 = load i32* @z4, align 4
+  %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %3) "target-cpu"="mips16" "target-features"="+mips16,+o32"  ; print z4
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...) "target-cpu"="mips16" "target-features"="+mips16,+o32"
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+
+; 16:	cmpi	${{[0-9]+}}, 1 	# 16 bit inst
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	cmpi	${{[0-9]+}}, 1000
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	cmpi	${{[0-9]+}}, 3 	# 16 bit inst
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
+
+; 16:	cmpi	${{[0-9]+}}, 1000
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
+\ No newline at end of file
diff --git a/test/CodeGen/Mips/selpat.ll b/test/CodeGen/Mips/selpat.ll
index cda0c96..8eda8de 100644
--- a/test/CodeGen/Mips/selpat.ll
+++ b/test/CodeGen/Mips/selpat.ll
@@ -20,7 +20,7 @@ entry:
   %cond = select i1 %cmp, i32 %2, i32 %3
   store i32 %cond, i32* @z1, align 4
 ; 16:	cmp	${{[0-9]+}}, ${{[0-9]+}}
-; 16:	bteqz	.+4
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
   store i32 %cond, i32* @z2, align 4
   %4 = load i32* @c, align 4
@@ -41,7 +41,7 @@ entry:
   %cond = select i1 %cmp, i32 %1, i32 %2
   store i32 %cond, i32* @z1, align 4
 ; 16:	cmpi	${{[0-9]+}}, 1
-; 16:	bteqz	.+4
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
   %cmp1 = icmp eq i32 %0, 10
   %cond5 = select i1 %cmp1, i32 %2, i32 %1
@@ -51,7 +51,7 @@ entry:
   %cond10 = select i1 %cmp6, i32 %2, i32 %1
   store i32 %cond10, i32* @z3, align 4
 ; 16:	cmpi	${{[0-9]+}}, 10
-; 16:	bteqz	.+4
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
   %cmp11 = icmp eq i32 %3, 10
   %cond15 = select i1 %cmp11, i32 %1, i32 %2
@@ -67,7 +67,7 @@ entry:
   %2 = load i32* @f, align 4
   %cond = select i1 %cmp, i32 %1, i32 %2
   store i32 %cond, i32* @z1, align 4
-; 16:	beqz	${{[0-9]+}}, .+4
+; 16:	beqz	${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
   %3 = load i32* @b, align 4
   %cmp1 = icmp eq i32 %3, 0
@@ -91,7 +91,7 @@ entry:
   %cond = select i1 %cmp, i32 %2, i32 %3
   store i32 %cond, i32* @z1, align 4
 ; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
-; 16:	bteqz	.+4
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
   %cmp1 = icmp sge i32 %1, %0
   %cond5 = select i1 %cmp1, i32 %3, i32 %2
@@ -112,7 +112,7 @@ entry:
   %1 = load i32* @b, align 4
   %cmp = icmp sgt i32 %0, %1
 ; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
-; 16:	btnez	.+4
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
   %2 = load i32* @f, align 4
   %3 = load i32* @t, align 4
@@ -141,7 +141,7 @@ entry:
   %cond = select i1 %cmp, i32 %2, i32 %3
   store i32 %cond, i32* @z1, align 4
 ; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
-; 16:	bteqz	.+4
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
   %cmp1 = icmp sle i32 %1, %0
   %cond5 = select i1 %cmp1, i32 %3, i32 %2
@@ -165,7 +165,7 @@ entry:
   %cond = select i1 %cmp, i32 %1, i32 %2
   store i32 %cond, i32* @z1, align 4
 ; 16:	slti	${{[0-9]+}}, {{[0-9]+}}
-; 16:	btnez	.+4
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
   %3 = load i32* @b, align 4
   %cmp1 = icmp slt i32 %3, 2
@@ -192,7 +192,7 @@ entry:
   %cond = select i1 %cmp, i32 %2, i32 %3
   store i32 %cond, i32* @z1, align 4
 ; 16:	cmp	${{[0-9]+}}, ${{[0-9]+}}
-; 16:	btnez	.+4
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
   store i32 %cond, i32* @z2, align 4
   %4 = load i32* @c, align 4
@@ -212,7 +212,7 @@ entry:
   %cond = select i1 %cmp, i32 %1, i32 %2
   store i32 %cond, i32* @z1, align 4
 ; 16:	cmpi	${{[0-9]+}}, 1
-; 16:	btnez	.+4
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
   %cmp1 = icmp ne i32 %0, 10
   %cond5 = select i1 %cmp1, i32 %2, i32 %1
@@ -222,7 +222,7 @@ entry:
   %cond10 = select i1 %cmp6, i32 %2, i32 %1
   store i32 %cond10, i32* @z3, align 4
 ; 16:	cmpi	${{[0-9]+}}, 10
-; 16:	btnez	.+4
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
   %cmp11 = icmp ne i32 %3, 10
   %cond15 = select i1 %cmp11, i32 %1, i32 %2
@@ -238,7 +238,7 @@ entry:
   %2 = load i32* @t, align 4
   %cond = select i1 %cmp, i32 %1, i32 %2
   store i32 %cond, i32* @z1, align 4
-; 16:	bnez	${{[0-9]+}}, .+4
+; 16:	bnez	${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
   %3 = load i32* @b, align 4
   %cmp1 = icmp ne i32 %3, 0
@@ -260,7 +260,7 @@ entry:
   %2 = load i32* @t, align 4
   %cond = select i1 %tobool, i32 %1, i32 %2
   store i32 %cond, i32* @z1, align 4
-; 16:	bnez	${{[0-9]+}}, .+4
+; 16:	bnez	${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
   %3 = load i32* @b, align 4
   %tobool1 = icmp ne i32 %3, 0
@@ -284,7 +284,7 @@ entry:
   %cond = select i1 %cmp, i32 %2, i32 %3
   store i32 %cond, i32* @z1, align 4
 ; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
-; 16:	bteqz	.+4
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
   %cmp1 = icmp uge i32 %1, %0
   %cond5 = select i1 %cmp1, i32 %3, i32 %2
@@ -309,7 +309,7 @@ entry:
   %cond = select i1 %cmp, i32 %2, i32 %3
   store i32 %cond, i32* @z1, align 4
 ; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
-; 16:	btnez	.+4
+; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
   %cmp1 = icmp ugt i32 %1, %0
   %cond5 = select i1 %cmp1, i32 %3, i32 %2
@@ -334,7 +334,7 @@ entry:
   %cond = select i1 %cmp, i32 %2, i32 %3
   store i32 %cond, i32* @z1, align 4
 ; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
-; 16:	bteqz	.+4
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
   %cmp1 = icmp ule i32 %1, %0
   %cond5 = select i1 %cmp1, i32 %3, i32 %2
diff --git a/test/CodeGen/Mips/seteq.ll b/test/CodeGen/Mips/seteq.ll
index da840c8..5fadf78 100644
--- a/test/CodeGen/Mips/seteq.ll
+++ b/test/CodeGen/Mips/seteq.ll
@@ -15,7 +15,7 @@ entry:
   store i32 %conv, i32* @r1, align 4
 ; 16:	xor	$[[REGISTER:[0-9A-Ba-b_]+]], ${{[0-9]+}}
 ; 16:	sltiu	$[[REGISTER:[0-9A-Ba-b_]+]], 1
-; 16:	move	${{[0-9]+}}, $t8
+; 16:	move	${{[0-9]+}}, $24
   ret void
 }
 
diff --git a/test/CodeGen/Mips/seteqz.ll b/test/CodeGen/Mips/seteqz.ll
index d445be6..80dc312 100644
--- a/test/CodeGen/Mips/seteqz.ll
+++ b/test/CodeGen/Mips/seteqz.ll
@@ -12,13 +12,13 @@ entry:
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
 ; 16:	sltiu	${{[0-9]+}}, 1
-; 16:	move	${{[0-9]+}}, $t8
+; 16:	move	${{[0-9]+}}, $24
   %1 = load i32* @j, align 4
   %cmp1 = icmp eq i32 %1, 99
   %conv2 = zext i1 %cmp1 to i32
   store i32 %conv2, i32* @r2, align 4
 ; 16:	xor	$[[REGISTER:[0-9A-Ba-b_]+]], ${{[0-9]+}}
 ; 16:	sltiu	$[[REGISTER:[0-9A-Ba-b_]+]], 1
-; 16:	move	${{[0-9]+}}, $t8
+; 16:	move	${{[0-9]+}}, $24
   ret void
 }
diff --git a/test/CodeGen/Mips/setge.ll b/test/CodeGen/Mips/setge.ll
index 94b499b..8869eb8 100644
--- a/test/CodeGen/Mips/setge.ll
+++ b/test/CodeGen/Mips/setge.ll
@@ -17,7 +17,7 @@ entry:
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
 ; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
-; 16:	move	$[[REGISTER:[0-9]+]], $t8
+; 16:	move	$[[REGISTER:[0-9]+]], $24
 ; 16:	xor	$[[REGISTER]], ${{[0-9]+}}
   %2 = load i32* @m, align 4
   %cmp1 = icmp sge i32 %0, %2
diff --git a/test/CodeGen/Mips/setgek.ll b/test/CodeGen/Mips/setgek.ll
index b6bae09..18a0fcf 100644
--- a/test/CodeGen/Mips/setgek.ll
+++ b/test/CodeGen/Mips/setgek.ll
@@ -12,7 +12,7 @@ entry:
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
 ; 16:	slti	${{[0-9]+}}, -32768
-; 16:	move	${{[0-9]+}}, $t8
+; 16:	move	${{[0-9]+}}, $24
 ; 16:	xor	${{[0-9]+}}, ${{[0-9]+}}
   ret void
 }
diff --git a/test/CodeGen/Mips/setle.ll b/test/CodeGen/Mips/setle.ll
index f36fb43..2df6774 100644
--- a/test/CodeGen/Mips/setle.ll
+++ b/test/CodeGen/Mips/setle.ll
@@ -16,7 +16,7 @@ entry:
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
 ; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
-; 16:	move	$[[REGISTER:[0-9]+]], $t8
+; 16:	move	$[[REGISTER:[0-9]+]], $24
 ; 16:	xor	$[[REGISTER]], ${{[0-9]+}}
   %2 = load i32* @m, align 4
   %cmp1 = icmp sle i32 %2, %1
diff --git a/test/CodeGen/Mips/setlt.ll b/test/CodeGen/Mips/setlt.ll
index 435be8e..3dac74b 100644
--- a/test/CodeGen/Mips/setlt.ll
+++ b/test/CodeGen/Mips/setlt.ll
@@ -16,6 +16,6 @@ entry:
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
 ; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
-; 16:	move	${{[0-9]+}}, $t8
+; 16:	move	${{[0-9]+}}, $24
   ret void
 }
diff --git a/test/CodeGen/Mips/setltk.ll b/test/CodeGen/Mips/setltk.ll
index c0b610e..ecebc7e 100644
--- a/test/CodeGen/Mips/setltk.ll
+++ b/test/CodeGen/Mips/setltk.ll
@@ -15,6 +15,6 @@ entry:
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
 ; 16:	slti	$[[REGISTER:[0-9]+]], 10
-; 16:	move	$[[REGISTER]], $t8
+; 16:	move	$[[REGISTER]], $24
   ret void
 }
diff --git a/test/CodeGen/Mips/setne.ll b/test/CodeGen/Mips/setne.ll
index 6460c83..9e66901 100644
--- a/test/CodeGen/Mips/setne.ll
+++ b/test/CodeGen/Mips/setne.ll
@@ -15,6 +15,6 @@ entry:
   store i32 %conv, i32* @r1, align 4
 ; 16:	xor	$[[REGISTER:[0-9]+]], ${{[0-9]+}}
 ; 16:	sltu	${{[0-9]+}}, $[[REGISTER]]
-; 16:	move	${{[0-9]+}}, $t8
+; 16:	move	${{[0-9]+}}, $24
   ret void
 }
diff --git a/test/CodeGen/Mips/setuge.ll b/test/CodeGen/Mips/setuge.ll
index ac72b66..1c9b5bb 100644
--- a/test/CodeGen/Mips/setuge.ll
+++ b/test/CodeGen/Mips/setuge.ll
@@ -16,7 +16,7 @@ entry:
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
 ; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
-; 16:	move    $[[REGISTER:[0-9]+]], $t8
+; 16:	move    $[[REGISTER:[0-9]+]], $24
 ; 16:	xor	$[[REGISTER]], ${{[0-9]+}}
   %2 = load i32* @m, align 4
   %cmp1 = icmp uge i32 %0, %2
diff --git a/test/CodeGen/Mips/setugt.ll b/test/CodeGen/Mips/setugt.ll
index 328f0e3..f10b47a 100644
--- a/test/CodeGen/Mips/setugt.ll
+++ b/test/CodeGen/Mips/setugt.ll
@@ -16,6 +16,6 @@ entry:
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
 ; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
-; 16:	move    ${{[0-9]+}}, $t8
+; 16:	move    ${{[0-9]+}}, $24
   ret void
 }
diff --git a/test/CodeGen/Mips/setule.ll b/test/CodeGen/Mips/setule.ll
index 792f2ae..a6d6bf0 100644
--- a/test/CodeGen/Mips/setule.ll
+++ b/test/CodeGen/Mips/setule.ll
@@ -16,7 +16,7 @@ entry:
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
 ; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
-; 16:	move	$[[REGISTER:[0-9]+]], $t8
+; 16:	move	$[[REGISTER:[0-9]+]], $24
 ; 16:	xor	$[[REGISTER]], ${{[0-9]+}}
   %2 = load i32* @m, align 4
   %cmp1 = icmp ule i32 %2, %1
diff --git a/test/CodeGen/Mips/setult.ll b/test/CodeGen/Mips/setult.ll
index 56d2e8d..00ee437 100644
--- a/test/CodeGen/Mips/setult.ll
+++ b/test/CodeGen/Mips/setult.ll
@@ -16,6 +16,6 @@ entry:
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
 ; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
-; 16:	move	${{[0-9]+}}, $t8
+; 16:	move	${{[0-9]+}}, $24
   ret void
 }
diff --git a/test/CodeGen/Mips/setultk.ll b/test/CodeGen/Mips/setultk.ll
index 75b270e..eb9edba 100644
--- a/test/CodeGen/Mips/setultk.ll
+++ b/test/CodeGen/Mips/setultk.ll
@@ -14,7 +14,7 @@ entry:
   %cmp = icmp ult i32 %0, 10
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
-; 16:	sltiu	$[[REGISTER:[0-9]+]], 10
-; 16:	move	$[[REGISTER]], $t8
+; 16:	sltiu	${{[0-9]+}}, 10 # 16 bit inst
+; 16:	move	${{[0-9]+}}, $24
   ret void
 }
diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll
index 72d30dc..b86d25e 100644
--- a/test/CodeGen/Mips/tls.ll
+++ b/test/CodeGen/Mips/tls.ll
@@ -21,9 +21,9 @@ entry:
 ; PIC:   jalr    $25
 ; PIC:   lw      $2, 0($2)
 
-; STATIC:   rdhwr   $3, $29
 ; STATIC:   lui     $[[R0:[0-9]+]], %tprel_hi(t1)
 ; STATIC:   addiu   $[[R1:[0-9]+]], $[[R0]], %tprel_lo(t1)
+; STATIC:   rdhwr   $3, $29
 ; STATIC:   addu    $[[R2:[0-9]+]], $3, $[[R1]]
 ; STATIC:   lw      $2, 0($[[R2]])
 }
diff --git a/test/CodeGen/NVPTX/intrin-nocapture.ll b/test/CodeGen/NVPTX/intrin-nocapture.ll
new file mode 100644
index 0000000..55781bb
--- /dev/null
+++ b/test/CodeGen/NVPTX/intrin-nocapture.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -O3 -S | FileCheck %s
+
+; Address space intrinsics were erroneously marked NoCapture, leading to bad
+; optimizations (such as the store below being eliminated as dead code). This
+; test makes sure we don't regress.
+
+declare void @foo(i32 addrspace(1)*)
+
+declare i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32*)
+
+; CHECK: @bar
+define void @bar() { ; regression input: a store to memory that is also reachable through a converted pointer
+  %t1 = alloca i32 ; stack slot whose address is handed to the intrinsic below
+; CHECK: call i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32* %t1)
+; CHECK-NEXT: store i32 10, i32* %t1
+  %t2 = call i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32* %t1) ; %t2 is derived from %t1 (generic -> addrspace(1))
+  store i32 10, i32* %t1 ; this store must survive -O3; the directives above require it right after the call
+  call void @foo(i32 addrspace(1)* %t2) ; external @foo receives %t2, so it may observe the stored value
+  ret void
+}
+
diff --git a/test/CodeGen/NVPTX/vector-loads.ll b/test/CodeGen/NVPTX/vector-loads.ll
new file mode 100644
index 0000000..f5a1795
--- /dev/null
+++ b/test/CodeGen/NVPTX/vector-loads.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; Even though general vector types are not supported in PTX, we can still
+; optimize loads/stores with pseudo-vector instructions of the form:
+;
+; ld.v2.f32 {%f0, %f1}, [%r0]
+;
+; which will load two floats at once into scalar registers.
+
+define void @foo(<2 x float>* %a) { ; squares a <2 x float> in place through %a
+; CHECK: .func foo
+; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}];
+  %t1 = load <2 x float>* %a ; the directive above expects this to be emitted as one ld.v2.f32
+  %t2 = fmul <2 x float> %t1, %t1 ; element-wise square
+  store <2 x float> %t2, <2 x float>* %a ; write the result back to the same address
+  ret void
+}
+
+define void @foo2(<4 x float>* %a) { ; squares a <4 x float> in place through %a
+; CHECK: .func foo2
+; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}];
+  %t1 = load <4 x float>* %a ; the directive above expects this to be emitted as one ld.v4.f32
+  %t2 = fmul <4 x float> %t1, %t1 ; element-wise square
+  store <4 x float> %t2, <4 x float>* %a ; write the result back to the same address
+  ret void
+}
+
+define void @foo3(<8 x float>* %a) { ; squares a <8 x float> in place through %a
+; CHECK: .func foo3
+; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}];
+; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}+16];
+  %t1 = load <8 x float>* %a ; the directives above expect two back-to-back ld.v4.f32, the second at offset +16
+  %t2 = fmul <8 x float> %t1, %t1 ; element-wise square
+  store <8 x float> %t2, <8 x float>* %a ; write the result back to the same address
+  ret void
+}
+
+
+
+define void @foo4(<2 x i32>* %a) { ; integer variant: squares a <2 x i32> in place through %a
+; CHECK: .func foo4
+; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}];
+  %t1 = load <2 x i32>* %a ; the directive above expects this to be emitted as one ld.v2.u32
+  %t2 = mul <2 x i32> %t1, %t1 ; element-wise square
+  store <2 x i32> %t2, <2 x i32>* %a ; write the result back to the same address
+  ret void
+}
+
+define void @foo5(<4 x i32>* %a) { ; integer variant: squares a <4 x i32> in place through %a
+; CHECK: .func foo5
+; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}];
+  %t1 = load <4 x i32>* %a ; the directive above expects this to be emitted as one ld.v4.u32
+  %t2 = mul <4 x i32> %t1, %t1 ; element-wise square
+  store <4 x i32> %t2, <4 x i32>* %a ; write the result back to the same address
+  ret void
+}
+
+define void @foo6(<8 x i32>* %a) { ; integer variant: squares a <8 x i32> in place through %a
+; CHECK: .func foo6
+; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}];
+; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}+16];
+  %t1 = load <8 x i32>* %a ; the directives above expect two back-to-back ld.v4.u32, the second at offset +16
+  %t2 = mul <8 x i32> %t1, %t1 ; element-wise square
+  store <8 x i32> %t2, <8 x i32>* %a ; write the result back to the same address
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
index 8802b97..00a402e 100644
--- a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
+++ b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vadduhm
 ; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubuhm
+; XFAIL: *
 
 define <4 x i32> @test() nounwind {
 	ret <4 x i32> < i32 4293066722, i32 4293066722, i32 4293066722, i32 4293066722>
diff --git a/test/CodeGen/PowerPC/a2q-stackalign.ll b/test/CodeGen/PowerPC/a2q-stackalign.ll
new file mode 100644
index 0000000..00c3291
--- /dev/null
+++ b/test/CodeGen/PowerPC/a2q-stackalign.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck -check-prefix=CHECK-A2Q %s
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc64-bgq-linux -mcpu=a2 | FileCheck -check-prefix=CHECK-BGQ %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare i32 @bar(i8* %a) nounwind;
+define i32 @foo() nounwind { ; allocates a small stack buffer so the frame size can be compared across CPUs
+  %p = alloca i8, i8 115 ; 115 one-byte elements on the stack
+  store i8 0, i8* %p ; write to the first byte of the buffer
+  %r = call i32 @bar(i8* %p) ; %p is passed to @bar, which is only declared (no body) in this file
+  ret i32 %r ; forward @bar's result
+}
+
+; Without QPX, the allocated stack frame is 240 bytes, but with QPX
+; (because we require 32-byte alignment), it is 256 bytes.
+; CHECK-A2: @foo
+; CHECK-A2: stdu 1, -240(1)
+; CHECK-A2Q: @foo
+; CHECK-A2Q: stdu 1, -256(1)
+; CHECK-BGQ: @foo
+; CHECK-BGQ: stdu 1, -256(1)
+
diff --git a/test/CodeGen/PowerPC/a2q.ll b/test/CodeGen/PowerPC/a2q.ll
new file mode 100644
index 0000000..b26480f
--- /dev/null
+++ b/test/CodeGen/PowerPC/a2q.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+; RUN: llc < %s -march=ppc64 -mcpu=a2 -mattr=+qpx | FileCheck %s
+
+define void @foo() { ; intentionally empty; the two RUN lines differ only in how the CPU/feature is selected
+entry:
+  ret void ; nothing to compute; the test only looks for @foo in the output
+}
+
+; CHECK: @foo
+
diff --git a/test/CodeGen/PowerPC/anon_aggr.ll b/test/CodeGen/PowerPC/anon_aggr.ll
new file mode 100644
index 0000000..52587e2
--- /dev/null
+++ b/test/CodeGen/PowerPC/anon_aggr.ll
@@ -0,0 +1,99 @@
+; RUN: llc -O0 -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+
+; Test case for PR 14779: anonymous aggregates are not handled correctly.
+; The bug is triggered by passing a byval structure after an anonymous
+; aggregate.
+
+%tarray = type { i64, i8* }
+
+define i8* @func1({ i64, i8* } %array, i8* %ptr) { ; anonymous aggregate by value, followed by a plain pointer
+entry:
+  %array_ptr = extractvalue {i64, i8* } %array, 1 ; field 1 is the i8* member of the aggregate
+  %cond = icmp eq i8* %array_ptr, %ptr ; compare the aggregate's pointer with the second argument
+  br i1 %cond, label %equal, label %unequal
+equal: ; pointers match: return the one taken from the aggregate
+  ret i8* %array_ptr
+unequal: ; pointers differ: return the explicit pointer argument
+  ret i8* %ptr
+}
+
+; CHECK: func1:
+; CHECK: cmpld {{[0-9]+}}, 4, 5
+; CHECK: std 4, -[[OFFSET1:[0-9]+]]
+; CHECK: std 5, -[[OFFSET2:[0-9]+]]
+; CHECK: ld 3, -[[OFFSET1]](1)
+; CHECK: ld 3, -[[OFFSET2]](1)
+
+
+define i8* @func2({ i64, i8* } %array1, %tarray* byval %array2) { ; byval struct after an anonymous aggregate: the case described in the file header
+entry:
+  %array1_ptr = extractvalue {i64, i8* } %array1, 1 ; i8* member of the by-value aggregate
+  %tmp = getelementptr inbounds %tarray* %array2, i32 0, i32 1 ; address of field 1 (the i8*) inside the byval struct
+  %array2_ptr = load i8** %tmp ; i8* member of the byval struct
+  %cond = icmp eq i8* %array1_ptr, %array2_ptr
+  br i1 %cond, label %equal, label %unequal
+equal: ; pointers match: return the one from %array1
+  ret i8* %array1_ptr
+unequal: ; pointers differ: return the one loaded from %array2
+  ret i8* %array2_ptr
+}
+
+; CHECK: func2:
+; CHECK: addi [[REG1:[0-9]+]], 1, 64
+; CHECK: ld [[REG2:[0-9]+]], 8([[REG1]])
+; CHECK: cmpld {{[0-9]+}}, 4, [[REG2]]
+; CHECK: std [[REG2]], -[[OFFSET1:[0-9]+]]
+; CHECK: std 4, -[[OFFSET2:[0-9]+]]
+; CHECK: ld 3, -[[OFFSET2]](1)
+; CHECK: ld 3, -[[OFFSET1]](1)
+
+define i8* @func3({ i64, i8* }* byval %array1, %tarray* byval %array2) { ; both aggregates passed byval; the first has an anonymous type
+entry:
+  %tmp1 = getelementptr inbounds { i64, i8* }* %array1, i32 0, i32 1 ; address of field 1 in the anonymous struct
+  %array1_ptr = load i8** %tmp1 ; i8* member of %array1
+  %tmp2 = getelementptr inbounds %tarray* %array2, i32 0, i32 1 ; address of field 1 in the named struct
+  %array2_ptr = load i8** %tmp2 ; i8* member of %array2
+  %cond = icmp eq i8* %array1_ptr, %array2_ptr
+  br i1 %cond, label %equal, label %unequal
+equal: ; pointers match: return the one from %array1
+  ret i8* %array1_ptr
+unequal: ; pointers differ: return the one from %array2
+  ret i8* %array2_ptr
+}
+
+; CHECK: func3:
+; CHECK: addi [[REG1:[0-9]+]], 1, 64
+; CHECK: addi [[REG2:[0-9]+]], 1, 48
+; CHECK: ld [[REG3:[0-9]+]], 8([[REG1]])
+; CHECK: ld [[REG4:[0-9]+]], 8([[REG2]])
+; CHECK: cmpld {{[0-9]+}}, [[REG4]], [[REG3]]
+; CHECK: std [[REG3]], -[[OFFSET1:[0-9]+]](1)
+; CHECK: std [[REG4]], -[[OFFSET2:[0-9]+]](1)
+; CHECK: ld 3, -[[OFFSET2]](1)
+; CHECK: ld 3, -[[OFFSET1]](1)
+
+define i8* @func4(i64 %p1, i64 %p2, i64 %p3, i64 %p4,
+                  i64 %p5, i64 %p6, i64 %p7, i64 %p8,
+                  { i64, i8* } %array1, %tarray* byval %array2) { ; %p1..%p8 are unused; presumably they push the aggregates into memory (the checks read 120(1)) -- TODO confirm
+entry:
+  %array1_ptr = extractvalue {i64, i8* } %array1, 1 ; i8* member of the by-value aggregate
+  %tmp = getelementptr inbounds %tarray* %array2, i32 0, i32 1 ; address of field 1 inside the byval struct
+  %array2_ptr = load i8** %tmp ; i8* member of the byval struct
+  %cond = icmp eq i8* %array1_ptr, %array2_ptr
+  br i1 %cond, label %equal, label %unequal
+equal: ; pointers match: return the one from %array1
+  ret i8* %array1_ptr
+unequal: ; pointers differ: return the one loaded from %array2
+  ret i8* %array2_ptr
+}
+
+; CHECK: func4:
+; CHECK: addi [[REG1:[0-9]+]], 1, 128
+; CHECK: ld [[REG2:[0-9]+]], 120(1)
+; CHECK: ld [[REG3:[0-9]+]], 8([[REG1]])
+; CHECK: cmpld {{[0-9]+}}, [[REG2]], [[REG3]]
+; CHECK: std [[REG2]], -[[OFFSET1:[0-9]+]](1)
+; CHECK: std [[REG3]], -[[OFFSET2:[0-9]+]](1)
+; CHECK: ld 3, -[[OFFSET1]](1)
+; CHECK: ld 3, -[[OFFSET2]](1)
+
diff --git a/test/CodeGen/PowerPC/complex-return.ll b/test/CodeGen/PowerPC/complex-return.ll
new file mode 100644
index 0000000..f12152f
--- /dev/null
+++ b/test/CodeGen/PowerPC/complex-return.ll
@@ -0,0 +1,55 @@
+; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define { ppc_fp128, ppc_fp128 } @foo() nounwind { ; returns a two-element ppc_fp128 struct by value (real, imag)
+entry:
+  %retval = alloca { ppc_fp128, ppc_fp128 }, align 16 ; staging slot for the return value
+  %x = alloca { ppc_fp128, ppc_fp128 }, align 16 ; local that is initialised and then copied out
+  %real = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 0 ; &x.field0
+  %imag = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 1 ; &x.field1
+  store ppc_fp128 0xM400C0000000000000000000000000000, ppc_fp128* %real ; constant for field 0
+  store ppc_fp128 0xMC00547AE147AE1483CA47AE147AE147A, ppc_fp128* %imag ; constant for field 1
+  %x.realp = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 0
+  %x.real = load ppc_fp128* %x.realp ; read field 0 back from %x
+  %x.imagp = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 1
+  %x.imag = load ppc_fp128* %x.imagp ; read field 1 back from %x
+  %real1 = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %retval, i32 0, i32 0
+  %imag2 = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %retval, i32 0, i32 1
+  store ppc_fp128 %x.real, ppc_fp128* %real1 ; copy field 0 into the return slot
+  store ppc_fp128 %x.imag, ppc_fp128* %imag2 ; copy field 1 into the return slot
+  %0 = load { ppc_fp128, ppc_fp128 }* %retval ; load the whole struct as one first-class value
+  ret { ppc_fp128, ppc_fp128 } %0
+}
+
+; CHECK: foo:
+; CHECK: lfd 3
+; CHECK: lfd 4
+; CHECK: lfd 2
+; CHECK: lfd 1
+
+define { float, float } @oof() nounwind { ; float counterpart of @foo: returns a two-element float struct by value
+entry:
+  %retval = alloca { float, float }, align 4 ; staging slot for the return value
+  %x = alloca { float, float }, align 4 ; local that is initialised and then copied out
+  %real = getelementptr inbounds { float, float }* %x, i32 0, i32 0 ; &x.field0
+  %imag = getelementptr inbounds { float, float }* %x, i32 0, i32 1 ; &x.field1
+  store float 3.500000e+00, float* %real ; field 0 = 3.5
+  store float 0xC00547AE20000000, float* %imag ; field 1 = constant given in hex form
+  %x.realp = getelementptr inbounds { float, float }* %x, i32 0, i32 0
+  %x.real = load float* %x.realp ; read field 0 back from %x
+  %x.imagp = getelementptr inbounds { float, float }* %x, i32 0, i32 1
+  %x.imag = load float* %x.imagp ; read field 1 back from %x
+  %real1 = getelementptr inbounds { float, float }* %retval, i32 0, i32 0
+  %imag2 = getelementptr inbounds { float, float }* %retval, i32 0, i32 1
+  store float %x.real, float* %real1 ; copy field 0 into the return slot
+  store float %x.imag, float* %imag2 ; copy field 1 into the return slot
+  %0 = load { float, float }* %retval ; load the whole struct as one first-class value
+  ret { float, float } %0
+}
+
+; CHECK: oof:
+; CHECK: lfs 2
+; CHECK: lfs 1
+
diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll
index e161cb0..8d87cf7 100644
--- a/test/CodeGen/PowerPC/dbg.ll
+++ b/test/CodeGen/PowerPC/dbg.ll
@@ -17,10 +17,8 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 720913, i32 0, i32 12, metadata !"dbg.c", metadata !"/src", metadata !"clang version 3.1", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 720937, metadata !"dbg.c", metadata !"/src", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/CodeGen/PowerPC/float-asmprint.ll b/test/CodeGen/PowerPC/float-asmprint.ll
new file mode 100644
index 0000000..c9dc028
--- /dev/null
+++ b/test/CodeGen/PowerPC/float-asmprint.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple=powerpc64-none-linux < %s | FileCheck %s
+
+; Check that all current floating-point types are correctly emitted to assembly
+; on a big-endian target. x86_fp80 can't actually print for unrelated reasons,
+; but that's not really a problem.
+
+@var128 = global fp128 0xL00000000000000008000000000000000, align 16
+@varppc128 = global ppc_fp128 0xM80000000000000000000000000000000, align 16
+@var64 = global double -0.0, align 8
+@var32 = global float -0.0, align 4
+@var16 = global half -0.0, align 2
+
+; CHECK: var128:
+; CHECK-NEXT: .quad -9223372036854775808      # fp128 -0
+; CHECK-NEXT: .quad 0
+; CHECK-NEXT: .size
+
+; CHECK: varppc128:
+; CHECK-NEXT: .quad -9223372036854775808      # ppc_fp128 -0
+; CHECK-NEXT: .quad 0
+; CHECK-NEXT: .size
+
+; CHECK: var64:
+; CHECK-NEXT: .quad -9223372036854775808      # double -0
+; CHECK-NEXT: .size
+
+; CHECK: var32:
+; CHECK-NEXT: .long 2147483648                # float -0
+; CHECK-NEXT: .size
+
+; CHECK: var16:
+; CHECK-NEXT: .short 32768                    # half -0
+; CHECK-NEXT: .size
+
diff --git a/test/CodeGen/PowerPC/fp128.ll b/test/CodeGen/PowerPC/fp128.ll
deleted file mode 100644
index a0b06a4..0000000
--- a/test/CodeGen/PowerPC/fp128.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc -mtriple=powerpc64-none-linux < %s | FileCheck --check-prefix=BIGENDIAN %s
-
-@var = global fp128 0xL00000000000000008000000000000000
-
-; CHECK-BIGENDIAN: var:
-; CHECK-BIGENDIAN-NEXT: .quad   -9223372036854775808    # fp128 -0
-; CHECK-BIGENDIAN-NEXT: .quad   0
-
diff --git a/test/CodeGen/PowerPC/load-shift-combine.ll b/test/CodeGen/PowerPC/load-shift-combine.ll
new file mode 100644
index 0000000..a5d1224
--- /dev/null
+++ b/test/CodeGen/PowerPC/load-shift-combine.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s
+
+; This used to cause a crash.  A standard load is converted to a pre-increment
+; load.  Later the pre-increment load is combined with a subsequent SRL to
+; produce a smaller load.  This transform invalidly created a standard load
+; and propagated the produced value into uses of both produced values of the
+; pre-increment load.  The result was a crash when attempting to process an
+; add with a token-chain operand.
+
+%struct.Info = type { i32, i32, i8*, i8*, i8*, [32 x i8*], i64, [32 x i64], i64, i64, i64, [32 x i64] }
+%struct.S1847 = type { [12 x i8], [4 x i8], [8 x i8], [4 x i8], [8 x i8], [2 x i8], i8, [4 x i64], i8, [3 x i8], [4 x i8], i8, i16, [4 x %struct.anon.76], i16, i8, i8* }
+%struct.anon.76 = type { i32 }
+@info = common global %struct.Info zeroinitializer, align 8
+@fails = common global i32 0, align 4
+@a1847 = external global [5 x %struct.S1847]
+define void @test1847() nounwind { ; crash reproducer; the RUN line has no FileCheck, so llc only has to finish
+entry:
+  %j = alloca i32, align 4 ; local counter, incremented once below
+  %0 = load i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 8), align 8 ; result %0 is not used again in this function
+  %1 = load i32* @fails, align 4 ; result %1 is not used again in this function
+  %bf.load1 = load i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8 ; i96 load from element 2 of @a1847
+  %bf.clear2 = and i96 %bf.load1, 302231454903657293676543 ; keep the bits selected by the mask
+  %bf.set3 = or i96 %bf.clear2, -38383394772764476296921088 ; or in the new bits
+  store i96 %bf.set3, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8 ; write back to the same address
+  %2 = load i32* %j, align 4 ; result %2 is not used again in this function
+  %3 = load i32* %j, align 4
+  %inc11 = add nsw i32 %3, 1 ; j + 1
+  store i32 %inc11, i32* %j, align 4
+  %bf.load15 = load i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8 ; second i96 load from the same address
+  %bf.clear16 = and i96 %bf.load15, -18446744069414584321 ; keep the bits selected by the mask
+  %bf.set17 = or i96 %bf.clear16, 18446743532543672320 ; or in the new bits
+  store i96 %bf.set17, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8 ; write back to the same address
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/mcm-1.ll b/test/CodeGen/PowerPC/mcm-1.ll
index 62fe88c..a57fb9d 100644
--- a/test/CodeGen/PowerPC/mcm-1.ll
+++ b/test/CodeGen/PowerPC/mcm-1.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s
 
-; Test correct code generation for medium code model (32-bit TOC offsets)
+; Test correct code generation for the medium and large code models
 ; for loading and storing an external variable.
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
diff --git a/test/CodeGen/PowerPC/mcm-10.ll b/test/CodeGen/PowerPC/mcm-10.ll
new file mode 100644
index 0000000..4bec3e1
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-10.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s
+
+; Test peephole optimization for medium code model (32-bit TOC offsets)
+; for loading and storing a static variable scoped to a function.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@test_fn_static.si = internal global i32 0, align 4
+
+define signext i32 @test_fn_static() nounwind { ; post-increments the internal global and returns its previous value
+entry:
+  %0 = load i32* @test_fn_static.si, align 4 ; old value
+  %inc = add nsw i32 %0, 1 ; old value + 1
+  store i32 %inc, i32* @test_fn_static.si, align 4 ; store the incremented value back
+  ret i32 %0 ; return the value read before the increment
+}
+
+; CHECK: test_fn_static:
+; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK: .type [[VAR]],@object
+; CHECK: .local [[VAR]]
+; CHECK: .comm [[VAR]],4,4
diff --git a/test/CodeGen/PowerPC/mcm-11.ll b/test/CodeGen/PowerPC/mcm-11.ll
new file mode 100644
index 0000000..f2bc4c9
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-11.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s
+
+; Test peephole optimization for medium code model (32-bit TOC offsets)
+; for loading and storing a file-scope static variable.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@gi = global i32 5, align 4
+
+define signext i32 @test_file_static() nounwind { ; post-increments the global @gi and returns its previous value
+entry:
+  %0 = load i32* @gi, align 4 ; old value
+  %inc = add nsw i32 %0, 1 ; old value + 1
+  store i32 %inc, i32* @gi, align 4 ; store the incremented value back
+  ret i32 %0 ; return the value read before the increment
+}
+
+; CHECK: test_file_static:
+; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK: .type [[VAR]],@object
+; CHECK: .data
+; CHECK: .globl [[VAR]]
+; CHECK: [[VAR]]:
+; CHECK: .long 5
diff --git a/test/CodeGen/PowerPC/mcm-12.ll b/test/CodeGen/PowerPC/mcm-12.ll
new file mode 100644
index 0000000..911305d
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-12.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s
+
+; Test peephole optimization for medium code model (32-bit TOC offsets)
+; for loading a value from the constant pool (TOC-relative).
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @test_double_const() nounwind { ; returns a double constant; the checks below expect a TOC-relative lfd of it
+entry:
+  ret double 0x3F4FD4920B498CF0 ; same bit pattern as the .quad 4562098671269285104 expected below
+}
+
+; CHECK: [[VAR:[a-z0-9A-Z_.]+]]:
+; CHECK: .quad 4562098671269285104
+; CHECK: test_double_const:
+; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
+; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
diff --git a/test/CodeGen/PowerPC/mcm-2.ll b/test/CodeGen/PowerPC/mcm-2.ll
index 45df0ab..f0dff4c 100644
--- a/test/CodeGen/PowerPC/mcm-2.ll
+++ b/test/CodeGen/PowerPC/mcm-2.ll
@@ -1,6 +1,7 @@
-; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck -check-prefix=MEDIUM %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck -check-prefix=LARGE %s
 
-; Test correct code generation for medium code model (32-bit TOC offsets)
+; Test correct code generation for the medium and large code models
 ; for loading and storing a static variable scoped to a function.
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
@@ -16,11 +17,21 @@ entry:
   ret i32 %0
 }
 
-; CHECK: test_fn_static:
-; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
-; CHECK: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
-; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
-; CHECK: stw {{[0-9]+}}, 0([[REG2]])
-; CHECK: .type [[VAR]],@object
-; CHECK: .local [[VAR]]
-; CHECK: .comm [[VAR]],4,4
+; MEDIUM: test_fn_static:
+; MEDIUM: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; MEDIUM: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
+; MEDIUM: lwz {{[0-9]+}}, 0([[REG2]])
+; MEDIUM: stw {{[0-9]+}}, 0([[REG2]])
+; MEDIUM: .type [[VAR]],@object
+; MEDIUM: .local [[VAR]]
+; MEDIUM: .comm [[VAR]],4,4
+
+; LARGE: test_fn_static:
+; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; LARGE: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]])
+; LARGE: lwz {{[0-9]+}}, 0([[REG2]])
+; LARGE: stw {{[0-9]+}}, 0([[REG2]])
+; LARGE: .type [[VAR]],@object
+; LARGE: .local [[VAR]]
+; LARGE: .comm [[VAR]],4,4
+
diff --git a/test/CodeGen/PowerPC/mcm-3.ll b/test/CodeGen/PowerPC/mcm-3.ll
index 0e7bbe7..b790550 100644
--- a/test/CodeGen/PowerPC/mcm-3.ll
+++ b/test/CodeGen/PowerPC/mcm-3.ll
@@ -1,6 +1,7 @@
-; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck -check-prefix=MEDIUM %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck -check-prefix=LARGE %s
 
-; Test correct code generation for medium code model (32-bit TOC offsets)
+; Test correct code generation for the medium and large code models
 ; for loading and storing a file-scope static variable.
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
@@ -16,13 +17,25 @@ entry:
   ret i32 %0
 }
 
-; CHECK: test_file_static:
-; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
-; CHECK: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
-; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
-; CHECK: stw {{[0-9]+}}, 0([[REG2]])
-; CHECK: .type [[VAR]],@object
-; CHECK: .data
-; CHECK: .globl [[VAR]]
-; CHECK: [[VAR]]:
-; CHECK: .long 5
+; MEDIUM: test_file_static:
+; MEDIUM: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; MEDIUM: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
+; MEDIUM: lwz {{[0-9]+}}, 0([[REG2]])
+; MEDIUM: stw {{[0-9]+}}, 0([[REG2]])
+; MEDIUM: .type [[VAR]],@object
+; MEDIUM: .data
+; MEDIUM: .globl [[VAR]]
+; MEDIUM: [[VAR]]:
+; MEDIUM: .long 5
+
+; LARGE: test_file_static:
+; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; LARGE: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]])
+; LARGE: lwz {{[0-9]+}}, 0([[REG2]])
+; LARGE: stw {{[0-9]+}}, 0([[REG2]])
+; LARGE: .type [[VAR]],@object
+; LARGE: .data
+; LARGE: .globl [[VAR]]
+; LARGE: [[VAR]]:
+; LARGE: .long 5
+
diff --git a/test/CodeGen/PowerPC/mcm-4.ll b/test/CodeGen/PowerPC/mcm-4.ll
index db36d0b..47c60c9 100644
--- a/test/CodeGen/PowerPC/mcm-4.ll
+++ b/test/CodeGen/PowerPC/mcm-4.ll
@@ -1,6 +1,7 @@
-; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck -check-prefix=MEDIUM %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck -check-prefix=LARGE %s
 
-; Test correct code generation for medium code model (32-bit TOC offsets)
+; Test correct code generation for the medium and large code models
 ; for loading a value from the constant pool (TOC-relative).
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
@@ -11,9 +12,16 @@ entry:
   ret double 0x3F4FD4920B498CF0
 }
 
-; CHECK: [[VAR:[a-z0-9A-Z_.]+]]:
-; CHECK: .quad 4562098671269285104
-; CHECK: test_double_const:
-; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
-; CHECK: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
-; CHECK: lfd {{[0-9]+}}, 0([[REG2]])
+; MEDIUM: [[VAR:[a-z0-9A-Z_.]+]]:
+; MEDIUM: .quad 4562098671269285104
+; MEDIUM: test_double_const:
+; MEDIUM: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
+; MEDIUM: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
+; MEDIUM: lfd {{[0-9]+}}, 0([[REG2]])
+
+; LARGE: [[VAR:[a-z0-9A-Z_.]+]]:
+; LARGE: .quad 4562098671269285104
+; LARGE: test_double_const:
+; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
+; LARGE: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]])
+; LARGE: lfd {{[0-9]+}}, 0([[REG2]])
diff --git a/test/CodeGen/PowerPC/mcm-5.ll b/test/CodeGen/PowerPC/mcm-5.ll
index 10d89f5..1be27b7 100644
--- a/test/CodeGen/PowerPC/mcm-5.ll
+++ b/test/CodeGen/PowerPC/mcm-5.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s
 
-; Test correct code generation for medium code model (32-bit TOC offsets)
+; Test correct code generation for the medium and large code models
 ; for loading the address of a jump table from the TOC.
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
diff --git a/test/CodeGen/PowerPC/mcm-6.ll b/test/CodeGen/PowerPC/mcm-6.ll
index 0a7fa76..35efaaa 100644
--- a/test/CodeGen/PowerPC/mcm-6.ll
+++ b/test/CodeGen/PowerPC/mcm-6.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -mcpu=pwr7 -O0 -code-model=medium < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large < %s | FileCheck %s
 
-; Test correct code generation for medium code model (32-bit TOC offsets)
+; Test correct code generation for the medium and large code models
 ; for loading and storing a tentatively defined variable.
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
diff --git a/test/CodeGen/PowerPC/mcm-7.ll b/test/CodeGen/PowerPC/mcm-7.ll
index 0e9fa2b..0dd39ee 100644
--- a/test/CodeGen/PowerPC/mcm-7.ll
+++ b/test/CodeGen/PowerPC/mcm-7.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -mcpu=pwr7 -O0 -code-model=medium < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large < %s | FileCheck %s
 
-; Test correct code generation for medium code model (32-bit TOC offsets)
+; Test correct code generation for the medium and large code models
 ; for loading a function address.
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
diff --git a/test/CodeGen/PowerPC/mcm-8.ll b/test/CodeGen/PowerPC/mcm-8.ll
index 9381a97..3ece786 100644
--- a/test/CodeGen/PowerPC/mcm-8.ll
+++ b/test/CodeGen/PowerPC/mcm-8.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -mcpu=pwr7 -O0 -code-model=medium < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large < %s | FileCheck %s
 
-; Test correct code generation for medium code model (32-bit TOC offsets)
+; Test correct code generation for the medium and large code models
 ; for loading a variable with available-externally linkage.
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
diff --git a/test/CodeGen/PowerPC/mcm-9.ll b/test/CodeGen/PowerPC/mcm-9.ll
index 422607c..f366f45 100644
--- a/test/CodeGen/PowerPC/mcm-9.ll
+++ b/test/CodeGen/PowerPC/mcm-9.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s
 
-; Test correct code generation for medium code model (32-bit TOC offsets)
+; Test correct code generation for the medium and large code models
 ; for loading and storing an aliased external variable.
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
diff --git a/test/CodeGen/PowerPC/mcm-obj-2.ll b/test/CodeGen/PowerPC/mcm-obj-2.ll
new file mode 100644
index 0000000..2dd1718
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-obj-2.ll
@@ -0,0 +1,77 @@
+; RUN: llc -O1 -mcpu=pwr7 -code-model=medium -filetype=obj %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck %s
+
+; FIXME: When asm-parse is available, could make this an assembly test.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@test_fn_static.si = internal global i32 0, align 4
+
+define signext i32 @test_fn_static() nounwind {
+entry:
+  %0 = load i32* @test_fn_static.si, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @test_fn_static.si, align 4
+  ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing function-scoped variable si.
+;
+; CHECK:       Relocation 0
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM2:[0-9]+]]
+; CHECK-NEXT:  'r_type', 0x00000032
+; CHECK:       Relocation 1
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM2]]
+; CHECK-NEXT:  'r_type', 0x00000030
+; CHECK:       Relocation 2
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM2]]
+; CHECK-NEXT:  'r_type', 0x00000030
+
+@gi = global i32 5, align 4
+
+define signext i32 @test_file_static() nounwind {
+entry:
+  %0 = load i32* @gi, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @gi, align 4
+  ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing file-scope variable gi.
+;
+; CHECK:       Relocation 3
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM3:[0-9]+]]
+; CHECK-NEXT:  'r_type', 0x00000032
+; CHECK:       Relocation 4
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM3]]
+; CHECK-NEXT:  'r_type', 0x00000030
+; CHECK:       Relocation 5
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM3]]
+; CHECK-NEXT:  'r_type', 0x00000030
+
+define double @test_double_const() nounwind {
+entry:
+  ret double 0x3F4FD4920B498CF0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing a constant.
+;
+; CHECK:       Relocation 6
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM4:[0-9]+]]
+; CHECK-NEXT:  'r_type', 0x00000032
+; CHECK:       Relocation 7
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM4]]
+; CHECK-NEXT:  'r_type', 0x00000030
+
diff --git a/test/CodeGen/PowerPC/mcm-obj.ll b/test/CodeGen/PowerPC/mcm-obj.ll
index ec1b7b0..117c3b3 100644
--- a/test/CodeGen/PowerPC/mcm-obj.ll
+++ b/test/CodeGen/PowerPC/mcm-obj.ll
@@ -1,5 +1,7 @@
 ; RUN: llc -O0 -mcpu=pwr7 -code-model=medium -filetype=obj %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck %s
+; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=MEDIUM %s
+; RUN: llc -O0 -mcpu=pwr7 -code-model=large -filetype=obj %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=LARGE %s
 
 ; FIXME: When asm-parse is available, could make this an assembly test.
 
@@ -19,15 +21,25 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing external variable ei.
 ;
-; CHECK:       '.rela.text'
-; CHECK:       Relocation 0
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1:[0-9]+]]
-; CHECK-NEXT:  'r_type', 0x00000032
-; CHECK:       Relocation 1
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
-; CHECK-NEXT:  'r_type', 0x00000040
+; MEDIUM:       '.rela.text'
+; MEDIUM:       Relocation 0
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM1:[0-9]+]]
+; MEDIUM-NEXT:  'r_type', 0x00000032
+; MEDIUM:       Relocation 1
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM1]]
+; MEDIUM-NEXT:  'r_type', 0x00000040
+;
+; LARGE:       '.rela.text'
+; LARGE:       Relocation 0
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM1:[0-9]+]]
+; LARGE-NEXT:  'r_type', 0x00000032
+; LARGE:       Relocation 1
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM1]]
+; LARGE-NEXT:  'r_type', 0x00000040
 
 @test_fn_static.si = internal global i32 0, align 4
 
@@ -42,14 +54,26 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
 ; accessing function-scoped variable si.
 ;
-; CHECK:       Relocation 2
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM2:[0-9]+]]
-; CHECK-NEXT:  'r_type', 0x00000032
-; CHECK:       Relocation 3
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM2]]
-; CHECK-NEXT:  'r_type', 0x00000030
+; MEDIUM:       Relocation 2
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM2:[0-9]+]]
+; MEDIUM-NEXT:  'r_type', 0x00000032
+; MEDIUM:       Relocation 3
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM2]]
+; MEDIUM-NEXT:  'r_type', 0x00000030
+;
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing function-scoped variable si.
+;
+; LARGE:       Relocation 2
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM2:[0-9]+]]
+; LARGE-NEXT:  'r_type', 0x00000032
+; LARGE:       Relocation 3
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM2]]
+; LARGE-NEXT:  'r_type', 0x00000040
 
 @gi = global i32 5, align 4
 
@@ -64,14 +88,26 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
 ; accessing file-scope variable gi.
 ;
-; CHECK:       Relocation 4
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM3:[0-9]+]]
-; CHECK-NEXT:  'r_type', 0x00000032
-; CHECK:       Relocation 5
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM3]]
-; CHECK-NEXT:  'r_type', 0x00000030
+; MEDIUM:       Relocation 4
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM3:[0-9]+]]
+; MEDIUM-NEXT:  'r_type', 0x00000032
+; MEDIUM:       Relocation 5
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM3]]
+; MEDIUM-NEXT:  'r_type', 0x00000030
+;
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing file-scope variable gi.
+;
+; LARGE:       Relocation 4
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM3:[0-9]+]]
+; LARGE-NEXT:  'r_type', 0x00000032
+; LARGE:       Relocation 5
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM3]]
+; LARGE-NEXT:  'r_type', 0x00000040
 
 define double @test_double_const() nounwind {
 entry:
@@ -81,14 +117,26 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
 ; accessing a constant.
 ;
-; CHECK:       Relocation 6
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM4:[0-9]+]]
-; CHECK-NEXT:  'r_type', 0x00000032
-; CHECK:       Relocation 7
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM4]]
-; CHECK-NEXT:  'r_type', 0x00000030
+; MEDIUM:       Relocation 6
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM4:[0-9]+]]
+; MEDIUM-NEXT:  'r_type', 0x00000032
+; MEDIUM:       Relocation 7
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM4]]
+; MEDIUM-NEXT:  'r_type', 0x00000030
+;
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing a constant.
+;
+; LARGE:       Relocation 6
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM4:[0-9]+]]
+; LARGE-NEXT:  'r_type', 0x00000032
+; LARGE:       Relocation 7
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM4]]
+; LARGE-NEXT:  'r_type', 0x00000040
 
 define signext i32 @test_jump_table(i32 signext %i) nounwind {
 entry:
@@ -137,14 +185,23 @@ sw.epilog:                                        ; preds = %sw.bb3, %sw.default
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing a jump table address.
 ;
-; CHECK:       Relocation 8
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM5:[0-9]+]]
-; CHECK-NEXT:  'r_type', 0x00000032
-; CHECK:       Relocation 9
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM5]]
-; CHECK-NEXT:  'r_type', 0x00000040
+; MEDIUM:       Relocation 8
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM5:[0-9]+]]
+; MEDIUM-NEXT:  'r_type', 0x00000032
+; MEDIUM:       Relocation 9
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM5]]
+; MEDIUM-NEXT:  'r_type', 0x00000040
+;
+; LARGE:       Relocation 8
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM5:[0-9]+]]
+; LARGE-NEXT:  'r_type', 0x00000032
+; LARGE:       Relocation 9
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM5]]
+; LARGE-NEXT:  'r_type', 0x00000040
 
 @ti = common global i32 0, align 4
 
@@ -159,14 +216,23 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing tentatively declared variable ti.
 ;
-; CHECK:       Relocation 10
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM6:[0-9]+]]
-; CHECK-NEXT:  'r_type', 0x00000032
-; CHECK:       Relocation 11
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM6]]
-; CHECK-NEXT:  'r_type', 0x00000040
+; MEDIUM:       Relocation 10
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM6:[0-9]+]]
+; MEDIUM-NEXT:  'r_type', 0x00000032
+; MEDIUM:       Relocation 11
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM6]]
+; MEDIUM-NEXT:  'r_type', 0x00000040
+;
+; LARGE:       Relocation 10
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM6:[0-9]+]]
+; LARGE-NEXT:  'r_type', 0x00000032
+; LARGE:       Relocation 11
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM6]]
+; LARGE-NEXT:  'r_type', 0x00000040
 
 define i8* @test_fnaddr() nounwind {
 entry:
@@ -182,12 +248,21 @@ declare signext i32 @foo(i32 signext)
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing function address foo.
 ;
-; CHECK:       Relocation 12
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM7:[0-9]+]]
-; CHECK-NEXT:  'r_type', 0x00000032
-; CHECK:       Relocation 13
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM7]]
-; CHECK-NEXT:  'r_type', 0x00000040
+; MEDIUM:       Relocation 12
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM7:[0-9]+]]
+; MEDIUM-NEXT:  'r_type', 0x00000032
+; MEDIUM:       Relocation 13
+; MEDIUM-NEXT:  'r_offset'
+; MEDIUM-NEXT:  'r_sym', 0x[[SYM7]]
+; MEDIUM-NEXT:  'r_type', 0x00000040
+;
+; LARGE:       Relocation 12
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM7:[0-9]+]]
+; LARGE-NEXT:  'r_type', 0x00000032
+; LARGE:       Relocation 13
+; LARGE-NEXT:  'r_offset'
+; LARGE-NEXT:  'r_sym', 0x[[SYM7]]
+; LARGE-NEXT:  'r_type', 0x00000040
 
diff --git a/test/CodeGen/ARM/misched-inorder-latency.ll b/test/CodeGen/PowerPC/misched-inorder-latency.ll
index 8c06b4c..8fae7ad 100644
--- a/test/CodeGen/ARM/misched-inorder-latency.ll
+++ b/test/CodeGen/PowerPC/misched-inorder-latency.ll
@@ -1,15 +1,15 @@
-; RUN: llc < %s -enable-misched -march=thumb -mcpu=swift \
-; RUN:          -pre-RA-sched=source -scheditins=false -ilp-window=0 \
+; RUN: llc < %s -enable-misched -pre-RA-sched=source -scheditins=false \
 ; RUN:          -disable-ifcvt-triangle-false -disable-post-ra | FileCheck %s
 ;
-; For these tests, we set -ilp-window=0 to simulate in order processor.
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
 
-; %val1 is a 3-cycle load live out of %entry. It should be hoisted
+; %val1 is a load live out of %entry. It should be hoisted
 ; above the add.
-; CHECK: @testload
+; CHECK: testload:
 ; CHECK: %entry
-; CHECK: ldr
-; CHECK: adds
+; CHECK: lwz
+; CHECK: addi
 ; CHECK: bne
 ; CHECK: %true
 define i32 @testload(i32 *%ptr, i32 %sumin) {
@@ -34,15 +34,22 @@ end:
 ; The prefetch gets a default latency of 3 cycles and should be hoisted
 ; above the add.
 ;
-; CHECK: @testprefetch
+; CHECK: testprefetch:
 ; CHECK: %entry
-; CHECK: pld
-; CHECK: adds
-; CHECK: bx
+; CHECK: dcbt
+; CHECK: addi
+; CHECK: blr
 define i32 @testprefetch(i8 *%ptr, i32 %i) {
 entry:
-  %tmp = add i32 %i, 1
+  %val1 = add i32 %i, 1
   tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
-  ret i32 %tmp
+  %p = icmp eq i32 %i, 0
+  br i1 %p, label %true, label %end
+true:
+  %val2 = add i32 %val1, 1
+  br label %end
+end:
+  %valmerge = phi i32 [ %val1, %entry], [ %val2, %true ]
+  ret i32 %valmerge
 }
 declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
diff --git a/test/CodeGen/PowerPC/pr15031.ll b/test/CodeGen/PowerPC/pr15031.ll
new file mode 100644
index 0000000..5ccf941
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr15031.ll
@@ -0,0 +1,370 @@
+; RUN: llc -mcpu=pwr7 -O3 < %s | FileCheck %s
+
+; Test case derived from bug report 15031.  The code in the post-RA
+; scheduler to break critical anti-dependencies was failing to check
+; whether an instruction had more than one definition, and to ensure
+; that any additional definitions interfered with the choice of a new
+; register.  As a result, this test originally caused the following
+; instruction to be generated:
+;
+;   lbzu 3, 1(3)
+;
+; which is illegal, since it requires register 3 to both receive the
+; loaded value and receive the updated address.  With the fix to bug
+; 15031, a different register is chosen to receive the loaded value.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%"class.llvm::MachineMemOperand" = type { %"struct.llvm::MachinePointerInfo", i64, i32, %"class.llvm::MDNode"*, %"class.llvm::MDNode"* }
+%"struct.llvm::MachinePointerInfo" = type { %"class.llvm::Value"*, i64 }
+%"class.llvm::Value" = type { i32 (...)**, i8, i8, i16, %"class.llvm::Type"*, %"class.llvm::Use"*, %"class.llvm::StringMapEntry"* }
+%"class.llvm::Type" = type { %"class.llvm::LLVMContext"*, i32, i32, %"class.llvm::Type"** }
+%"class.llvm::LLVMContext" = type { %"class.llvm::LLVMContextImpl"* }
+%"class.llvm::LLVMContextImpl" = type opaque
+%"class.llvm::Use" = type { %"class.llvm::Value"*, %"class.llvm::Use"*, %"class.llvm::PointerIntPair" }
+%"class.llvm::PointerIntPair" = type { i64 }
+%"class.llvm::StringMapEntry" = type opaque
+%"class.llvm::MDNode" = type { %"class.llvm::Value", %"class.llvm::FoldingSetImpl::Node", i32, i32 }
+%"class.llvm::FoldingSetImpl::Node" = type { i8* }
+%"class.llvm::MachineInstr" = type { %"class.llvm::ilist_node", %"class.llvm::MCInstrDesc"*, %"class.llvm::MachineBasicBlock"*, %"class.llvm::MachineOperand"*, i32, %"class.llvm::ArrayRecycler<llvm::MachineOperand, 8>::Capacity", i8, i8, i8, %"class.llvm::MachineMemOperand"**, %"class.llvm::DebugLoc" }
+%"class.llvm::ilist_node" = type { %"class.llvm::ilist_half_node", %"class.llvm::MachineInstr"* }
+%"class.llvm::ilist_half_node" = type { %"class.llvm::MachineInstr"* }
+%"class.llvm::MCInstrDesc" = type { i16, i16, i16, i16, i16, i32, i64, i16*, i16*, %"class.llvm::MCOperandInfo"* }
+%"class.llvm::MCOperandInfo" = type { i16, i8, i8, i32 }
+%"class.llvm::MachineBasicBlock" = type { %"class.llvm::ilist_node.0", %"struct.llvm::ilist", %"class.llvm::BasicBlock"*, i32, %"class.llvm::MachineFunction"*, %"class.std::vector.163", %"class.std::vector.163", %"class.std::vector.123", %"class.std::vector.123", i32, i8, i8 }
+%"class.llvm::ilist_node.0" = type { %"class.llvm::ilist_half_node.1", %"class.llvm::MachineBasicBlock"* }
+%"class.llvm::ilist_half_node.1" = type { %"class.llvm::MachineBasicBlock"* }
+%"struct.llvm::ilist" = type { %"class.llvm::iplist" }
+%"class.llvm::iplist" = type { %"struct.llvm::ilist_traits", %"class.llvm::MachineInstr"* }
+%"struct.llvm::ilist_traits" = type { %"class.llvm::ilist_half_node", %"class.llvm::MachineBasicBlock"* }
+%"class.llvm::BasicBlock" = type { %"class.llvm::Value", %"class.llvm::ilist_node.2", %"class.llvm::iplist.4", %"class.llvm::Function"* }
+%"class.llvm::ilist_node.2" = type { %"class.llvm::ilist_half_node.3", %"class.llvm::BasicBlock"* }
+%"class.llvm::ilist_half_node.3" = type { %"class.llvm::BasicBlock"* }
+%"class.llvm::iplist.4" = type { %"struct.llvm::ilist_traits.5", %"class.llvm::Instruction"* }
+%"struct.llvm::ilist_traits.5" = type { %"class.llvm::ilist_half_node.10" }
+%"class.llvm::ilist_half_node.10" = type { %"class.llvm::Instruction"* }
+%"class.llvm::Instruction" = type { %"class.llvm::User", %"class.llvm::ilist_node.193", %"class.llvm::BasicBlock"*, %"class.llvm::DebugLoc" }
+%"class.llvm::User" = type { %"class.llvm::Value", %"class.llvm::Use"*, i32 }
+%"class.llvm::ilist_node.193" = type { %"class.llvm::ilist_half_node.10", %"class.llvm::Instruction"* }
+%"class.llvm::DebugLoc" = type { i32, i32 }
+%"class.llvm::Function" = type { %"class.llvm::GlobalValue", %"class.llvm::ilist_node.27", %"class.llvm::iplist.47", %"class.llvm::iplist.54", %"class.llvm::ValueSymbolTable"*, %"class.llvm::AttributeSet" }
+%"class.llvm::GlobalValue" = type { [52 x i8], [4 x i8], %"class.llvm::Module"*, %"class.std::basic_string" }
+%"class.llvm::Module" = type { %"class.llvm::LLVMContext"*, %"class.llvm::iplist.11", %"class.llvm::iplist.20", %"class.llvm::iplist.29", %"struct.llvm::ilist.38", %"class.std::basic_string", %"class.llvm::ValueSymbolTable"*, %"class.llvm::OwningPtr", %"class.std::basic_string", %"class.std::basic_string", %"class.std::basic_string", i8* }
+%"class.llvm::iplist.11" = type { %"struct.llvm::ilist_traits.12", %"class.llvm::GlobalVariable"* }
+%"struct.llvm::ilist_traits.12" = type { %"class.llvm::ilist_node.18" }
+%"class.llvm::ilist_node.18" = type { %"class.llvm::ilist_half_node.19", %"class.llvm::GlobalVariable"* }
+%"class.llvm::ilist_half_node.19" = type { %"class.llvm::GlobalVariable"* }
+%"class.llvm::GlobalVariable" = type { %"class.llvm::GlobalValue", %"class.llvm::ilist_node.18", i8 }
+%"class.llvm::iplist.20" = type { %"struct.llvm::ilist_traits.21", %"class.llvm::Function"* }
+%"struct.llvm::ilist_traits.21" = type { %"class.llvm::ilist_node.27" }
+%"class.llvm::ilist_node.27" = type { %"class.llvm::ilist_half_node.28", %"class.llvm::Function"* }
+%"class.llvm::ilist_half_node.28" = type { %"class.llvm::Function"* }
+%"class.llvm::iplist.29" = type { %"struct.llvm::ilist_traits.30", %"class.llvm::GlobalAlias"* }
+%"struct.llvm::ilist_traits.30" = type { %"class.llvm::ilist_node.36" }
+%"class.llvm::ilist_node.36" = type { %"class.llvm::ilist_half_node.37", %"class.llvm::GlobalAlias"* }
+%"class.llvm::ilist_half_node.37" = type { %"class.llvm::GlobalAlias"* }
+%"class.llvm::GlobalAlias" = type { %"class.llvm::GlobalValue", %"class.llvm::ilist_node.36" }
+%"struct.llvm::ilist.38" = type { %"class.llvm::iplist.39" }
+%"class.llvm::iplist.39" = type { %"struct.llvm::ilist_traits.40", %"class.llvm::NamedMDNode"* }
+%"struct.llvm::ilist_traits.40" = type { %"class.llvm::ilist_node.45" }
+%"class.llvm::ilist_node.45" = type { %"class.llvm::ilist_half_node.46", %"class.llvm::NamedMDNode"* }
+%"class.llvm::ilist_half_node.46" = type { %"class.llvm::NamedMDNode"* }
+%"class.llvm::NamedMDNode" = type { %"class.llvm::ilist_node.45", %"class.std::basic_string", %"class.llvm::Module"*, i8* }
+%"class.std::basic_string" = type { %"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" }
+%"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" = type { i8* }
+%"class.llvm::ValueSymbolTable" = type opaque
+%"class.llvm::OwningPtr" = type { %"class.llvm::GVMaterializer"* }
+%"class.llvm::GVMaterializer" = type opaque
+%"class.llvm::iplist.47" = type { %"struct.llvm::ilist_traits.48", %"class.llvm::BasicBlock"* }
+%"struct.llvm::ilist_traits.48" = type { %"class.llvm::ilist_half_node.3" }
+%"class.llvm::iplist.54" = type { %"struct.llvm::ilist_traits.55", %"class.llvm::Argument"* }
+%"struct.llvm::ilist_traits.55" = type { %"class.llvm::ilist_half_node.61" }
+%"class.llvm::ilist_half_node.61" = type { %"class.llvm::Argument"* }
+%"class.llvm::Argument" = type { %"class.llvm::Value", %"class.llvm::ilist_node.192", %"class.llvm::Function"* }
+%"class.llvm::ilist_node.192" = type { %"class.llvm::ilist_half_node.61", %"class.llvm::Argument"* }
+%"class.llvm::AttributeSet" = type { %"class.llvm::AttributeSetImpl"* }
+%"class.llvm::AttributeSetImpl" = type opaque
+%"class.llvm::MachineFunction" = type { %"class.llvm::Function"*, %"class.llvm::TargetMachine"*, %"class.llvm::MCContext"*, %"class.llvm::MachineModuleInfo"*, %"class.llvm::GCModuleInfo"*, %"class.llvm::MachineRegisterInfo"*, %"struct.llvm::MachineFunctionInfo"*, %"class.llvm::MachineFrameInfo"*, %"class.llvm::MachineConstantPool"*, %"class.llvm::MachineJumpTableInfo"*, %"class.std::vector.163", %"class.llvm::BumpPtrAllocator", %"class.llvm::Recycler", %"class.llvm::ArrayRecycler", %"class.llvm::Recycler.180", %"struct.llvm::ilist.181", i32, i32, i8 }
+%"class.llvm::TargetMachine" = type { i32 (...)**, %"class.llvm::Target"*, %"class.std::basic_string", %"class.std::basic_string", %"class.std::basic_string", %"class.llvm::MCCodeGenInfo"*, %"class.llvm::MCAsmInfo"*, i8, %"class.llvm::TargetOptions" }
+%"class.llvm::Target" = type opaque
+%"class.llvm::MCCodeGenInfo" = type opaque
+%"class.llvm::MCAsmInfo" = type opaque
+%"class.llvm::TargetOptions" = type { [2 x i8], i32, i8, i32, i8, %"class.std::basic_string", i32, i32 }
+%"class.llvm::MCContext" = type { %"class.llvm::SourceMgr"*, %"class.llvm::MCAsmInfo"*, %"class.llvm::MCRegisterInfo"*, %"class.llvm::MCObjectFileInfo"*, %"class.llvm::BumpPtrAllocator", %"class.llvm::StringMap", %"class.llvm::StringMap.62", i32, %"class.llvm::DenseMap.63", i8*, %"class.llvm::raw_ostream"*, i8, %"class.std::basic_string", %"class.std::basic_string", %"class.std::vector", %"class.std::vector.70", %"class.llvm::MCDwarfLoc", i8, i8, i32, %"class.llvm::MCSection"*, %"class.llvm::MCSymbol"*, %"class.llvm::MCSymbol"*, %"class.std::vector.75", %"class.llvm::StringRef", %"class.llvm::StringRef", i8, %"class.llvm::DenseMap.80", %"class.std::vector.84", i8*, i8*, i8*, i8 }
+%"class.llvm::SourceMgr" = type opaque
+%"class.llvm::MCRegisterInfo" = type { %"struct.llvm::MCRegisterDesc"*, i32, i32, i32, %"class.llvm::MCRegisterClass"*, i32, i32, [2 x i16]*, i16*, i8*, i16*, i32, i16*, i32, i32, i32, i32, %"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair"*, %"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair"*, %"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair"*, %"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair"*, %"class.llvm::DenseMap" }
+%"struct.llvm::MCRegisterDesc" = type { i32, i32, i32, i32, i32, i32 }
+%"class.llvm::MCRegisterClass" = type { i8*, i16*, i8*, i16, i16, i16, i16, i16, i8, i8 }
+%"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair" = type { i32, i32 }
+%"class.llvm::DenseMap" = type { %"struct.std::pair"*, i32, i32, i32 }
+%"struct.std::pair" = type { i32, i32 }
+%"class.llvm::MCObjectFileInfo" = type opaque
+%"class.llvm::BumpPtrAllocator" = type { i64, i64, %"class.llvm::SlabAllocator"*, %"class.llvm::MemSlab"*, i8*, i8*, i64 }
+%"class.llvm::SlabAllocator" = type { i32 (...)** }
+%"class.llvm::MemSlab" = type { i64, %"class.llvm::MemSlab"* }
+%"class.llvm::StringMap" = type { %"class.llvm::StringMapImpl", %"class.llvm::BumpPtrAllocator"* }
+%"class.llvm::StringMapImpl" = type { %"class.llvm::StringMapEntryBase"**, i32, i32, i32, i32 }
+%"class.llvm::StringMapEntryBase" = type { i32 }
+%"class.llvm::StringMap.62" = type { %"class.llvm::StringMapImpl", %"class.llvm::BumpPtrAllocator"* }
+%"class.llvm::DenseMap.63" = type { %"struct.std::pair.66"*, i32, i32, i32 }
+%"struct.std::pair.66" = type opaque
+%"class.llvm::raw_ostream" = type { i32 (...)**, i8*, i8*, i8*, i32 }
+%"class.std::vector" = type { %"struct.std::_Vector_base" }
+%"struct.std::_Vector_base" = type { %"struct.std::_Vector_base<llvm::MCDwarfFile *, std::allocator<llvm::MCDwarfFile *> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::MCDwarfFile *, std::allocator<llvm::MCDwarfFile *> >::_Vector_impl" = type { %"class.llvm::MCDwarfFile"**, %"class.llvm::MCDwarfFile"**, %"class.llvm::MCDwarfFile"** }
+%"class.llvm::MCDwarfFile" = type { %"class.llvm::StringRef", i32 }
+%"class.llvm::StringRef" = type { i8*, i64 }
+%"class.std::vector.70" = type { %"struct.std::_Vector_base.71" }
+%"struct.std::_Vector_base.71" = type { %"struct.std::_Vector_base<llvm::StringRef, std::allocator<llvm::StringRef> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::StringRef, std::allocator<llvm::StringRef> >::_Vector_impl" = type { %"class.llvm::StringRef"*, %"class.llvm::StringRef"*, %"class.llvm::StringRef"* }
+%"class.llvm::MCDwarfLoc" = type { i32, i32, i32, i32, i32, i32 }
+%"class.llvm::MCSection" = type opaque
+%"class.llvm::MCSymbol" = type { %"class.llvm::StringRef", %"class.llvm::MCSection"*, %"class.llvm::MCExpr"*, i8 }
+%"class.llvm::MCExpr" = type opaque
+%"class.std::vector.75" = type { %"struct.std::_Vector_base.76" }
+%"struct.std::_Vector_base.76" = type { %"struct.std::_Vector_base<const llvm::MCGenDwarfLabelEntry *, std::allocator<const llvm::MCGenDwarfLabelEntry *> >::_Vector_impl" }
+%"struct.std::_Vector_base<const llvm::MCGenDwarfLabelEntry *, std::allocator<const llvm::MCGenDwarfLabelEntry *> >::_Vector_impl" = type { %"class.llvm::MCGenDwarfLabelEntry"**, %"class.llvm::MCGenDwarfLabelEntry"**, %"class.llvm::MCGenDwarfLabelEntry"** }
+%"class.llvm::MCGenDwarfLabelEntry" = type { %"class.llvm::StringRef", i32, i32, %"class.llvm::MCSymbol"* }
+%"class.llvm::DenseMap.80" = type { %"struct.std::pair.83"*, i32, i32, i32 }
+%"struct.std::pair.83" = type { %"class.llvm::MCSection"*, %"class.llvm::MCLineSection"* }
+%"class.llvm::MCLineSection" = type { %"class.std::vector.215" }
+%"class.std::vector.215" = type { %"struct.std::_Vector_base.216" }
+%"struct.std::_Vector_base.216" = type { %"struct.std::_Vector_base<llvm::MCLineEntry, std::allocator<llvm::MCLineEntry> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::MCLineEntry, std::allocator<llvm::MCLineEntry> >::_Vector_impl" = type { %"class.llvm::MCLineEntry"*, %"class.llvm::MCLineEntry"*, %"class.llvm::MCLineEntry"* }
+%"class.llvm::MCLineEntry" = type { %"class.llvm::MCDwarfLoc", %"class.llvm::MCSymbol"* }
+%"class.std::vector.84" = type { %"struct.std::_Vector_base.85" }
+%"struct.std::_Vector_base.85" = type { %"struct.std::_Vector_base<const llvm::MCSection *, std::allocator<const llvm::MCSection *> >::_Vector_impl" }
+%"struct.std::_Vector_base<const llvm::MCSection *, std::allocator<const llvm::MCSection *> >::_Vector_impl" = type { %"class.llvm::MCSection"**, %"class.llvm::MCSection"**, %"class.llvm::MCSection"** }
+%"class.llvm::MachineModuleInfo" = type { %"class.llvm::ImmutablePass", %"class.llvm::MCContext", %"class.llvm::Module"*, %"class.llvm::MachineModuleInfoImpl"*, %"class.std::vector.95", i32, %"class.std::vector.100", %"class.llvm::DenseMap.110", %"class.llvm::DenseMap.114", i32, %"class.std::vector.118", %"class.std::vector.123", %"class.std::vector.123", %"class.std::vector.128", %"class.llvm::SmallPtrSet", %"class.llvm::MMIAddrLabelMap"*, i8, i8, i8, i8, %"class.llvm::SmallVector.133" }
+%"class.llvm::ImmutablePass" = type { %"class.llvm::ModulePass" }
+%"class.llvm::ModulePass" = type { %"class.llvm::Pass" }
+%"class.llvm::Pass" = type { i32 (...)**, %"class.llvm::AnalysisResolver"*, i8*, i32 }
+%"class.llvm::AnalysisResolver" = type { %"class.std::vector.89", %"class.llvm::PMDataManager"* }
+%"class.std::vector.89" = type { %"struct.std::_Vector_base.90" }
+%"struct.std::_Vector_base.90" = type { %"struct.std::_Vector_base<std::pair<const void *, llvm::Pass *>, std::allocator<std::pair<const void *, llvm::Pass *> > >::_Vector_impl" }
+%"struct.std::_Vector_base<std::pair<const void *, llvm::Pass *>, std::allocator<std::pair<const void *, llvm::Pass *> > >::_Vector_impl" = type { %"struct.std::pair.94"*, %"struct.std::pair.94"*, %"struct.std::pair.94"* }
+%"struct.std::pair.94" = type { i8*, %"class.llvm::Pass"* }
+%"class.llvm::PMDataManager" = type opaque
+%"class.llvm::MachineModuleInfoImpl" = type { i32 (...)** }
+%"class.std::vector.95" = type { %"struct.std::_Vector_base.96" }
+%"struct.std::_Vector_base.96" = type { %"struct.std::_Vector_base<llvm::MachineMove, std::allocator<llvm::MachineMove> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::MachineMove, std::allocator<llvm::MachineMove> >::_Vector_impl" = type { %"class.llvm::MachineMove"*, %"class.llvm::MachineMove"*, %"class.llvm::MachineMove"* }
+%"class.llvm::MachineMove" = type { %"class.llvm::MCSymbol"*, %"class.llvm::MachineLocation", %"class.llvm::MachineLocation" }
+%"class.llvm::MachineLocation" = type { i8, i32, i32 }
+%"class.std::vector.100" = type { %"struct.std::_Vector_base.101" }
+%"struct.std::_Vector_base.101" = type { %"struct.std::_Vector_base<llvm::LandingPadInfo, std::allocator<llvm::LandingPadInfo> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::LandingPadInfo, std::allocator<llvm::LandingPadInfo> >::_Vector_impl" = type { %"struct.llvm::LandingPadInfo"*, %"struct.llvm::LandingPadInfo"*, %"struct.llvm::LandingPadInfo"* }
+%"struct.llvm::LandingPadInfo" = type { %"class.llvm::MachineBasicBlock"*, %"class.llvm::SmallVector", %"class.llvm::SmallVector", %"class.llvm::MCSymbol"*, %"class.llvm::Function"*, %"class.std::vector.105" }
+%"class.llvm::SmallVector" = type { %"class.llvm::SmallVectorImpl", %"struct.llvm::SmallVectorStorage" }
+%"class.llvm::SmallVectorImpl" = type { %"class.llvm::SmallVectorTemplateBase" }
+%"class.llvm::SmallVectorTemplateBase" = type { %"class.llvm::SmallVectorTemplateCommon" }
+%"class.llvm::SmallVectorTemplateCommon" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion" }
+%"class.llvm::SmallVectorBase" = type { i8*, i8*, i8* }
+%"struct.llvm::AlignedCharArrayUnion" = type { %"struct.llvm::AlignedCharArray" }
+%"struct.llvm::AlignedCharArray" = type { [8 x i8] }
+%"struct.llvm::SmallVectorStorage" = type { i8 }
+%"class.std::vector.105" = type { %"struct.std::_Vector_base.106" }
+%"struct.std::_Vector_base.106" = type { %"struct.std::_Vector_base<int, std::allocator<int> >::_Vector_impl" }
+%"struct.std::_Vector_base<int, std::allocator<int> >::_Vector_impl" = type { i32*, i32*, i32* }
+%"class.llvm::DenseMap.110" = type { %"struct.std::pair.113"*, i32, i32, i32 }
+%"struct.std::pair.113" = type { %"class.llvm::MCSymbol"*, %"class.llvm::SmallVector.206" }
+%"class.llvm::SmallVector.206" = type { [28 x i8], %"struct.llvm::SmallVectorStorage.207" }
+%"struct.llvm::SmallVectorStorage.207" = type { [3 x %"struct.llvm::AlignedCharArrayUnion.198"] }
+%"struct.llvm::AlignedCharArrayUnion.198" = type { %"struct.llvm::AlignedCharArray.199" }
+%"struct.llvm::AlignedCharArray.199" = type { [4 x i8] }
+%"class.llvm::DenseMap.114" = type { %"struct.std::pair.117"*, i32, i32, i32 }
+%"struct.std::pair.117" = type { %"class.llvm::MCSymbol"*, i32 }
+%"class.std::vector.118" = type { %"struct.std::_Vector_base.119" }
+%"struct.std::_Vector_base.119" = type { %"struct.std::_Vector_base<const llvm::GlobalVariable *, std::allocator<const llvm::GlobalVariable *> >::_Vector_impl" }
+%"struct.std::_Vector_base<const llvm::GlobalVariable *, std::allocator<const llvm::GlobalVariable *> >::_Vector_impl" = type { %"class.llvm::GlobalVariable"**, %"class.llvm::GlobalVariable"**, %"class.llvm::GlobalVariable"** }
+%"class.std::vector.123" = type { %"struct.std::_Vector_base.124" }
+%"struct.std::_Vector_base.124" = type { %"struct.std::_Vector_base<unsigned int, std::allocator<unsigned int> >::_Vector_impl" }
+%"struct.std::_Vector_base<unsigned int, std::allocator<unsigned int> >::_Vector_impl" = type { i32*, i32*, i32* }
+%"class.std::vector.128" = type { %"struct.std::_Vector_base.129" }
+%"struct.std::_Vector_base.129" = type { %"struct.std::_Vector_base<const llvm::Function *, std::allocator<const llvm::Function *> >::_Vector_impl" }
+%"struct.std::_Vector_base<const llvm::Function *, std::allocator<const llvm::Function *> >::_Vector_impl" = type { %"class.llvm::Function"**, %"class.llvm::Function"**, %"class.llvm::Function"** }
+%"class.llvm::SmallPtrSet" = type { %"class.llvm::SmallPtrSetImpl", [33 x i8*] }
+%"class.llvm::SmallPtrSetImpl" = type { i8**, i8**, i32, i32, i32 }
+%"class.llvm::MMIAddrLabelMap" = type opaque
+%"class.llvm::SmallVector.133" = type { %"class.llvm::SmallVectorImpl.134", %"struct.llvm::SmallVectorStorage.139" }
+%"class.llvm::SmallVectorImpl.134" = type { %"class.llvm::SmallVectorTemplateBase.135" }
+%"class.llvm::SmallVectorTemplateBase.135" = type { %"class.llvm::SmallVectorTemplateCommon.136" }
+%"class.llvm::SmallVectorTemplateCommon.136" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.137" }
+%"struct.llvm::AlignedCharArrayUnion.137" = type { %"struct.llvm::AlignedCharArray.138" }
+%"struct.llvm::AlignedCharArray.138" = type { [40 x i8] }
+%"struct.llvm::SmallVectorStorage.139" = type { [3 x %"struct.llvm::AlignedCharArrayUnion.137"] }
+%"class.llvm::GCModuleInfo" = type opaque
+%"class.llvm::MachineRegisterInfo" = type { %"class.llvm::TargetRegisterInfo"*, i8, i8, %"class.llvm::IndexedMap", %"class.llvm::IndexedMap.146", %"class.llvm::MachineOperand"**, %"class.llvm::BitVector", %"class.llvm::BitVector", %"class.llvm::BitVector", %"class.std::vector.147", %"class.std::vector.123" }
+%"class.llvm::TargetRegisterInfo" = type { i32 (...)**, %"class.llvm::MCRegisterInfo", %"struct.llvm::TargetRegisterInfoDesc"*, i8**, i32*, %"class.llvm::TargetRegisterClass"**, %"class.llvm::TargetRegisterClass"** }
+%"struct.llvm::TargetRegisterInfoDesc" = type { i32, i8 }
+%"class.llvm::TargetRegisterClass" = type { %"class.llvm::MCRegisterClass"*, i32*, i32*, i16*, %"class.llvm::TargetRegisterClass"**, void (%"class.llvm::ArrayRef"*, %"class.llvm::MachineFunction"*)* }
+%"class.llvm::ArrayRef" = type { i16*, i64 }
+%"class.llvm::IndexedMap" = type { %"class.std::vector.140", %"struct.std::pair.145", %"struct.llvm::VirtReg2IndexFunctor" }
+%"class.std::vector.140" = type { %"struct.std::_Vector_base.141" }
+%"struct.std::_Vector_base.141" = type { %"struct.std::_Vector_base<std::pair<const llvm::TargetRegisterClass *, llvm::MachineOperand *>, std::allocator<std::pair<const llvm::TargetRegisterClass *, llvm::MachineOperand *> > >::_Vector_impl" }
+%"struct.std::_Vector_base<std::pair<const llvm::TargetRegisterClass *, llvm::MachineOperand *>, std::allocator<std::pair<const llvm::TargetRegisterClass *, llvm::MachineOperand *> > >::_Vector_impl" = type { %"struct.std::pair.145"*, %"struct.std::pair.145"*, %"struct.std::pair.145"* }
+%"struct.std::pair.145" = type { %"class.llvm::TargetRegisterClass"*, %"class.llvm::MachineOperand"* }
+%"class.llvm::MachineOperand" = type { i8, [3 x i8], %union.anon, %"class.llvm::MachineInstr"*, %union.anon.188 }
+%union.anon = type { i32 }
+%union.anon.188 = type { %struct.anon }
+%struct.anon = type { %"class.llvm::MachineOperand"*, %"class.llvm::MachineOperand"* }
+%"struct.llvm::VirtReg2IndexFunctor" = type { i8 }
+%"class.llvm::IndexedMap.146" = type { %"class.std::vector.147", %"struct.std::pair.152", %"struct.llvm::VirtReg2IndexFunctor" }
+%"class.std::vector.147" = type { %"struct.std::_Vector_base.148" }
+%"struct.std::_Vector_base.148" = type { %"struct.std::_Vector_base<std::pair<unsigned int, unsigned int>, std::allocator<std::pair<unsigned int, unsigned int> > >::_Vector_impl" }
+%"struct.std::_Vector_base<std::pair<unsigned int, unsigned int>, std::allocator<std::pair<unsigned int, unsigned int> > >::_Vector_impl" = type { %"struct.std::pair.152"*, %"struct.std::pair.152"*, %"struct.std::pair.152"* }
+%"struct.std::pair.152" = type { i32, i32 }
+%"class.llvm::BitVector" = type { i64*, i32, i32 }
+%"struct.llvm::MachineFunctionInfo" = type { i32 (...)** }
+%"class.llvm::MachineFrameInfo" = type opaque
+%"class.llvm::MachineConstantPool" = type { %"class.llvm::DataLayout"*, i32, %"class.std::vector.153", %"class.llvm::DenseSet" }
+%"class.llvm::DataLayout" = type opaque
+%"class.std::vector.153" = type { %"struct.std::_Vector_base.154" }
+%"struct.std::_Vector_base.154" = type { %"struct.std::_Vector_base<llvm::MachineConstantPoolEntry, std::allocator<llvm::MachineConstantPoolEntry> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::MachineConstantPoolEntry, std::allocator<llvm::MachineConstantPoolEntry> >::_Vector_impl" = type { %"class.llvm::MachineConstantPoolEntry"*, %"class.llvm::MachineConstantPoolEntry"*, %"class.llvm::MachineConstantPoolEntry"* }
+%"class.llvm::MachineConstantPoolEntry" = type { %union.anon.158, i32 }
+%union.anon.158 = type { %"class.llvm::Constant"* }
+%"class.llvm::Constant" = type { %"class.llvm::User" }
+%"class.llvm::DenseSet" = type { %"class.llvm::DenseMap.159" }
+%"class.llvm::DenseMap.159" = type { %"struct.std::pair.162"*, i32, i32, i32 }
+%"struct.std::pair.162" = type { %"class.llvm::MachineConstantPoolValue"*, i8 }
+%"class.llvm::MachineConstantPoolValue" = type { i32 (...)**, %"class.llvm::Type"* }
+%"class.llvm::MachineJumpTableInfo" = type opaque
+%"class.std::vector.163" = type { %"struct.std::_Vector_base.164" }
+%"struct.std::_Vector_base.164" = type { %"struct.std::_Vector_base<llvm::MachineBasicBlock *, std::allocator<llvm::MachineBasicBlock *> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::MachineBasicBlock *, std::allocator<llvm::MachineBasicBlock *> >::_Vector_impl" = type { %"class.llvm::MachineBasicBlock"**, %"class.llvm::MachineBasicBlock"**, %"class.llvm::MachineBasicBlock"** }
+%"class.llvm::Recycler" = type { %"class.llvm::iplist.168" }
+%"class.llvm::iplist.168" = type { %"struct.llvm::ilist_traits.169", %"struct.llvm::RecyclerStruct"* }
+%"struct.llvm::ilist_traits.169" = type { %"struct.llvm::RecyclerStruct" }
+%"struct.llvm::RecyclerStruct" = type { %"struct.llvm::RecyclerStruct"*, %"struct.llvm::RecyclerStruct"* }
+%"class.llvm::ArrayRecycler" = type { %"class.llvm::SmallVector.174" }
+%"class.llvm::SmallVector.174" = type { %"class.llvm::SmallVectorImpl.175", %"struct.llvm::SmallVectorStorage.179" }
+%"class.llvm::SmallVectorImpl.175" = type { %"class.llvm::SmallVectorTemplateBase.176" }
+%"class.llvm::SmallVectorTemplateBase.176" = type { %"class.llvm::SmallVectorTemplateCommon.177" }
+%"class.llvm::SmallVectorTemplateCommon.177" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.178" }
+%"struct.llvm::AlignedCharArrayUnion.178" = type { %"struct.llvm::AlignedCharArray" }
+%"struct.llvm::SmallVectorStorage.179" = type { [7 x %"struct.llvm::AlignedCharArrayUnion.178"] }
+%"class.llvm::Recycler.180" = type { %"class.llvm::iplist.168" }
+%"struct.llvm::ilist.181" = type { %"class.llvm::iplist.182" }
+%"class.llvm::iplist.182" = type { %"struct.llvm::ilist_traits.183", %"class.llvm::MachineBasicBlock"* }
+%"struct.llvm::ilist_traits.183" = type { %"class.llvm::ilist_half_node.1" }
+%"class.llvm::ArrayRecycler<llvm::MachineOperand, 8>::Capacity" = type { i8 }
+%"class.llvm::ConstantInt" = type { %"class.llvm::Constant", %"class.llvm::APInt" }
+%"class.llvm::APInt" = type { i32, %union.anon.189 }
+%union.anon.189 = type { i64 }
+%"class.llvm::ConstantFP" = type { %"class.llvm::Constant", %"class.llvm::APFloat" }
+%"class.llvm::APFloat" = type { %"struct.llvm::fltSemantics"*, %"union.llvm::APFloat::Significand", i16, i8 }
+%"struct.llvm::fltSemantics" = type opaque
+%"union.llvm::APFloat::Significand" = type { i64 }
+%"class.llvm::BlockAddress" = type { %"class.llvm::Constant" }
+%"class.llvm::hash_code" = type { i64 }
+%"struct.llvm::hashing::detail::hash_combine_recursive_helper" = type { [64 x i8], %"struct.llvm::hashing::detail::hash_state", i64 }
+%"struct.llvm::hashing::detail::hash_state" = type { i64, i64, i64, i64, i64, i64, i64, i64 }
+%"class.llvm::PrintReg" = type { %"class.llvm::TargetRegisterInfo"*, i32, i32 }
+%"class.llvm::PseudoSourceValue" = type { %"class.llvm::Value" }
+%"class.llvm::FoldingSetNodeID" = type { %"class.llvm::SmallVector.194" }
+%"class.llvm::SmallVector.194" = type { [28 x i8], %"struct.llvm::SmallVectorStorage.200" }
+%"struct.llvm::SmallVectorStorage.200" = type { [31 x %"struct.llvm::AlignedCharArrayUnion.198"] }
+%"struct.llvm::ArrayRecycler<llvm::MachineOperand, 8>::FreeList" = type { %"struct.llvm::ArrayRecycler<llvm::MachineOperand, 8>::FreeList"* }
+%"class.llvm::ilist_iterator.202" = type { %"class.llvm::MachineInstr"* }
+%"class.llvm::TargetInstrInfo" = type { i32 (...)**, [28 x i8], i32, i32 }
+%"struct.std::pair.203" = type { i8, i8 }
+%"class.llvm::SmallVectorImpl.195" = type { %"class.llvm::SmallVectorTemplateBase.196" }
+%"class.llvm::SmallVectorTemplateBase.196" = type { %"class.llvm::SmallVectorTemplateCommon.197" }
+%"class.llvm::SmallVectorTemplateCommon.197" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.198" }
+%"class.llvm::AliasAnalysis" = type { i32 (...)**, %"class.llvm::DataLayout"*, %"class.llvm::TargetLibraryInfo"*, %"class.llvm::AliasAnalysis"* }
+%"class.llvm::TargetLibraryInfo" = type opaque
+%"struct.llvm::AliasAnalysis::Location" = type { %"class.llvm::Value"*, i64, %"class.llvm::MDNode"* }
+%"class.llvm::DIVariable" = type { %"class.llvm::DIDescriptor" }
+%"class.llvm::DIDescriptor" = type { %"class.llvm::MDNode"* }
+%"class.llvm::DIScope" = type { %"class.llvm::DIDescriptor" }
+%"class.llvm::ArrayRef.208" = type { i32*, i64 }
+%"class.llvm::SmallVector.209" = type { %"class.llvm::SmallVectorImpl.210", %"struct.llvm::SmallVectorStorage.214" }
+%"class.llvm::SmallVectorImpl.210" = type { %"class.llvm::SmallVectorTemplateBase.211" }
+%"class.llvm::SmallVectorTemplateBase.211" = type { %"class.llvm::SmallVectorTemplateCommon.212" }
+%"class.llvm::SmallVectorTemplateCommon.212" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.213" }
+%"struct.llvm::AlignedCharArrayUnion.213" = type { %"struct.llvm::AlignedCharArray" }
+%"struct.llvm::SmallVectorStorage.214" = type { [7 x %"struct.llvm::AlignedCharArrayUnion.213"] }
+%"class.llvm::Twine" = type { %"union.llvm::Twine::Child", %"union.llvm::Twine::Child", i8, i8 }
+%"union.llvm::Twine::Child" = type { %"class.llvm::Twine"* }
+%"struct.std::random_access_iterator_tag" = type { i8 }
+
+declare void @_ZN4llvm19MachineRegisterInfo27removeRegOperandFromUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"*, %"class.llvm::MachineOperand"*)
+
+declare void @_ZN4llvm19MachineRegisterInfo22addRegOperandToUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"*, %"class.llvm::MachineOperand"*)
+
+declare zeroext i32 @_ZNK4llvm14MCRegisterInfo9getSubRegEjj(%"class.llvm::MCRegisterInfo"*, i32 zeroext, i32 zeroext)
+
+define void @_ZN4llvm14MachineOperand12substPhysRegEjRKNS_18TargetRegisterInfoE(%"class.llvm::MachineOperand"* %this, i32 zeroext %Reg, %"class.llvm::TargetRegisterInfo"* %TRI) align 2 {  ; mangled name of llvm::MachineOperand::substPhysReg; sole function body in this reproducer
+entry:
+  %SubReg_TargetFlags.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 1
+  %0 = bitcast [3 x i8]* %SubReg_TargetFlags.i to i24*  ; treat the 3-byte field as a single i24 bitfield
+  %bf.load.i = load i24* %0, align 1
+  %bf.lshr.i = lshr i24 %bf.load.i, 12  ; upper 12 bits of the field (used as the getSubReg index below)
+  %tobool = icmp eq i24 %bf.lshr.i, 0
+  br i1 %tobool, label %if.end, label %if.then  ; skip the getSubReg call when those bits are all zero
+
+if.then:                                          ; preds = %entry
+  %bf.cast.i = zext i24 %bf.lshr.i to i32
+  %add.ptr = getelementptr inbounds %"class.llvm::TargetRegisterInfo"* %TRI, i64 0, i32 1  ; address of the MCRegisterInfo member (field 1) of %TRI
+  %call3 = tail call zeroext i32 @_ZNK4llvm14MCRegisterInfo9getSubRegEjj(%"class.llvm::MCRegisterInfo"* %add.ptr, i32 zeroext %Reg, i32 zeroext %bf.cast.i)
+  %bf.load.i10 = load i24* %0, align 1
+  %bf.clear.i = and i24 %bf.load.i10, 4095  ; keep the low 12 bits, zero the upper 12
+  store i24 %bf.clear.i, i24* %0, align 1
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  %Reg.addr.0 = phi i32 [ %call3, %if.then ], [ %Reg, %entry ]  ; register to install: getSubReg result, or %Reg unchanged
+  %RegNo.i.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 2, i32 0
+  %1 = load i32* %RegNo.i.i, align 4, !tbaa !0
+  %cmp.i = icmp eq i32 %1, %Reg.addr.0
+  br i1 %cmp.i, label %_ZN4llvm14MachineOperand6setRegEj.exit, label %if.end.i  ; nothing to do if the operand already holds that register
+
+if.end.i:                                         ; preds = %if.end
+  %ParentMI.i.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 3
+  %2 = load %"class.llvm::MachineInstr"** %ParentMI.i.i, align 8, !tbaa !3
+  %tobool.i = icmp eq %"class.llvm::MachineInstr"* %2, null
+  br i1 %tobool.i, label %if.end13.i, label %if.then3.i  ; null MachineInstr pointer: plain store only
+
+if.then3.i:                                       ; preds = %if.end.i
+  %Parent.i.i = getelementptr inbounds %"class.llvm::MachineInstr"* %2, i64 0, i32 2
+  %3 = load %"class.llvm::MachineBasicBlock"** %Parent.i.i, align 8, !tbaa !3
+  %tobool5.i = icmp eq %"class.llvm::MachineBasicBlock"* %3, null
+  br i1 %tobool5.i, label %if.end13.i, label %if.then6.i  ; null MachineBasicBlock pointer: plain store only
+
+if.then6.i:                                       ; preds = %if.then3.i
+  %xParent.i.i = getelementptr inbounds %"class.llvm::MachineBasicBlock"* %3, i64 0, i32 4
+  %4 = load %"class.llvm::MachineFunction"** %xParent.i.i, align 8, !tbaa !3
+  %tobool8.i = icmp eq %"class.llvm::MachineFunction"* %4, null
+  br i1 %tobool8.i, label %if.end13.i, label %if.then9.i  ; null MachineFunction pointer: plain store only
+
+if.then9.i:                                       ; preds = %if.then6.i
+  %RegInfo.i.i = getelementptr inbounds %"class.llvm::MachineFunction"* %4, i64 0, i32 5
+  %5 = load %"class.llvm::MachineRegisterInfo"** %RegInfo.i.i, align 8, !tbaa !3
+  tail call void @_ZN4llvm19MachineRegisterInfo27removeRegOperandFromUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"* %5, %"class.llvm::MachineOperand"* %this)
+  store i32 %Reg.addr.0, i32* %RegNo.i.i, align 4, !tbaa !0  ; register number is rewritten between the remove and add calls
+  tail call void @_ZN4llvm19MachineRegisterInfo22addRegOperandToUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"* %5, %"class.llvm::MachineOperand"* %this)
+  br label %_ZN4llvm14MachineOperand6setRegEj.exit
+
+if.end13.i:                                       ; preds = %if.then6.i, %if.then3.i, %if.end.i
+  store i32 %Reg.addr.0, i32* %RegNo.i.i, align 4, !tbaa !0  ; one of the parent pointers was null: store without the use-list calls
+  br label %_ZN4llvm14MachineOperand6setRegEj.exit
+
+_ZN4llvm14MachineOperand6setRegEj.exit:           ; preds = %if.end, %if.then9.i, %if.end13.i
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"any pointer", metadata !1}
+!4 = metadata !{metadata !"vtable pointer", metadata !2}
+!5 = metadata !{metadata !"long", metadata !1}
+!6 = metadata !{i64 0, i64 8, metadata !3, i64 8, i64 8, metadata !5}
+!7 = metadata !{metadata !"short", metadata !1}
+!8 = metadata !{i64 0, i64 1, metadata !1, i64 1, i64 4, metadata !0, i64 2, i64 1, metadata !1, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 4, i64 4, metadata !0, i64 4, i64 4, metadata !0, i64 8, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !5, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 24, i64 8, metadata !3, i64 16, i64 4, metadata !0, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 24, i64 4, metadata !0}
+!9 = metadata !{metadata !"bool", metadata !1}
+!10 = metadata !{i8 0, i8 2}
+
+; CHECK-NOT: lbzu 3, 1(3)
diff --git a/test/CodeGen/PowerPC/pr15359.ll b/test/CodeGen/PowerPC/pr15359.ll
new file mode 100644
index 0000000..12fa3e5
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr15359.ll
@@ -0,0 +1,20 @@
+; RUN: llc -O0 -mcpu=pwr7 -filetype=obj %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck %s
+
+target datalayout = "E-p:64:64:64-S0-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@nextIdx = external thread_local global i32
+
+define fastcc void @func() nounwind {
+entry:
+  store i32 42, i32* @nextIdx
+  ret void
+}
+
+; Verify that nextIdx has symbol type TLS.
+;
+; CHECK:    '.symtab'
+; CHECK:    'nextIdx'
+; CHECK:    'st_type', 0x6
+
diff --git a/test/CodeGen/PowerPC/pwr3-6x.ll b/test/CodeGen/PowerPC/pwr3-6x.ll
new file mode 100644
index 0000000..a9cfe41
--- /dev/null
+++ b/test/CodeGen/PowerPC/pwr3-6x.ll
@@ -0,0 +1,14 @@
+; Test basic support for some older processors.
+
+;RUN: llc < %s -march=ppc64 -mcpu=pwr3 | FileCheck %s
+;RUN: llc < %s -march=ppc64 -mcpu=pwr4 | FileCheck %s
+;RUN: llc < %s -march=ppc64 -mcpu=pwr5 | FileCheck %s
+;RUN: llc < %s -march=ppc64 -mcpu=pwr5x | FileCheck %s
+;RUN: llc < %s -march=ppc64 -mcpu=pwr6x | FileCheck %s
+
+define void @foo() {
+entry:
+  ret void
+}
+
+; CHECK: @foo
diff --git a/test/CodeGen/PowerPC/quadint-return.ll b/test/CodeGen/PowerPC/quadint-return.ll
new file mode 100644
index 0000000..0349991
--- /dev/null
+++ b/test/CodeGen/PowerPC/quadint-return.ll
@@ -0,0 +1,19 @@
+; REQUIRES: asserts
+; RUN: llc -O0 -debug -o - < %s 2>&1 | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i128 @foo() nounwind {
+entry:
+  %x = alloca i128, align 16
+  store i128 27, i128* %x, align 16
+  %0 = load i128* %x, align 16
+  ret i128 %0
+}
+
+; CHECK: ********** Function: foo
+; CHECK: ********** FAST REGISTER ALLOCATION **********
+; CHECK: %X3<def> = COPY %vreg
+; CHECK-NEXT: %X4<def> = COPY %vreg
+; CHECK-NEXT: BLR
diff --git a/test/CodeGen/PowerPC/stdux-constuse.ll b/test/CodeGen/PowerPC/stdux-constuse.ll
new file mode 100644
index 0000000..e62d438
--- /dev/null
+++ b/test/CodeGen/PowerPC/stdux-constuse.ll
@@ -0,0 +1,47 @@
+; RUN: llc -mcpu=a2 -disable-lsr < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @test1(i64 %add, i64* %ptr) nounwind {
+entry:
+  %p1 = getelementptr i64* %ptr, i64 144115188075855
+  br label %for.cond2.preheader
+
+for.cond2.preheader:
+  %nl.018 = phi i32 [ 0, %entry ], [ %inc9, %for.end ]
+  br label %for.body4
+
+for.body4:
+  %lsr.iv = phi i32 [ %lsr.iv.next, %for.body4 ], [ 16000, %for.cond2.preheader ]
+  %i0 = phi i64* [ %p1, %for.cond2.preheader ], [ %i6, %for.body4 ]
+  %i6 = getelementptr i64* %i0, i64 400000
+  %i7 = getelementptr i64* %i6, i64 300000
+  %i8 = getelementptr i64* %i6, i64 200000
+  %i9 = getelementptr i64* %i6, i64 100000
+  store i64 %add, i64* %i6, align 32
+  store i64 %add, i64* %i7, align 32
+  store i64 %add, i64* %i8, align 32
+  store i64 %add, i64* %i9, align 32
+  %lsr.iv.next = add i32 %lsr.iv, -16
+  %exitcond.15 = icmp eq i32 %lsr.iv.next, 0
+  br i1 %exitcond.15, label %for.end, label %for.body4
+
+; Make sure that we generate the most compact form of this loop with no
+; unnecessary moves
+; CHECK: @test1
+; CHECK: mtctr
+; CHECK: stdux
+; CHECK-NEXT: stdx
+; CHECK-NEXT: stdx
+; CHECK-NEXT: stdx
+; CHECK-NEXT: bdnz
+
+for.end:
+  %inc9 = add nsw i32 %nl.018, 1
+  %exitcond = icmp eq i32 %inc9, 400000
+  br i1 %exitcond, label %for.end10, label %for.cond2.preheader
+
+for.end10:
+  ret i32 0
+}
+
diff --git a/test/CodeGen/PowerPC/svr4-redzone.ll b/test/CodeGen/PowerPC/svr4-redzone.ll
new file mode 100644
index 0000000..91ff579
--- /dev/null
+++ b/test/CodeGen/PowerPC/svr4-redzone.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple="powerpc-unknown-linux-gnu" < %s | FileCheck %s --check-prefix=PPC32
+; RUN: llc -mtriple="powerpc64-unknown-linux-gnu" < %s | FileCheck %s --check-prefix=PPC64
+; PR15332
+
+define void @regalloc() nounwind {
+entry:
+	%0 = add i32 1, 2
+	ret void
+}
+; PPC32: regalloc:
+; PPC32-NOT: stwu 1, -{{[0-9]+}}(1)
+; PPC32: blr
+
+; PPC64: regalloc:
+; PPC64-NOT: stdu 1, -{{[0-9]+}}(1)
+; PPC64: blr
+
+define void @smallstack() nounwind {
+entry:
+	%0 = alloca i8, i32 4
+	ret void
+}
+; PPC32: smallstack:
+; PPC32: stwu 1, -16(1)
+
+; PPC64: smallstack:
+; PPC64-NOT: stdu 1, -{{[0-9]+}}(1)
+; PPC64: blr
+
+define void @bigstack() nounwind {
+entry:
+	%0 = alloca i8, i32 230
+	ret void
+}
+; PPC32: bigstack:
+; PPC32: stwu 1, -240(1)
+
+; PPC64: bigstack:
+; PPC64: stdu 1, -352(1)
diff --git a/test/CodeGen/PowerPC/tls-2.ll b/test/CodeGen/PowerPC/tls-2.ll
new file mode 100644
index 0000000..20d8fe4
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-2.ll
@@ -0,0 +1,15 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-freebsd10.0"
+; RUN: llc -O1 < %s -march=ppc64 | FileCheck %s
+
+@a = thread_local global i32 0, align 4
+
+;CHECK:          localexec:
+define i32 @localexec() nounwind {
+entry:
+;CHECK:          addis [[REG1:[0-9]+]], 13, a@tprel@ha
+;CHECK-NEXT:     li [[REG2:[0-9]+]], 42
+;CHECK-NEXT:     stw [[REG2]], a@tprel@l([[REG1]])
+  store i32 42, i32* @a, align 4
+  ret i32 0
+}
diff --git a/test/CodeGen/PowerPC/tls-ld-2.ll b/test/CodeGen/PowerPC/tls-ld-2.ll
new file mode 100644
index 0000000..4954afe
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-ld-2.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mcpu=pwr7 -O1 -relocation-model=pic < %s | FileCheck %s
+
+; Test peephole optimization for thread-local storage using the
+; local dynamic model.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = hidden thread_local global i32 0, align 4
+
+define signext i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* @a, align 4
+  ret i32 %0
+}
+
+; CHECK:      addis [[REG:[0-9]+]], 2, a@got@tlsld@ha
+; CHECK-NEXT: addi 3, [[REG]], a@got@tlsld@l
+; CHECK-NEXT: bl __tls_get_addr(a@tlsld)
+; CHECK-NEXT: nop
+; CHECK-NEXT: addis [[REG2:[0-9]+]], 3, a@dtprel@ha
+; CHECK-NEXT: lwa {{[0-9]+}}, a@dtprel@l([[REG2]])
diff --git a/test/CodeGen/PowerPC/tls.ll b/test/CodeGen/PowerPC/tls.ll
index 713893b..151b4b7 100644
--- a/test/CodeGen/PowerPC/tls.ll
+++ b/test/CodeGen/PowerPC/tls.ll
@@ -1,16 +1,21 @@
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-freebsd10.0"
-; RUN: llc < %s -march=ppc64 | FileCheck %s
+; RUN: llc -O0 < %s -march=ppc64 | FileCheck -check-prefix=OPT0 %s
+; RUN: llc -O1 < %s -march=ppc64 | FileCheck -check-prefix=OPT1 %s
 
 @a = thread_local global i32 0, align 4
 
-;CHECK:          localexec:
+;OPT0:          localexec:
+;OPT1:          localexec:
 define i32 @localexec() nounwind {
 entry:
-;CHECK:          addis [[REG1:[0-9]+]], 13, a@tprel@ha
-;CHECK-NEXT:     li [[REG2:[0-9]+]], 42
-;CHECK-NEXT:     addi [[REG1]], [[REG1]], a@tprel@l
-;CHECK-NEXT:     stw [[REG2]], 0([[REG1]])
+;OPT0:          addis [[REG1:[0-9]+]], 13, a@tprel@ha
+;OPT0-NEXT:     li [[REG2:[0-9]+]], 42
+;OPT0-NEXT:     addi [[REG1]], [[REG1]], a@tprel@l
+;OPT0-NEXT:     stw [[REG2]], 0([[REG1]])
+;OPT1:          addis [[REG1:[0-9]+]], 13, a@tprel@ha
+;OPT1-NEXT:     li [[REG2:[0-9]+]], 42
+;OPT1-NEXT:     stw [[REG2]], a@tprel@l([[REG1]])
   store i32 42, i32* @a, align 4
   ret i32 0
 }
diff --git a/test/CodeGen/PowerPC/vaddsplat.ll b/test/CodeGen/PowerPC/vaddsplat.ll
new file mode 100644
index 0000000..e65148a
--- /dev/null
+++ b/test/CodeGen/PowerPC/vaddsplat.ll
@@ -0,0 +1,149 @@
+; RUN: llc -O0 -mcpu=pwr7 <%s | FileCheck %s
+
+; Test optimizations of build_vector for 6-bit immediates.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%v4i32 = type <4 x i32>
+%v8i16 = type <8 x i16>
+%v16i8 = type <16 x i8>
+
+define void @test_v4i32_pos_even(%v4i32* %P, %v4i32* %S) {
+       %p = load %v4i32* %P
+       %r = add %v4i32 %p, < i32 18, i32 18, i32 18, i32 18 >
+       store %v4i32 %r, %v4i32* %S
+       ret void
+}
+
+; CHECK: test_v4i32_pos_even:
+; CHECK: vspltisw [[REG1:[0-9]+]], 9
+; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v4i32_neg_even(%v4i32* %P, %v4i32* %S) {
+       %p = load %v4i32* %P
+       %r = add %v4i32 %p, < i32 -28, i32 -28, i32 -28, i32 -28 >
+       store %v4i32 %r, %v4i32* %S
+       ret void
+}
+
+; CHECK: test_v4i32_neg_even:
+; CHECK: vspltisw [[REG1:[0-9]+]], -14
+; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v8i16_pos_even(%v8i16* %P, %v8i16* %S) {
+       %p = load %v8i16* %P
+       %r = add %v8i16 %p, < i16 30, i16 30, i16 30, i16 30, i16 30, i16 30, i16 30, i16 30 >
+       store %v8i16 %r, %v8i16* %S
+       ret void
+}
+
+; CHECK: test_v8i16_pos_even:
+; CHECK: vspltish [[REG1:[0-9]+]], 15
+; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v8i16_neg_even(%v8i16* %P, %v8i16* %S) {
+       %p = load %v8i16* %P
+       %r = add %v8i16 %p, < i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32 >
+       store %v8i16 %r, %v8i16* %S
+       ret void
+}
+
+; CHECK: test_v8i16_neg_even:
+; CHECK: vspltish [[REG1:[0-9]+]], -16
+; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v16i8_pos_even(%v16i8* %P, %v16i8* %S) {
+       %p = load %v16i8* %P
+       %r = add %v16i8 %p, < i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16 >
+       store %v16i8 %r, %v16i8* %S
+       ret void
+}
+
+; CHECK: test_v16i8_pos_even:
+; CHECK: vspltisb [[REG1:[0-9]+]], 8
+; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v16i8_neg_even(%v16i8* %P, %v16i8* %S) {
+       %p = load %v16i8* %P
+       %r = add %v16i8 %p, < i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18 >
+       store %v16i8 %r, %v16i8* %S
+       ret void
+}
+
+; CHECK: test_v16i8_neg_even:
+; CHECK: vspltisb [[REG1:[0-9]+]], -9
+; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v4i32_pos_odd(%v4i32* %P, %v4i32* %S) {
+       %p = load %v4i32* %P
+       %r = add %v4i32 %p, < i32 27, i32 27, i32 27, i32 27 >
+       store %v4i32 %r, %v4i32* %S
+       ret void
+}
+
+; CHECK: test_v4i32_pos_odd:
+; CHECK: vspltisw [[REG2:[0-9]+]], -16
+; CHECK: vspltisw [[REG1:[0-9]+]], 11
+; CHECK: vsubuwm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v4i32_neg_odd(%v4i32* %P, %v4i32* %S) {
+       %p = load %v4i32* %P
+       %r = add %v4i32 %p, < i32 -27, i32 -27, i32 -27, i32 -27 >
+       store %v4i32 %r, %v4i32* %S
+       ret void
+}
+
+; CHECK: test_v4i32_neg_odd:
+; CHECK: vspltisw [[REG2:[0-9]+]], -16
+; CHECK: vspltisw [[REG1:[0-9]+]], -11
+; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v8i16_pos_odd(%v8i16* %P, %v8i16* %S) {
+       %p = load %v8i16* %P
+       %r = add %v8i16 %p, < i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31 >
+       store %v8i16 %r, %v8i16* %S
+       ret void
+}
+
+; CHECK: test_v8i16_pos_odd:
+; CHECK: vspltish [[REG2:[0-9]+]], -16
+; CHECK: vspltish [[REG1:[0-9]+]], 15
+; CHECK: vsubuhm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v8i16_neg_odd(%v8i16* %P, %v8i16* %S) {
+       %p = load %v8i16* %P
+       %r = add %v8i16 %p, < i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31 >
+       store %v8i16 %r, %v8i16* %S
+       ret void
+}
+
+; CHECK: test_v8i16_neg_odd:
+; CHECK: vspltish [[REG2:[0-9]+]], -16
+; CHECK: vspltish [[REG1:[0-9]+]], -15
+; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v16i8_pos_odd(%v16i8* %P, %v16i8* %S) {
+       %p = load %v16i8* %P
+       %r = add %v16i8 %p, < i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17 >
+       store %v16i8 %r, %v16i8* %S
+       ret void
+}
+
+; CHECK: test_v16i8_pos_odd:
+; CHECK: vspltisb [[REG2:[0-9]+]], -16
+; CHECK: vspltisb [[REG1:[0-9]+]], 1
+; CHECK: vsububm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v16i8_neg_odd(%v16i8* %P, %v16i8* %S) {
+       %p = load %v16i8* %P
+       %r = add %v16i8 %p, < i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17 >
+       store %v16i8 %r, %v16i8* %S
+       ret void
+}
+
+; CHECK: test_v16i8_neg_odd:
+; CHECK: vspltisb [[REG2:[0-9]+]], -16
+; CHECK: vspltisb [[REG1:[0-9]+]], -1
+; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG2]]
+
diff --git a/test/CodeGen/PowerPC/vec_constants.ll b/test/CodeGen/PowerPC/vec_constants.ll
index 399f19f..e4799e5 100644
--- a/test/CodeGen/PowerPC/vec_constants.ll
+++ b/test/CodeGen/PowerPC/vec_constants.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep CPI
+; RUN: llc -O0 -mcpu=pwr7 < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
 
 define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind {
 	%tmp = load <4 x i32>* %P1		; <<4 x i32>> [#uses=1]
@@ -13,32 +16,71 @@ define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind {
 	%tmp13 = bitcast <4 x i32> %tmp12 to <4 x float>		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp13, <4 x float>* %P3
 	ret void
+
+; CHECK: test1:
+; CHECK-NOT: CPI
 }
 
 define <4 x i32> @test_30() nounwind {
 	ret <4 x i32> < i32 30, i32 30, i32 30, i32 30 >
+
+; CHECK: test_30:
+; CHECK: vspltisw
+; CHECK-NEXT: vadduwm
+; CHECK-NEXT: blr
 }
 
 define <4 x i32> @test_29() nounwind {
 	ret <4 x i32> < i32 29, i32 29, i32 29, i32 29 >
+
+; CHECK: test_29:
+; CHECK: vspltisw
+; CHECK-NEXT: vspltisw
+; CHECK-NEXT: vsubuwm
+; CHECK-NEXT: blr
 }
 
 define <8 x i16> @test_n30() nounwind {
 	ret <8 x i16> < i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30 >
+
+; CHECK: test_n30:
+; CHECK: vspltish
+; CHECK-NEXT: vadduhm
+; CHECK-NEXT: blr
 }
 
 define <16 x i8> @test_n104() nounwind {
 	ret <16 x i8> < i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104 >
+
+; CHECK: test_n104:
+; CHECK: vspltisb
+; CHECK-NEXT: vslb
+; CHECK-NEXT: blr
 }
 
 define <4 x i32> @test_vsldoi() nounwind {
 	ret <4 x i32> < i32 512, i32 512, i32 512, i32 512 >
+
+; CHECK: test_vsldoi:
+; CHECK: vspltisw
+; CHECK-NEXT: vsldoi
+; CHECK-NEXT: blr
 }
 
 define <8 x i16> @test_vsldoi_65023() nounwind {
 	ret <8 x i16> < i16 65023, i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023 >
+
+; CHECK: test_vsldoi_65023:
+; CHECK: vspltish
+; CHECK-NEXT: vsldoi
+; CHECK-NEXT: blr
 }
 
 define <4 x i32> @test_rol() nounwind {
 	ret <4 x i32> < i32 -11534337, i32 -11534337, i32 -11534337, i32 -11534337 >
+
+; CHECK: test_rol:
+; CHECK: vspltisw
+; CHECK-NEXT: vrlw
+; CHECK-NEXT: blr
 }
diff --git a/test/CodeGen/PowerPC/vec_extload.ll b/test/CodeGen/PowerPC/vec_extload.ll
index 15a3f9f..998645d 100644
--- a/test/CodeGen/PowerPC/vec_extload.ll
+++ b/test/CodeGen/PowerPC/vec_extload.ll
@@ -15,55 +15,9 @@ define <16 x i8> @v16si8_sext_in_reg(<16 x i8> %a) {
   ret <16 x i8> %c
 }
 ; CHECK: v16si8_sext_in_reg:
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vslb
+; CHECK: vsrab
+; CHECK: blr
 
 ; The zero extend uses a more clever logic: a vector splat
 ; and a logic and to set higher bits to 0.
@@ -83,31 +37,9 @@ define <8 x i16> @v8si16_sext_in_reg(<8 x i16> %a) {
   ret <8 x i16> %c
 }
 ; CHECK: v8si16_sext_in_reg:
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vslh
+; CHECK: vsrah
+; CHECK: blr
 
 ; Same as v8si16_sext_in_reg, but instead of creating the mask
 ; with a splat, loads it from memory.
@@ -129,19 +61,9 @@ define <4 x i32> @v4si32_sext_in_reg(<4 x i32> %a) {
   ret <4 x i32> %c
 }
 ; CHECK: v4si32_sext_in_reg:
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lha
-; CHECK: stw
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lha
-; CHECK: stw
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lha
-; CHECK: stw
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lha
-; CHECK: stw
-; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vslw
+; CHECK: vsraw
+; CHECK: blr
 
 ; Same as v8si16_sext_in_reg.
 define <4 x i32> @v4si32_zext_in_reg(<4 x i32> %a) {
diff --git a/test/CodeGen/R600/128bit-kernel-args.ll b/test/CodeGen/R600/128bit-kernel-args.ll
new file mode 100644
index 0000000..114f9e7
--- /dev/null
+++ b/test/CodeGen/R600/128bit-kernel-args.ll
@@ -0,0 +1,18 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @v4i32_kernel_arg
+; CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40
+
+define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32>  %in) {
+entry:
+  store <4 x i32> %in, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @v4f32_kernel_args
+; CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40
+define void @v4f32_kernel_args(<4 x float> addrspace(1)* %out, <4 x float>  %in) {
+entry:
+  store <4 x float> %in, <4 x float> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
index 1acf905..fd958b3 100644
--- a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
+++ b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
@@ -1,13 +1,15 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
 ; This test is for a bug in
 ; DAGCombiner::reduceBuildVecConvertToConvertBuildVec() where
 ; the wrong type was being passed to
 ; TargetLowering::getOperationAction() when checking the legality of
 ; ISD::UINT_TO_FP and ISD::SINT_TO_FP opcodes.
 
+
+; CHECK: @sint
+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
 define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %ptr = getelementptr i32 addrspace(1)* %in, i32 1
@@ -19,6 +21,7 @@ entry:
   ret void
 }
 
+;CHECK: @uint
 ;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
diff --git a/test/CodeGen/R600/disconnected-predset-break-bug.ll b/test/CodeGen/R600/disconnected-predset-break-bug.ll
new file mode 100644
index 0000000..a586742
--- /dev/null
+++ b/test/CodeGen/R600/disconnected-predset-break-bug.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; PRED_SET* instructions must be tied to any instruction that uses their
+; result.  This tests that there are no instructions between the PRED_SET*
+; and the PREDICATED_BREAK in this loop.
+
+; CHECK: @loop_ge
+; CHECK: WHILE
+; CHECK: PRED_SET
+; CHECK-NEXT: PREDICATED_BREAK
+define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) nounwind {
+entry:
+  %cmp5 = icmp sgt i32 %iterations, 0
+  br i1 %cmp5, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.07.in = phi i32 [ %i.07, %for.body ], [ %iterations, %entry ]
+  %ai.06 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %i.07 = add nsw i32 %i.07.in, -1
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %ai.06
+  store i32 %i.07, i32 addrspace(1)* %arrayidx, align 4
+  %add = add nsw i32 %ai.06, 1
+  %exitcond = icmp eq i32 %add, %iterations
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll
index 1dcd07c..89f5e9e 100644
--- a/test/CodeGen/R600/fcmp.ll
+++ b/test/CodeGen/R600/fcmp.ll
@@ -1,8 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: SETE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MOV T{{[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
-;CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: SETE_DX10 T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
 entry:
diff --git a/test/CodeGen/R600/fmad.ll b/test/CodeGen/R600/fmad.ll
new file mode 100644
index 0000000..a3d4d0f
--- /dev/null
+++ b/test/CodeGen/R600/fmad.ll
@@ -0,0 +1,19 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: MULADD_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+   %r0 = call float @llvm.R600.load.input(i32 0)
+   %r1 = call float @llvm.R600.load.input(i32 1)
+   %r2 = call float @llvm.R600.load.input(i32 2)
+   %r3 = fmul float %r0, %r1                      ; r3 = r0 * r1
+   %r4 = fadd float %r3, %r2                      ; r4 = r3 + r2; presumably the pair expected to fold into MULADD_IEEE
+   call void @llvm.AMDGPU.store.output(float %r4, i32 0)
+   ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
+declare float @fabs(float ) readnone
diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll
index 0ec1c37..591aa52 100644
--- a/test/CodeGen/R600/fsub.ll
+++ b/test/CodeGen/R600/fsub.ll
@@ -1,7 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-; CHECK: MOV T{{[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
-; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
 
 define void @test() {
    %r0 = call float @llvm.R600.load.input(i32 0)
diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll
new file mode 100644
index 0000000..382f78c
--- /dev/null
+++ b/test/CodeGen/R600/kcache-fold.ll
@@ -0,0 +1,52 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: MOV T{{[0-9]+\.[XYZW], CBuf0\[[0-9]+\]\.[XYZW]}}
+
+define void @main() {
+main_body:
+  %0 = load <4 x float> addrspace(9)* null
+  %1 = extractelement <4 x float> %0, i32 0
+  %2 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %3 = extractelement <4 x float> %2, i32 0
+  %4 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %5 = extractelement <4 x float> %4, i32 0
+  %6 = fcmp ult float %1, 0.000000e+00
+  %7 = select i1 %6, float %3, float %5
+  %8 = load <4 x float> addrspace(9)* null
+  %9 = extractelement <4 x float> %8, i32 1
+  %10 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %11 = extractelement <4 x float> %10, i32 1
+  %12 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %13 = extractelement <4 x float> %12, i32 1
+  %14 = fcmp ult float %9, 0.000000e+00
+  %15 = select i1 %14, float %11, float %13
+  %16 = load <4 x float> addrspace(9)* null
+  %17 = extractelement <4 x float> %16, i32 2
+  %18 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %19 = extractelement <4 x float> %18, i32 2
+  %20 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %21 = extractelement <4 x float> %20, i32 2
+  %22 = fcmp ult float %17, 0.000000e+00
+  %23 = select i1 %22, float %19, float %21
+  %24 = load <4 x float> addrspace(9)* null
+  %25 = extractelement <4 x float> %24, i32 3
+  %26 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %27 = extractelement <4 x float> %26, i32 3
+  %28 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %29 = extractelement <4 x float> %28, i32 3
+  %30 = fcmp ult float %25, 0.000000e+00
+  %31 = select i1 %30, float %27, float %29
+  %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
+  %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
+  %34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float 1.000000e+00)
+  %35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float 1.000000e+00)
+  %36 = insertelement <4 x float> undef, float %32, i32 0
+  %37 = insertelement <4 x float> %36, float %33, i32 1
+  %38 = insertelement <4 x float> %37, float %34, i32 2
+  %39 = insertelement <4 x float> %38, float %35, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
+  ret void
+}
+
+declare float @llvm.AMDIL.clamp.(float, float, float) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll
index 4c731b2..be62342 100644
--- a/test/CodeGen/R600/literals.ll
+++ b/test/CodeGen/R600/literals.ll
@@ -6,6 +6,7 @@
 ; or
 ; ADD_INT literal.x REG, 5
 
+; CHECK: @i32_literal
 ; CHECK: ADD_INT {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} 5
 define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
 entry:
@@ -20,6 +21,7 @@ entry:
 ; or
 ; ADD literal.x REG, 5.0
 
+; CHECK: @float_literal
 ; CHECK: ADD {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} {{[0-9]+}}(5.0
 define void @float_literal(float addrspace(1)* %out, float %in) {
 entry:
diff --git a/test/CodeGen/R600/llvm.AMDGPU.tex.ll b/test/CodeGen/R600/llvm.AMDGPU.tex.ll
new file mode 100644
index 0000000..74331fa
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.tex.ll
@@ -0,0 +1,42 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 1
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 2
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 3
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 4
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 5
+;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 6
+;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 7
+;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 8
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 9
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 10
+;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 11
+;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 12
+;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 13
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 14
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 15
+;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 16
+
+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+   %addr = load <4 x float> addrspace(1)* %in
+   %res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %addr, i32 0, i32 0, i32 1)
+   %res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res1, i32 0, i32 0, i32 2)
+   %res3 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res2, i32 0, i32 0, i32 3)
+   %res4 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res3, i32 0, i32 0, i32 4)
+   %res5 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res4, i32 0, i32 0, i32 5)
+   %res6 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res5, i32 0, i32 0, i32 6)
+   %res7 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res6, i32 0, i32 0, i32 7)
+   %res8 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res7, i32 0, i32 0, i32 8)
+   %res9 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res8, i32 0, i32 0, i32 9)
+   %res10 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res9, i32 0, i32 0, i32 10)
+   %res11 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res10, i32 0, i32 0, i32 11)
+   %res12 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res11, i32 0, i32 0, i32 12)
+   %res13 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res12, i32 0, i32 0, i32 13)
+   %res14 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res13, i32 0, i32 0, i32 14)
+   %res15 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res14, i32 0, i32 0, i32 15)
+   %res16 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res15, i32 0, i32 0, i32 16)
+   store <4 x float> %res16, <4 x float> addrspace(1)* %out
+   ret void
+}
+
+declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
new file mode 100644
index 0000000..0c19f14
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
@@ -0,0 +1,23 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+;CHECK: S_MOV_B32
+;CHECK-NEXT: V_INTERP_MOV_F32
+
+define void @main() {
+main_body:
+  call void @llvm.AMDGPU.shader.type(i32 0)
+  %0 = load i32 addrspace(8)* inttoptr (i32 6 to i32 addrspace(8)*)
+  %1 = call float @llvm.SI.fs.interp.constant(i32 0, i32 0, i32 %0)
+  %2 = call i32 @llvm.SI.packf16(float %1, float %1)
+  %3 = bitcast i32 %2 to float
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
+  ret void
+}
+
+declare void @llvm.AMDGPU.shader.type(i32)
+
+declare float @llvm.SI.fs.interp.constant(i32, i32, i32) readonly
+
+declare i32 @llvm.SI.packf16(float, float) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll
new file mode 100644
index 0000000..34d1935
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.sample.ll
@@ -0,0 +1,71 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+;CHECK: IMAGE_SAMPLE
+;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: IMAGE_SAMPLE
+;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: IMAGE_SAMPLE
+;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: IMAGE_SAMPLE
+;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: IMAGE_SAMPLE
+;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: IMAGE_SAMPLE_C
+;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: IMAGE_SAMPLE_C
+;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: IMAGE_SAMPLE_C
+;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: IMAGE_SAMPLE
+;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: IMAGE_SAMPLE
+;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: IMAGE_SAMPLE_C
+;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: IMAGE_SAMPLE_C
+;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: IMAGE_SAMPLE_C
+;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: IMAGE_SAMPLE
+;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: IMAGE_SAMPLE
+;CHECK-NEXT: S_WAITCNT 1792
+;CHECK-NEXT: IMAGE_SAMPLE
+
+define void @test() {                                 ; 16 llvm.SI.sample. calls; only the last i32 operand varies (1..16)
+   %res1 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+      <8 x i32> undef, <4 x i32> undef, i32 1)
+   %res2 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+      <8 x i32> undef, <4 x i32> undef, i32 2)
+   %res3 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+      <8 x i32> undef, <4 x i32> undef, i32 3)
+   %res4 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+      <8 x i32> undef, <4 x i32> undef, i32 4)
+   %res5 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+      <8 x i32> undef, <4 x i32> undef, i32 5)
+   %res6 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+      <8 x i32> undef, <4 x i32> undef, i32 6)
+   %res7 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+      <8 x i32> undef, <4 x i32> undef, i32 7)
+   %res8 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+      <8 x i32> undef, <4 x i32> undef, i32 8)
+   %res9 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+      <8 x i32> undef, <4 x i32> undef, i32 9)
+   %res10 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+      <8 x i32> undef, <4 x i32> undef, i32 10)
+   %res11 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+      <8 x i32> undef, <4 x i32> undef, i32 11)
+   %res12 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+      <8 x i32> undef, <4 x i32> undef, i32 12)
+   %res13 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+      <8 x i32> undef, <4 x i32> undef, i32 13)
+   %res14 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+      <8 x i32> undef, <4 x i32> undef, i32 14)
+   %res15 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+      <8 x i32> undef, <4 x i32> undef, i32 15)
+   %res16 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef,
+      <8 x i32> undef, <4 x i32> undef, i32 16)
+   ret void                                           ; none of %res1..%res16 is used; NOTE(review): last operand presumably selects the sampled target - confirm
+}
+
+declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32)
diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll
new file mode 100644
index 0000000..18895a4
--- /dev/null
+++ b/test/CodeGen/R600/predicates.ll
@@ -0,0 +1,100 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; These tests make sure the compiler is optimizing branches using predicates
+; when it is legal to do so.
+
+; CHECK: @simple_if
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
+; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+define void @simple_if(i32 addrspace(1)* %out, i32 %in) {    ; *out = (in > 0) ? in << 1 : in
+entry:
+  %0 = icmp sgt i32 %in, 0                  ; signed in > 0
+  br i1 %0, label %IF, label %ENDIF         ; the false edge skips straight to the merge block
+
+IF:
+  %1 = shl i32 %in, 1                       ; in << 1
+  br label %ENDIF
+
+ENDIF:
+  %2 = phi i32 [ %in, %entry ], [ %1, %IF ] ; unshifted %in when the branch was not taken
+  store i32 %2, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @simple_if_else
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
+; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+define void @simple_if_else(i32 addrspace(1)* %out, i32 %in) {   ; *out = (in > 0) ? in << 1 : in >> 1 (logical)
+entry:
+  %0 = icmp sgt i32 %in, 0                  ; signed in > 0
+  br i1 %0, label %IF, label %ELSE
+
+IF:
+  %1 = shl i32 %in, 1                       ; in << 1
+  br label %ENDIF
+
+ELSE:
+  %2 = lshr i32 %in, 1                      ; logical in >> 1
+  br label %ENDIF
+
+ENDIF:
+  %3 = phi i32 [ %1, %IF ], [ %2, %ELSE ]   ; one shifted value from each arm
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @nested_if
+; CHECK: IF_PREDICATE_SET
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
+; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+; CHECK: ENDIF
+define void @nested_if(i32 addrspace(1)* %out, i32 %in) {    ; two nested conditions that share one merge block
+entry:
+  %0 = icmp sgt i32 %in, 0                  ; outer condition: in > 0
+  br i1 %0, label %IF0, label %ENDIF
+
+IF0:
+  %1 = add i32 %in, 10
+  %2 = icmp sgt i32 %1, 0                   ; inner condition: in + 10 > 0
+  br i1 %2, label %IF1, label %ENDIF
+
+IF1:
+  %3 = shl i32  %1, 1                       ; (in + 10) << 1
+  br label %ENDIF
+
+ENDIF:
+  %4 = phi i32 [%in, %entry], [%1, %IF0], [%3, %IF1]  ; in, in + 10, or (in + 10) << 1
+  store i32 %4, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @nested_if_else
+; CHECK: IF_PREDICATE_SET
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
+; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+; CHECK: ENDIF
+define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) {   ; outer if containing an inner if/else
+entry:
+  %0 = icmp sgt i32 %in, 0                  ; outer condition: in > 0
+  br i1 %0, label %IF0, label %ENDIF
+
+IF0:
+  %1 = add i32 %in, 10
+  %2 = icmp sgt i32 %1, 0                   ; inner condition: in + 10 > 0
+  br i1 %2, label %IF1, label %ELSE1
+
+IF1:
+  %3 = shl i32  %1, 1                       ; (in + 10) << 1
+  br label %ENDIF
+
+ELSE1:
+  %4 = lshr i32 %in, 1                      ; shifts the original %in, not %1
+  br label %ENDIF
+
+ENDIF:
+  %5 = phi i32 [%in, %entry], [%3, %IF1], [%4, %ELSE1]    ; %IF0 is not a direct predecessor here
+  store i32 %5, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/selectcc-icmp-select-float.ll b/test/CodeGen/R600/selectcc-icmp-select-float.ll
index f65a300..359ca1e 100644
--- a/test/CodeGen/R600/selectcc-icmp-select-float.ll
+++ b/test/CodeGen/R600/selectcc-icmp-select-float.ll
@@ -2,7 +2,7 @@
 
 ; Note additional optimizations may cause this SGT to be replaced with a
 ; CND* instruction.
-; CHECK: SGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, -1}}
+; CHECK: SETGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, -1}}
 ; Test a selectcc with i32 LHS/RHS and float True/False
 
 define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) {
diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll
new file mode 100644
index 0000000..54febcf
--- /dev/null
+++ b/test/CodeGen/R600/set-dx10.ll
@@ -0,0 +1,137 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; These tests check that floating point comparisons which are used by select
+; to store integer true (-1) and false (0) values are lowered to one of the
+; SET*DX10 instructions.
+
+; CHECK: @fcmp_une_select_fptosi
+; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {   ; integer -1/0 built via float select + negate + fptosi
+entry:
+  %0 = fcmp une float %in, 5.0                                ; unordered or not equal
+  %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00  ; 1.0 when the compare holds, else 0.0
+  %2 = fsub float -0.000000e+00, %1                           ; negate: -1.0 or -0.0
+  %3 = fptosi float %2 to i32                                 ; -1 for true, 0 for false
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @fcmp_une_select_i32
+; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {      ; integer -1/0 selected directly on the compare
+entry:
+  %0 = fcmp une float %in, 5.0              ; unordered or not equal
+  %1 = select i1 %0, i32 -1, i32 0          ; -1 for true, 0 for false
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @fcmp_ueq_select_fptosi
+; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) {   ; integer -1/0 built via float select + negate + fptosi
+entry:
+  %0 = fcmp ueq float %in, 5.0                                ; unordered or equal
+  %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00  ; 1.0 when the compare holds, else 0.0
+  %2 = fsub float -0.000000e+00, %1                           ; negate: -1.0 or -0.0
+  %3 = fptosi float %2 to i32                                 ; -1 for true, 0 for false
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @fcmp_ueq_select_i32
+; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) {      ; integer -1/0 selected directly on the compare
+entry:
+  %0 = fcmp ueq float %in, 5.0              ; unordered or equal
+  %1 = select i1 %0, i32 -1, i32 0          ; -1 for true, 0 for false
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @fcmp_ugt_select_fptosi
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) {   ; integer -1/0 built via float select + negate + fptosi
+entry:
+  %0 = fcmp ugt float %in, 5.0                                ; unordered or greater than
+  %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00  ; 1.0 when the compare holds, else 0.0
+  %2 = fsub float -0.000000e+00, %1                           ; negate: -1.0 or -0.0
+  %3 = fptosi float %2 to i32                                 ; -1 for true, 0 for false
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @fcmp_ugt_select_i32
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) {      ; integer -1/0 selected directly on the compare
+entry:
+  %0 = fcmp ugt float %in, 5.0              ; unordered or greater than
+  %1 = select i1 %0, i32 -1, i32 0          ; -1 for true, 0 for false
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @fcmp_uge_select_fptosi
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) {   ; integer -1/0 built via float select + negate + fptosi
+entry:
+  %0 = fcmp uge float %in, 5.0                                ; unordered or greater than or equal
+  %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00  ; 1.0 when the compare holds, else 0.0
+  %2 = fsub float -0.000000e+00, %1                           ; negate: -1.0 or -0.0
+  %3 = fptosi float %2 to i32                                 ; -1 for true, 0 for false
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @fcmp_uge_select_i32
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) {      ; integer -1/0 selected directly on the compare
+entry:
+  %0 = fcmp uge float %in, 5.0              ; unordered or greater than or equal
+  %1 = select i1 %0, i32 -1, i32 0          ; -1 for true, 0 for false
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @fcmp_ule_select_fptosi
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) {   ; integer -1/0 built via float select + negate + fptosi
+entry:
+  %0 = fcmp ule float %in, 5.0                                ; unordered or less than or equal
+  %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00  ; 1.0 when the compare holds, else 0.0
+  %2 = fsub float -0.000000e+00, %1                           ; negate: -1.0 or -0.0
+  %3 = fptosi float %2 to i32                                 ; -1 for true, 0 for false
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @fcmp_ule_select_i32
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) {      ; integer -1/0 selected directly on the compare
+entry:
+  %0 = fcmp ule float %in, 5.0              ; unordered or less than or equal
+  %1 = select i1 %0, i32 -1, i32 0          ; -1 for true, 0 for false
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @fcmp_ult_select_fptosi
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) {   ; integer -1/0 built via float select + negate + fptosi
+entry:
+  %0 = fcmp ult float %in, 5.0                                ; unordered or less than
+  %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00  ; 1.0 when the compare holds, else 0.0
+  %2 = fsub float -0.000000e+00, %1                           ; negate: -1.0 or -0.0
+  %3 = fptosi float %2 to i32                                 ; -1 for true, 0 for false
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @fcmp_ult_select_i32
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) {      ; integer -1/0 selected directly on the compare
+entry:
+  %0 = fcmp ult float %in, 5.0              ; unordered or less than
+  %1 = select i1 %0, i32 -1, i32 0          ; -1 for true, 0 for false
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/short-args.ll b/test/CodeGen/R600/short-args.ll
index 1070250..b69e327 100644
--- a/test/CodeGen/R600/short-args.ll
+++ b/test/CodeGen/R600/short-args.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
+; CHECK: @i8_arg
 ; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
 
 define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
@@ -9,6 +10,7 @@ entry:
   ret void
 }
 
+; CHECK: @i8_zext_arg
 ; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
 
 define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
@@ -18,6 +20,7 @@ entry:
   ret void
 }
 
+; CHECK: @i16_arg
 ; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
 
 define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
@@ -27,6 +30,7 @@ entry:
   ret void
 }
 
+; CHECK: @i16_zext_arg
 ; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
 
 define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/R600/unsupported-cc.ll
new file mode 100644
index 0000000..b48c591
--- /dev/null
+++ b/test/CodeGen/R600/unsupported-cc.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; These tests cover condition codes (lt/le forms) that the hardware does not support directly.
+
+; CHECK: @slt
+; CHECK: SETGT_INT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 5(7.006492e-45)
+define void @slt(i32 addrspace(1)* %out, i32 %in) {          ; *out = (in < 5, signed) ? -1 : 0
+entry:
+  %0 = icmp slt i32 %in, 5                  ; signed less than
+  %1 = select i1 %0, i32 -1, i32 0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @ult_i32
+; CHECK: SETGT_UINT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 5(7.006492e-45)
+define void @ult_i32(i32 addrspace(1)* %out, i32 %in) {      ; *out = (in < 5, unsigned) ? -1 : 0
+entry:
+  %0 = icmp ult i32 %in, 5                  ; unsigned less than
+  %1 = select i1 %0, i32 -1, i32 0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @ult_float
+; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @ult_float(float addrspace(1)* %out, float %in) {    ; *out = (in unordered-or-less-than 5.0) ? 1.0 : 0.0
+entry:
+  %0 = fcmp ult float %in, 5.0              ; unordered or less than
+  %1 = select i1 %0, float 1.0, float 0.0   ; float result, stored as float
+  store float %1, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @olt
+; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @olt(float addrspace(1)* %out, float %in) {      ; *out = (in ordered-and-less-than 5.0) ? 1.0 : 0.0
+entry:
+  %0 = fcmp olt float %in, 5.0              ; ordered and less than
+  %1 = select i1 %0, float 1.0, float 0.0   ; float result, stored as float
+  store float %1, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @sle
+; CHECK: SETGT_INT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 6(8.407791e-45)
+define void @sle(i32 addrspace(1)* %out, i32 %in) {          ; *out = (in <= 5, signed) ? -1 : 0
+entry:
+  %0 = icmp sle i32 %in, 5                  ; signed less than or equal; the expected output compares against 6 instead
+  %1 = select i1 %0, i32 -1, i32 0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @ule_i32
+; CHECK: SETGT_UINT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 6(8.407791e-45)
+define void @ule_i32(i32 addrspace(1)* %out, i32 %in) {      ; *out = (in <= 5, unsigned) ? -1 : 0
+entry:
+  %0 = icmp ule i32 %in, 5                  ; unsigned less than or equal; the expected output compares against 6 instead
+  %1 = select i1 %0, i32 -1, i32 0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @ule_float
+; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @ule_float(float addrspace(1)* %out, float %in) {    ; *out = (in unordered-or-less-or-equal 5.0) ? 1.0 : 0.0
+entry:
+  %0 = fcmp ule float %in, 5.0              ; unordered or less than or equal
+  %1 = select i1 %0, float 1.0, float 0.0   ; float result, stored as float
+  store float %1, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK: @ole
+; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @ole(float addrspace(1)* %out, float %in) {      ; *out = (in ordered-and-less-or-equal 5.0) ? 1.0 : 0.0
+entry:
+  %0 = fcmp ole float %in, 5.0              ; ordered and less than or equal
+  %1 = select i1 %0, float 1.0, float 0.0   ; float result, stored as float
+  store float %1, float addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/vec4-expand.ll b/test/CodeGen/R600/vec4-expand.ll
index c61f6e2..8f62bc6 100644
--- a/test/CodeGen/R600/vec4-expand.ll
+++ b/test/CodeGen/R600/vec4-expand.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
+; CHECK: @fp_to_sint
 ; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
@@ -12,6 +13,7 @@ define void @fp_to_sint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)*
   ret void
 }
 
+; CHECK: @fp_to_uint
 ; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
@@ -24,6 +26,7 @@ define void @fp_to_uint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)*
   ret void
 }
 
+; CHECK: @sint_to_fp
 ; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
@@ -36,6 +39,7 @@ define void @sint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)*
   ret void
 }
 
+; CHECK: @uint_to_fp
 ; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
diff --git a/test/CodeGen/Thumb/stack-coloring-without-frame-ptr.ll b/test/CodeGen/Thumb/stack-coloring-without-frame-ptr.ll
new file mode 100644
index 0000000..3f6407a
--- /dev/null
+++ b/test/CodeGen/Thumb/stack-coloring-without-frame-ptr.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=thumb -mcpu=arm1022e
+
+%iterator = type { i8**, i8**, i8**, i8*** }
+%insert_iterator = type { %deque*, %iterator }
+%deque = type { %iterator, %iterator, i8***, i32 }
+
+define i32 @test_thumbv5e_fp_elim() nounwind optsize {       ; three allocas with lifetime markers; the test only needs llc to finish
+entry:
+  %var1 = alloca %iterator, align 4
+  %var2 = alloca %insert_iterator, align 4
+  %var3 = alloca %deque, align 4
+
+  %0 = bitcast %deque* %var3 to i8*
+  %1 = bitcast %iterator* %var1 to i8*
+  call void @llvm.lifetime.start(i64 16, i8* %1) nounwind    ; %var1 live for 16 bytes
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %0, i32 16, i32 4, i1 false)   ; copy 16 bytes from %var3 into %var1
+  call void @llvm.lifetime.end(i64 16, i8* %1) nounwind      ; %var1 dead again
+
+  %2 = bitcast %insert_iterator* %var2 to i8*
+  call void @llvm.lifetime.start(i64 20, i8* %2) nounwind    ; %var2's lifetime starts but has no matching end before the return
+
+  ret i32 0
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
diff --git a/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll b/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
new file mode 100644
index 0000000..502b138
--- /dev/null
+++ b/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -mtriple=thumbv7s-apple-ios6.0.0 -verify-machineinstrs
+
+; Check to make sure the tail-call return at the end doesn't use a
+; callee-saved register. Register hinting from t2LDRDri was getting this
+; wrong. The intervening call will force allocation to try a high register
+; first, so the hint will attempt to fire, but must be rejected due to
+; not being in the allocation order for the tcGPR register class.
+; The machine instruction verifier will make sure that all actually worked
+; out the way it's supposed to.
+
+%"myclass" = type { %struct.foo }
+%struct.foo = type { i32, [40 x i8] }
+
+define hidden void @func(i8* %Data) nounwind ssp {           ; several direct calls, then an indirect tail call through a loaded pointer
+  %1 = getelementptr inbounds i8* %Data, i32 12
+  %2 = bitcast i8* %1 to %"myclass"*                         ; object located at %Data + 12
+  tail call void @abc(%"myclass"* %2) nounwind
+  tail call void @def(%"myclass"* %2) nounwind
+  %3 = getelementptr inbounds i8* %Data, i32 8
+  %4 = bitcast i8* %3 to i8**
+  %5 = load i8** %4, align 4, !tbaa !0                       ; pointer stored at %Data + 8
+  tail call void @ghi(i8* %5) nounwind
+  %6 = bitcast i8* %Data to void (i8*)**
+  %7 = load void (i8*)** %6, align 4, !tbaa !0               ; function pointer stored at %Data + 0
+  %8 = getelementptr inbounds i8* %Data, i32 4
+  %9 = bitcast i8* %8 to i8**
+  %10 = load i8** %9, align 4, !tbaa !0                      ; argument pointer stored at %Data + 4
+  %11 = icmp eq i8* %Data, null
+  br i1 %11, label %14, label %12                            ; a null %Data skips the two extra calls
+
+; <label>:12                                      ; preds = %0
+  %13 = tail call %"myclass"* @jkl(%"myclass"* %2) nounwind  ; result unused
+  tail call void @mno(i8* %Data) nounwind
+  br label %14
+
+; <label>:14                                      ; preds = %12, %0
+  tail call void %7(i8* %10) nounwind                        ; %7 and %10 were loaded before the intervening calls
+  ret void
+}
+
+declare void @mno(i8*)
+
+declare void @def(%"myclass"*)
+
+declare void @abc(%"myclass"*)
+
+declare void @ghi(i8*)
+
+declare %"myclass"* @jkl(%"myclass"*) nounwind
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Thumb2/aligned-spill.ll b/test/CodeGen/Thumb2/aligned-spill.ll
index c98ca80..3a2803f 100644
--- a/test/CodeGen/Thumb2/aligned-spill.ll
+++ b/test/CodeGen/Thumb2/aligned-spill.ll
@@ -26,8 +26,8 @@ entry:
 ; NEON: bic r4, r4, #15
 ; Stack pointer must be updated before the spills.
 ; NEON: mov sp, r4
-; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
-; NEON: vst1.64 {d12, d13, d14, d15}, [r4, :128]
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
+; NEON: vst1.64 {d12, d13, d14, d15}, [r4:128]
 ; Stack pointer adjustment for the stack frame contents.
 ; This could legally happen before the spills.
 ; Since the spill slot is only 8 bytes, technically it would be fine to only
@@ -36,8 +36,8 @@ entry:
 ; NEON: sub sp, #16
 ; The epilog is free to use another scratch register than r4.
 ; NEON: add r[[R4:[0-9]+]], sp, #16
-; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]], :128]!
-; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]], :128]
+; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]]:128]!
+; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]]:128]
 ; The stack pointer restore must happen after the reloads.
 ; NEON: mov sp,
 ; NEON: pop
@@ -57,8 +57,8 @@ entry:
 ; NEON: bic r4, r4, #15
 ; Stack pointer must be updated before the spills.
 ; NEON: mov sp, r4
-; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
-; NEON: vst1.64 {d12, d13}, [r4, :128]
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
+; NEON: vst1.64 {d12, d13}, [r4:128]
 ; NEON: vstr d14, [r4, #16]
 ; Epilog
 ; NEON: vld1.64 {d8, d9, d10, d11},
@@ -84,7 +84,7 @@ entry:
 ; NEON: bic r4, r4, #15
 ; Stack pointer must be updated before the spills.
 ; NEON: mov sp, r4
-; NEON: vst1.64 {d8, d9}, [r4, :128]
+; NEON: vst1.64 {d8, d9}, [r4:128]
 ; NEON: vstr d10, [r4, #16]
 ; Epilog
 ; NEON: vld1.64 {d8, d9},
diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll
index b7df2fb..f6cea72 100644
--- a/test/CodeGen/Thumb2/cortex-fp.ll
+++ b/test/CodeGen/Thumb2/cortex-fp.ll
@@ -7,7 +7,7 @@ define float @foo(float %a, float %b) {
 entry:
 ; CHECK: foo
 ; CORTEXM3: blx ___mulsf3
-; CORTEXM4: vmul.f32  s0, s2, s0
+; CORTEXM4: vmul.f32  s
 ; CORTEXA8: vmul.f32  d
   %0 = fmul float %a, %b
   ret float %0
diff --git a/test/CodeGen/Thumb2/crash.ll b/test/CodeGen/Thumb2/crash.ll
index cb4d080..6ce0b82 100644
--- a/test/CodeGen/Thumb2/crash.ll
+++ b/test/CodeGen/Thumb2/crash.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -verify-machineinstrs
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -verify-machineinstrs -O0
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
 target triple = "thumbv7-apple-darwin10"
 
@@ -76,3 +77,11 @@ entry:
   store i32 %num, i32* %p2, align 4
   ret void
 }
+
+; Check RAFast handling of inline assembly with many dense clobbers.
+; The large tuple aliases of the vector registers can cause problems.
+define void @rdar13249625(double* nocapture %p) nounwind {   ; one "=w" asm output with d0 and q1-q15 clobbered
+  %1 = tail call double asm sideeffect "@ $0", "=w,~{d0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15}"() nounwind
+  store double %1, double* %p, align 4                       ; store the asm output through %p
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_post.ll b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
index 2178eec..bce8474 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN:   grep "ldr.*\[.*\]," | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @test(i32 %a, i32 %b, i32 %c) {
         %tmp1 = mul i32 %a, %b          ; <i32> [#uses=2]
@@ -9,4 +8,5 @@ define i32 @test(i32 %a, i32 %b, i32 %c) {
         %tmp5 = mul i32 %tmp4, %tmp3            ; <i32> [#uses=1]
         ret i32 %tmp5
 }
+; CHECK: 	ldr	r{{.*}},	[{{.*}}],
 
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index d9a0617..5bff268 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -12,8 +12,8 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
 define void @aaa(%quuz* %this, i8* %block) {
 ; CHECK: aaa:
 ; CHECK: bic r4, r4, #15
-; CHECK: vst1.64 {{.*}}[{{.*}}, :128]
-; CHECK: vld1.64 {{.*}}[{{.*}}, :128]
+; CHECK: vst1.64 {{.*}}[{{.*}}:128]
+; CHECK: vld1.64 {{.*}}[{{.*}}:128]
 entry:
   %aligned_vec = alloca <4 x float>, align 16
   %"alloca point" = bitcast i32 0 to i32
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
index d423bfc..496779c 100644
--- a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
@@ -1,10 +1,15 @@
 ; Check that eh_return & unwind_init were properly lowered
-; RUN: llc < %s | grep %rbp | count 7
-; RUN: llc < %s | grep %rcx | count 3
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
 
+; CHECK: test
+; CHECK: pushq %rbp
+; CHECK: movq %rsp, %rbp
+; CHECK: popq %rbp
+; CHECK: movq %rcx, %rsp
+; CHECK: ret # eh_return, addr: %rcx
 define i8* @test(i64 %a, i8* %b)  {
 entry:
   call void @llvm.eh.unwind.init()
@@ -15,3 +20,36 @@ entry:
 
 declare void @llvm.eh.return.i64(i64, i8*)
 declare void @llvm.eh.unwind.init()
+
+@b = common global i32 0, align 4
+@a = common global i32 0, align 4
+
+; PR14750
+; This function contains a normal return as well as eh_return.
+; CHECK: _Unwind_Resume_or_Rethrow
+define i32 @_Unwind_Resume_or_Rethrow() nounwind uwtable ssp {
+entry:
+  %0 = load i32* @b, align 4
+  %tobool = icmp eq i32 %0, 0                     ; true when @b == 0
+  br i1 %tobool, label %if.end, label %if.then    ; a non-zero @b takes the normal return
+
+if.then:                                          ; preds = %entry
+  ret i32 0                                       ; the normal return path
+
+if.end:                                           ; preds = %entry
+  %call = tail call i32 (...)* @_Unwind_ForcedUnwind_Phase2() nounwind
+  store i32 %call, i32* @a, align 4               ; call result is kept in @a
+  %tobool1 = icmp eq i32 %call, 0
+  br i1 %tobool1, label %cond.end, label %cond.true   ; a non-zero result aborts
+
+cond.true:                                        ; preds = %if.end
+  tail call void @abort() noreturn nounwind
+  unreachable
+
+cond.end:                                         ; preds = %if.end
+  tail call void @llvm.eh.return.i64(i64 0, i8* null)     ; the eh_return path
+  unreachable
+}
+
+declare i32 @_Unwind_ForcedUnwind_Phase2(...)
+declare void @abort() noreturn
diff --git a/test/CodeGen/X86/2010-12-02-MC-Set.ll b/test/CodeGen/X86/2010-12-02-MC-Set.ll
index 3144678..cf40624 100644
--- a/test/CodeGen/X86/2010-12-02-MC-Set.ll
+++ b/test/CodeGen/X86/2010-12-02-MC-Set.ll
@@ -18,5 +18,5 @@ entry:
 
 ; CHECK: .subsections_via_symbols
 ; CHECK-NEXT: __debug_line
-; CHECK-NEXT: Ltmp
+; CHECK-NEXT: Lline_table_start0
 ; CHECK-NEXT: Ltmp{{[0-9]}} = (Ltmp
diff --git a/test/CodeGen/X86/2011-11-30-or.ll b/test/CodeGen/X86/2011-11-30-or.ll
index f66248b..8ac4632 100644
--- a/test/CodeGen/X86/2011-11-30-or.ll
+++ b/test/CodeGen/X86/2011-11-30-or.ll
@@ -8,9 +8,9 @@ target triple = "x86_64-apple-macosx10.6.6"
 ; CHECK: pblendvb        %xmm1, %xmm2
 ; CHECK: ret
 
-define void @select_func() {
+define void @select_func(<8 x i16> %in) {
 entry:
-  %c.lobit.i.i.i = ashr <8 x i16> <i16 17, i16 5, i16 1, i16 15, i16 19, i16 15, i16 4, i16 1> , <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %c.lobit.i.i.i = ashr <8 x i16> %in, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
   %and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, <i16 25, i16 8, i16 65, i16 25, i16 8, i16 95, i16 15, i16 45>
   %and.i5.i.i.i = bitcast <8 x i16> %and.i56.i.i.i to <2 x i64>
   %neg.i.i.i.i = xor <8 x i16> %c.lobit.i.i.i, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
diff --git a/test/CodeGen/X86/2011-12-28-vselecti8.ll b/test/CodeGen/X86/2011-12-28-vselecti8.ll
index 1a9d46d..dbc122a 100644
--- a/test/CodeGen/X86/2011-12-28-vselecti8.ll
+++ b/test/CodeGen/X86/2011-12-28-vselecti8.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-apple-darwin11.2.0"
 
 ; CHECK: @foo8
 ; CHECK: psll
-; CHECK-NOT: psraw
+; CHECK: psraw
 ; CHECK: pblendvb
 ; CHECK: ret
 define void @foo8(float* nocapture %RET) nounwind {
diff --git a/test/CodeGen/X86/2012-01-11-split-cv.ll b/test/CodeGen/X86/2012-01-11-split-cv.ll
index 6b90072..7e91498 100644
--- a/test/CodeGen/X86/2012-01-11-split-cv.ll
+++ b/test/CodeGen/X86/2012-01-11-split-cv.ll
@@ -2,7 +2,7 @@
 
 ;CHECK: add18i16
 define void @add18i16(<18 x i16>* nocapture sret %ret, <18 x i16>* %bp) nounwind {
-;CHECK: vmovups
+;CHECK: vmovaps
   %b = load <18 x i16>* %bp, align 16
   %x = add <18 x i16> zeroinitializer, %b
   store <18 x i16> %x, <18 x i16>* %ret, align 16
diff --git a/test/CodeGen/X86/2013-01-09-DAGCombineBug.ll b/test/CodeGen/X86/2013-01-09-DAGCombineBug.ll
new file mode 100644
index 0000000..db7ec8a
--- /dev/null
+++ b/test/CodeGen/X86/2013-01-09-DAGCombineBug.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=x86_64-apple-macosx10.5.0 < %s
+
+; rdar://12968664
+
+define void @t() nounwind uwtable ssp {           ; control flow driven by undef; the test only needs llc to finish
+  br label %4
+
+; <label>:1                                       ; preds = %4, %2
+  ret void
+
+; <label>:2                                       ; preds = %6, %5, %3, %2
+  switch i32 undef, label %2 [                    ; the default edge loops back to this block
+    i32 1090573978, label %1
+    i32 1090573938, label %3
+    i32 1090573957, label %5
+  ]
+
+; <label>:3                                       ; preds = %4, %2
+  br i1 undef, label %2, label %4
+
+; <label>:4                                       ; preds = %6, %5, %3, %0
+  switch i32 undef, label %11 [                   ; the default edge goes to the endless loop at %11
+    i32 1090573938, label %3
+    i32 1090573957, label %5
+    i32 1090573978, label %1
+    i32 165205179, label %6
+  ]
+
+; <label>:5                                       ; preds = %4, %2
+  br i1 undef, label %2, label %4
+
+; <label>:6                                       ; preds = %4
+  %7 = icmp eq i32 undef, 590901838
+  %8 = or i1 false, %7                            ; same value as %7
+  %9 = or i1 true, %8                             ; always true
+  %10 = xor i1 %8, %9                             ; logical not of %8
+  br i1 %10, label %4, label %2
+
+; <label>:11                                      ; preds = %11, %4
+  br label %11                                    ; self-loop with no exit
+}
diff --git a/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll b/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll
new file mode 100644
index 0000000..614ccda
--- /dev/null
+++ b/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -mtriple=x86_64-pc-win32 | FileCheck %s
+
+; CHECK: test
+; CHECK: vpmovzxwd
+; CHECK: vpmovzxwd
+define void @test(<4 x i64> %a, <4 x i16>* %buf) {           ; shuffles the low halves of two i64 lanes into one <4 x i16>
+  %ex1 = extractelement <4 x i64> %a, i32 0                  ; lane 0 of %a
+  %ex2 = extractelement <4 x i64> %a, i32 1                  ; lane 1 of %a
+  %x1 = bitcast i64 %ex1 to <4 x i16>                        ; view each i64 lane as four i16 elements
+  %x2 = bitcast i64 %ex2 to <4 x i16>
+  %Sh = shufflevector <4 x i16> %x1, <4 x i16> %x2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>    ; { x1[0], x1[1], x2[0], x2[1] }
+  store <4 x i16> %Sh, <4 x i16>* %buf, align 1              ; align 1: the store is not assumed to be aligned
+  ret void
+}
diff --git a/test/CodeGen/X86/Atomics-64.ll b/test/CodeGen/X86/Atomics-64.ll
index 8e93762..8b0a349 100644
--- a/test/CodeGen/X86/Atomics-64.ll
+++ b/test/CodeGen/X86/Atomics-64.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: llc < %s -march=x86 > %t
+; RUN: llc < %s -march=x86-64 > %t.x86-64
+; RUN: llc < %s -march=x86 > %t.x86
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin8"
 
diff --git a/test/CodeGen/X86/GC/ocaml-gc.ll b/test/CodeGen/X86/GC/ocaml-gc.ll
new file mode 100644
index 0000000..44241a9
--- /dev/null
+++ b/test/CodeGen/X86/GC/ocaml-gc.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s
+
+define i32 @main(i32 %x) nounwind gc "ocaml" {
+; CHECK:        .text
+; CHECK-NEXT:   .globl  caml_3C_stdin_3E___code_begin
+; CHECK-NEXT: caml_3C_stdin_3E___code_begin:
+; CHECK-NEXT:   .data
+; CHECK-NEXT:   .globl  caml_3C_stdin_3E___data_begin
+; CHECK-NEXT: caml_3C_stdin_3E___data_begin:
+
+  %puts = tail call i32 @foo(i32 %x)
+  ret i32 0
+
+; CHECK:        .globl  caml_3C_stdin_3E___code_end
+; CHECK-NEXT: caml_3C_stdin_3E___code_end:
+; CHECK-NEXT:   .data
+; CHECK-NEXT:   .globl  caml_3C_stdin_3E___data_end
+; CHECK-NEXT: caml_3C_stdin_3E___data_end:
+; CHECK-NEXT:   .quad   0
+; CHECK-NEXT:   .globl  caml_3C_stdin_3E___frametable
+; CHECK-NEXT: caml_3C_stdin_3E___frametable:
+; CHECK-NEXT:   .short  1
+; CHECK-NEXT:   .align  8
+; CHECK-NEXT:                # live roots for main
+; CHECK-NEXT:   .quad   .Ltmp0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .align  8
+}
+
+declare i32 @foo(i32)
diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll
index 64825ba..fbe8879 100644
--- a/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ b/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -40,9 +40,43 @@ define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwt
   ret void
 }
 
+; No vectors because we use noimplicitfloat
+; CHECK: merge_const_store_no_vec
+; CHECK-NOT: vmovups
+; CHECK: ret
+define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat{
+  %1 = icmp sgt i32 %count, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+.lr.ph:
+  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
+  %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
+  %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
+  store i32 0, i32* %2, align 4
+  %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
+  store i32 0, i32* %3, align 4
+  %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
+  store i32 0, i32* %4, align 4
+  %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
+  store i32 0, i32* %5, align 4
+  %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
+  store i32 0, i32* %6, align 4
+  %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
+  store i32 0, i32* %7, align 4
+  %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
+  store i32 0, i32* %8, align 4
+  %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
+  store i32 0, i32* %9, align 4
+  %10 = add nsw i32 %i.02, 1
+  %11 = getelementptr inbounds %struct.B* %.01, i64 1
+  %exitcond = icmp eq i32 %10, %count
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+._crit_edge:
+  ret void
+}
+
 ; Move the constants using a single vector store.
 ; CHECK: merge_const_store_vec
-; CHECK: vmovups  %ymm0, (%rsi)
+; CHECK: vmovups
 ; CHECK: ret
 define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
   %1 = icmp sgt i32 %count, 0
diff --git a/test/CodeGen/X86/atom-pad-short-functions.ll b/test/CodeGen/X86/atom-pad-short-functions.ll
index 54af17d..b9a39e0 100644
--- a/test/CodeGen/X86/atom-pad-short-functions.ll
+++ b/test/CodeGen/X86/atom-pad-short-functions.ll
@@ -22,6 +22,13 @@ define i32 @test_optsize(i32 %a) nounwind optsize {
   ret i32 %a
 }
 
+define i32 @test_minsize(i32 %a) nounwind minsize {
+; CHECK: test_minsize
+; CHECK: movl
+; CHECK-NEXT: ret
+  ret i32 %a
+}
+
 define i32 @test_add(i32 %a, i32 %b) nounwind {
 ; CHECK: test_add
 ; CHECK: addl
@@ -76,3 +83,21 @@ if.end:
   ret void
 
 }
+
+define void @test_branch_to_same_bb(i32 %x, i32 %y) nounwind {
+; CHECK: @test_branch_to_same_bb
+  %cmp = icmp sgt i32 %x, 0                        ; %y is not used anywhere in the body
+  br i1 %cmp, label %while.cond, label %while.end
+
+while.cond:
+  br label %while.cond                             ; unconditional branch back to this same block
+
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: ret
+while.end:
+  ret void                                         ; return-only block; four nops are expected before its ret
+}
+
diff --git a/test/CodeGen/X86/atomic-dagsched.ll b/test/CodeGen/X86/atomic-dagsched.ll
new file mode 100644
index 0000000..00891d6
--- /dev/null
+++ b/test/CodeGen/X86/atomic-dagsched.ll
@@ -0,0 +1,110 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+define void @test(i8** %a, i64* %b, i64 %c, i64 %d) nounwind {
+entry:
+  %ptrtoarg4 = load i8** %a, align 8
+  %brglist1 = getelementptr i8** %a, i64 1
+  %ptrtoarg25 = load i8** %brglist1, align 8
+  %0 = load i64* %b, align 8
+  %1 = mul i64 %0, 4
+  %scevgep = getelementptr i8* %ptrtoarg25, i64 %1
+  %2 = mul i64 %d, 4
+  br label %loop.cond
+
+loop.cond:                                        ; preds = %test.exit, %entry
+  %asr.iv6 = phi i8* [ %29, %test.exit ], [ %scevgep, %entry ]
+  %iv = phi i64 [ %0, %entry ], [ %28, %test.exit ]
+  %3 = icmp eq i64 %iv, %c
+  br i1 %3, label %return, label %loop
+
+loop:                                             ; preds = %loop.cond
+  %4 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8, !tbaa !0
+  %5 = load i64* %4, align 8, !tbaa !3
+  %vector.size.i = ashr i64 %5, 3
+  %num.vector.wi.i = shl i64 %vector.size.i, 3
+  %6 = icmp eq i64 %vector.size.i, 0
+  br i1 %6, label %scalarIf.i, label %dim_0_vector_pre_head.i
+
+dim_0_vector_pre_head.i:                          ; preds = %loop
+  %7 = trunc i64 %5 to i32
+  %tempvector_func.i = insertelement <8 x i32> undef, i32 %7, i32 0
+  %vectorvector_func.i = shufflevector <8 x i32> %tempvector_func.i, <8 x i32> undef, <8 x i32> zeroinitializer
+  br label %vector_kernel_entry.i
+
+vector_kernel_entry.i:                            ; preds = %vector_kernel_entry.i, %dim_0_vector_pre_head.i
+  %asr.iv9 = phi i8* [ %scevgep10, %vector_kernel_entry.i ], [ %asr.iv6, %dim_0_vector_pre_head.i ]
+  %asr.iv = phi i64 [ %asr.iv.next, %vector_kernel_entry.i ], [ %vector.size.i, %dim_0_vector_pre_head.i ]
+  %8 = bitcast i8* %ptrtoarg4 to i32 addrspace(1)*
+  %asr.iv911 = bitcast i8* %asr.iv9 to <8 x i32> addrspace(1)*
+  %9 = load <8 x i32> addrspace(1)* %asr.iv911, align 4
+  %extract8vector_func.i = extractelement <8 x i32> %9, i32 0
+  %extract9vector_func.i = extractelement <8 x i32> %9, i32 1
+  %extract10vector_func.i = extractelement <8 x i32> %9, i32 2
+  %extract11vector_func.i = extractelement <8 x i32> %9, i32 3
+  %extract12vector_func.i = extractelement <8 x i32> %9, i32 4
+  %extract13vector_func.i = extractelement <8 x i32> %9, i32 5
+  %extract14vector_func.i = extractelement <8 x i32> %9, i32 6
+  %extract15vector_func.i = extractelement <8 x i32> %9, i32 7
+  %10 = atomicrmw min i32 addrspace(1)* %8, i32 %extract8vector_func.i seq_cst
+  %11 = atomicrmw min i32 addrspace(1)* %8, i32 %extract9vector_func.i seq_cst
+  %12 = atomicrmw min i32 addrspace(1)* %8, i32 %extract10vector_func.i seq_cst
+  %13 = atomicrmw min i32 addrspace(1)* %8, i32 %extract11vector_func.i seq_cst
+  %14 = atomicrmw min i32 addrspace(1)* %8, i32 %extract12vector_func.i seq_cst
+  %15 = atomicrmw min i32 addrspace(1)* %8, i32 %extract13vector_func.i seq_cst
+  %16 = atomicrmw min i32 addrspace(1)* %8, i32 %extract14vector_func.i seq_cst
+  %17 = atomicrmw min i32 addrspace(1)* %8, i32 %extract15vector_func.i seq_cst
+  store <8 x i32> %vectorvector_func.i, <8 x i32> addrspace(1)* %asr.iv911, align 4
+  %asr.iv.next = add i64 %asr.iv, -1
+  %scevgep10 = getelementptr i8* %asr.iv9, i64 32
+  %dim_0_vector_cmp.to.max.i = icmp eq i64 %asr.iv.next, 0
+  br i1 %dim_0_vector_cmp.to.max.i, label %scalarIf.i, label %vector_kernel_entry.i
+
+scalarIf.i:                                       ; preds = %vector_kernel_entry.i, %loop
+  %exec_wi.i = phi i64 [ 0, %loop ], [ %num.vector.wi.i, %vector_kernel_entry.i ]
+  %18 = icmp eq i64 %exec_wi.i, %5
+  br i1 %18, label %test.exit, label %dim_0_pre_head.i
+
+dim_0_pre_head.i:                                 ; preds = %scalarIf.i
+  %19 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8, !tbaa !0
+  %20 = load i64* %19, align 8, !tbaa !3
+  %21 = trunc i64 %20 to i32
+  %22 = mul i64 %vector.size.i, 8
+  br label %scalar_kernel_entry.i
+
+scalar_kernel_entry.i:                            ; preds = %scalar_kernel_entry.i, %dim_0_pre_head.i
+  %asr.iv12 = phi i64 [ %asr.iv.next13, %scalar_kernel_entry.i ], [ %22, %dim_0_pre_head.i ]
+  %23 = bitcast i8* %asr.iv6 to i32 addrspace(1)*
+  %24 = bitcast i8* %ptrtoarg4 to i32 addrspace(1)*
+  %scevgep16 = getelementptr i32 addrspace(1)* %23, i64 %asr.iv12
+  %25 = load i32 addrspace(1)* %scevgep16, align 4, !tbaa !4
+  %26 = atomicrmw min i32 addrspace(1)* %24, i32 %25 seq_cst
+  %scevgep15 = getelementptr i32 addrspace(1)* %23, i64 %asr.iv12
+  store i32 %21, i32 addrspace(1)* %scevgep15, align 4, !tbaa !4
+  %asr.iv.next13 = add i64 %asr.iv12, 1
+  %dim_0_cmp.to.max.i = icmp eq i64 %5, %asr.iv.next13
+  br i1 %dim_0_cmp.to.max.i, label %test.exit, label %scalar_kernel_entry.i
+
+test.exit:                     ; preds = %scalar_kernel_entry.i, %scalarIf.i
+  %27 = bitcast i8* %asr.iv6 to i1*
+  %28 = add i64 %iv, %d
+  store i64 %28, i64* %b, align 8
+  %scevgep8 = getelementptr i1* %27, i64 %2
+  %29 = bitcast i1* %scevgep8 to i8*
+  br label %loop.cond
+
+return:                                           ; preds = %loop.cond
+  store i64 %0, i64* %b, align 8
+  ret void
+}
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"long", metadata !1}
+!4 = metadata !{metadata !"int", metadata !1}
+
+; CHECK: test
+; CHECK: decq
+; CHECK-NOT: cmpxchgl
+; CHECK: jne
+; CHECK: ret
diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll
index 0fec965..0550720 100644
--- a/test/CodeGen/X86/avx-intel-ocl.ll
+++ b/test/CodeGen/X86/avx-intel-ocl.ll
@@ -127,3 +127,43 @@ define i32 @test_int(i32 %a, i32 %b) nounwind {
     %c = add i32 %c2, %b
 	ret i32 %c
 }
+
+; WIN64: test_float4
+; WIN64-NOT: vzeroupper
+; WIN64: call
+; WIN64-NOT: vzeroupper
+; WIN64: call
+; WIN64: ret
+
+; X64: test_float4
+; X64-NOT: vzeroupper
+; X64: call
+; X64-NOT: vzeroupper
+; X64: call
+; X64: ret
+
+; X32: test_float4
+; X32: vzeroupper
+; X32: call
+; X32: vzeroupper
+; X32: call
+; X32: ret
+
+declare <4 x float> @func_float4(<4 x float>, <4 x float>, <4 x float>)
+
+define <8 x float> @test_float4(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind readnone {
+entry:
+  %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %1 = shufflevector <8 x float> %b, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %2 = shufflevector <8 x float> %c, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %call.i = tail call intel_ocl_bicc <4 x float> @func_float4(<4 x float> %0, <4 x float> %1, <4 x float> %2) nounwind
+  %3 = shufflevector <4 x float> %call.i, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %5 = shufflevector <8 x float> %b, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %6 = shufflevector <8 x float> %c, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %call.i2 = tail call intel_ocl_bicc <4 x float> @func_float4(<4 x float> %4, <4 x float> %5, <4 x float> %6) nounwind
+  %7 = shufflevector <4 x float> %call.i2, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %8 = shufflevector <8 x float> %3, <8 x float> %7, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  ret <8 x float> %8
+}
+
diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll
index c9fc66a..77a7c4f 100644
--- a/test/CodeGen/X86/avx-load-store.ll
+++ b/test/CodeGen/X86/avx-load-store.ll
@@ -53,19 +53,24 @@ define void @storev16i16(<16 x i16> %a) nounwind {
   unreachable
 }
 
-; CHECK: vmovups  %ymm
+; CHECK: storev16i16_01
+; CHECK: vextractf128
+; CHECK: vmovaps  %xmm
 define void @storev16i16_01(<16 x i16> %a) nounwind {
   store <16 x i16> %a, <16 x i16>* undef, align 4
   unreachable
 }
 
+; CHECK: storev32i8
 ; CHECK: vmovaps  %ymm
 define void @storev32i8(<32 x i8> %a) nounwind {
   store <32 x i8> %a, <32 x i8>* undef, align 32
   unreachable
 }
 
-; CHECK: vmovups  %ymm
+; CHECK: storev32i8_01
+; CHECK: vextractf128
+; CHECK: vmovups  %xmm
 define void @storev32i8_01(<32 x i8> %a) nounwind {
   store <32 x i8> %a, <32 x i8>* undef, align 4
   unreachable
@@ -76,7 +81,7 @@ define void @storev32i8_01(<32 x i8> %a) nounwind {
 ; CHECK: _double_save
 ; CHECK-NOT: vinsertf128 $1
 ; CHECK-NOT: vinsertf128 $0
-; CHECK: vmovaps %xmm
+; CHECK: vmovups %xmm
 ; CHECK: vmovaps %xmm
 define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp {
 entry:
diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll
index 8d7d79d..7ae0d36 100755
--- a/test/CodeGen/X86/avx-sext.ll
+++ b/test/CodeGen/X86/avx-sext.ll
@@ -142,3 +142,26 @@ define <8 x i16> @load_sext_test6(<8 x i8> *%ptr) {
  %Y = sext <8 x i8> %X to <8 x i16>
  ret <8 x i16>%Y
 }
+
+; AVX: sext_4i1_to_4i64
+; AVX: vpslld  $31
+; AVX: vpsrad  $31
+; AVX: vpmovsxdq
+; AVX: vpmovsxdq
+; AVX: ret
+define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
+  %extmask = sext <4 x i1> %mask to <4 x i64>
+  ret <4 x i64> %extmask
+}
+
+; AVX: sext_4i8_to_4i64
+; AVX: vpslld  $24
+; AVX: vpsrad  $24
+; AVX: vpmovsxdq
+; AVX: vpmovsxdq
+; AVX: ret
+define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
+  %extmask = sext <4 x i8> %mask to <4 x i64>
+  ret <4 x i64> %extmask
+}
+
diff --git a/test/CodeGen/X86/avx-shift.ll b/test/CodeGen/X86/avx-shift.ll
index 681747b..b0bff45 100644
--- a/test/CodeGen/X86/avx-shift.ll
+++ b/test/CodeGen/X86/avx-shift.ll
@@ -112,6 +112,16 @@ define <8 x i32> @vshift08(<8 x i32> %a) nounwind {
   ret <8 x i32> %bitop
 }
 
+; PR15141: shl of <4 x i32> by the non-uniform constant vector <0, 1, 2, 4>.
+; CHECK: _vshift13:
+; CHECK-NOT: vpsll
+; CHECK: vcvttps2dq
+; CHECK-NEXT: vpmulld
+define <4 x i32> @vshift13(<4 x i32> %in) {
+  %T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4>
+  ret <4 x i32> %T
+}
+
 ;;; Uses shifts for sign extension
 ; CHECK: _sext_v16i16
 ; CHECK: vpsllw
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index 65685a3..73faa1f 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -98,8 +98,8 @@ define i32 @test10(<4 x i32> %a) nounwind {
 }
 
 define <4 x float> @test11(<4 x float> %a) nounwind  {
-; check: test11
-; check: vpermilps $27
+; CHECK: test11
+; CHECK: vpshufd $27
   %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   ret <4 x float> %tmp1
 }
@@ -113,8 +113,8 @@ define <4 x float> @test12(<4 x float>* %a) nounwind  {
 }
 
 define <4 x i32> @test13(<4 x i32> %a) nounwind  {
-; check: test13
-; check: vpshufd $27
+; CHECK: test13
+; CHECK: vpshufd $27
   %tmp1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   ret <4 x i32> %tmp1
 }
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index 67e4b40..5c01c2c 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -3,8 +3,8 @@
 
 ; CHECK: vpunpcklbw %xmm
 ; CHECK-NEXT: vpunpckhbw %xmm
+; CHECK-NEXT: vpshufd $85
 ; CHECK-NEXT: vinsertf128 $1
-; CHECK-NEXT: vpermilps $85
 define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
 entry:
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -12,8 +12,8 @@ entry:
 }
 
 ; CHECK: vpunpckhwd %xmm
+; CHECK-NEXT: vpshufd $85
 ; CHECK-NEXT: vinsertf128 $1
-; CHECK-NEXT: vpermilps $85
 define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
 entry:
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll
index 3444542..e565da7 100644
--- a/test/CodeGen/X86/blend-msb.ll
+++ b/test/CodeGen/X86/blend-msb.ll
@@ -5,7 +5,8 @@
 ; shifting the needed bit to the MSB, and not using shl+sra.
 
 ;CHECK: vsel_float
-;CHECK: pslld
+;CHECK: movl $-2147483648
+;CHECK-NEXT: movd
 ;CHECK-NEXT: blendvps
 ;CHECK: ret
 define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
@@ -14,7 +15,8 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
 }
 
 ;CHECK: vsel_4xi8
-;CHECK: pslld
+;CHECK: movl $-2147483648
+;CHECK-NEXT: movd
 ;CHECK-NEXT: blendvps
 ;CHECK: ret
 define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
@@ -28,7 +30,7 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
 ; reduce the mask in this case.
 ;CHECK: vsel_8xi16
 ;CHECK: psllw
-;CHECK-NOT: psraw
+;CHECK: psraw
 ;CHECK: pblendvb
 ;CHECK: ret
 define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
diff --git a/test/CodeGen/X86/cas.ll b/test/CodeGen/X86/cas.ll
new file mode 100644
index 0000000..c2dd05e
--- /dev/null
+++ b/test/CodeGen/X86/cas.ll
@@ -0,0 +1,73 @@
+; RUN: llc -mtriple=x86_64-pc-linux-gnu %s -o - | FileCheck %s
+
+; C code this came from
+;bool cas(float volatile *p, float *expected, float desired) {
+;  bool success;
+;  __asm__ __volatile__("lock; cmpxchg %[desired], %[mem]; "
+;                       "mov %[expected], %[expected_out]; "
+;                       "sete %[success]"
+;                       : [success] "=a" (success),
+;                         [expected_out] "=rm" (*expected)
+;                       : [expected] "a" (*expected),
+;                         [desired] "q" (desired),
+;                         [mem] "m" (*p)
+;                       : "memory", "cc");
+;  return success;
+;}
+
+define zeroext i1 @cas(float* %p, float* %expected, float %desired) nounwind {
+entry:
+  %p.addr = alloca float*, align 8
+  %expected.addr = alloca float*, align 8
+  %desired.addr = alloca float, align 4
+  %success = alloca i8, align 1
+  store float* %p, float** %p.addr, align 8
+  store float* %expected, float** %expected.addr, align 8
+  store float %desired, float* %desired.addr, align 4
+  %0 = load float** %expected.addr, align 8
+  %1 = load float** %expected.addr, align 8
+  %2 = load float* %1, align 4
+  %3 = load float* %desired.addr, align 4
+  %4 = load float** %p.addr, align 8
+  %5 = call i8 asm sideeffect "lock; cmpxchg $3, $4; mov $2, $1; sete $0", "={ax},=*rm,{ax},q,*m,~{memory},~{cc},~{dirflag},~{fpsr},~{flags}"(float* %0, float %2, float %3, float* %4) nounwind
+  store i8 %5, i8* %success, align 1
+  %6 = load i8* %success, align 1
+  %tobool = trunc i8 %6 to i1
+  ret i1 %tobool
+}
+
+; CHECK: @cas
+; Make sure we're emitting a move from eax.
+; CHECK: #APP
+; CHECK-NEXT: lock;{{.*}}mov %eax,{{.*}}
+; CHECK-NEXT: #NO_APP
+
+define zeroext i1 @cas2(i8* %p, i8* %expected, i1 zeroext %desired) nounwind {
+entry:
+  %p.addr = alloca i8*, align 8
+  %expected.addr = alloca i8*, align 8
+  %desired.addr = alloca i8, align 1
+  %success = alloca i8, align 1
+  store i8* %p, i8** %p.addr, align 8
+  store i8* %expected, i8** %expected.addr, align 8
+  %frombool = zext i1 %desired to i8
+  store i8 %frombool, i8* %desired.addr, align 1
+  %0 = load i8** %expected.addr, align 8
+  %1 = load i8** %expected.addr, align 8
+  %2 = load i8* %1, align 1
+  %tobool = trunc i8 %2 to i1
+  %3 = load i8* %desired.addr, align 1
+  %tobool1 = trunc i8 %3 to i1
+  %4 = load i8** %p.addr, align 8
+  %5 = call i8 asm sideeffect "lock; cmpxchg $3, $4; mov $2, $1; sete $0", "={ax},=*rm,{ax},q,*m,~{memory},~{cc},~{dirflag},~{fpsr},~{flags}"(i8* %0, i1 %tobool, i1 %tobool1, i8* %4) nounwind
+  store i8 %5, i8* %success, align 1
+  %6 = load i8* %success, align 1
+  %tobool2 = trunc i8 %6 to i1
+  ret i1 %tobool2
+}
+
+; CHECK: @cas2
+; Make sure we're emitting a move from %al here.
+; CHECK: #APP
+; CHECK-NEXT: lock;{{.*}}mov %al,{{.*}}
+; CHECK-NEXT: #NO_APP
diff --git a/test/CodeGen/X86/coldcc64.ll b/test/CodeGen/X86/coldcc64.ll
new file mode 100644
index 0000000..4db56bb
--- /dev/null
+++ b/test/CodeGen/X86/coldcc64.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s | FileCheck %s
+
+target triple = "x86_64-linux-gnu"
+
+define coldcc void @foo() {
+; CHECK: pushq %rbp
+; CHECK: pushq %r15
+; CHECK: pushq %r14
+; CHECK: pushq %r13
+; CHECK: pushq %r12
+; CHECK: pushq %r11
+; CHECK: pushq %r10
+; CHECK: pushq %r9
+; CHECK: pushq %r8
+; CHECK: pushq %rdi
+; CHECK: pushq %rsi
+; CHECK: pushq %rdx
+; CHECK: pushq %rcx
+; CHECK: pushq %rbx
+; CHECK: movaps %xmm15
+; CHECK: movaps %xmm0
+  call void asm sideeffect "", "~{xmm15},~{xmm0},~{rbp},~{r15},~{r14},~{r13},~{r12},~{r11},~{r10},~{r9},~{r8},~{rdi},~{rsi},~{rdx},~{rcx},~{rbx}"()
+  ret void
+}
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index 276d0db..6d21962 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -431,7 +431,7 @@ return:                                           ; preds = %entry
 ; uitofp expands to an FCMOV instruction which splits the basic block.
 ; Make sure the live range of %AL isn't split.
 @.str = private unnamed_addr constant { [1 x i8], [63 x i8] } zeroinitializer, align 32
-define void @pr13188(i64* nocapture %this) uwtable ssp address_safety align 2 {
+define void @pr13188(i64* nocapture %this) uwtable ssp sanitize_address align 2 {
 entry:
   %x7 = load i64* %this, align 8
   %sub = add i64 %x7, -1
diff --git a/test/CodeGen/X86/dagcombine_unsafe_math.ll b/test/CodeGen/X86/dagcombine_unsafe_math.ll
new file mode 100644
index 0000000..a3221de
--- /dev/null
+++ b/test/CodeGen/X86/dagcombine_unsafe_math.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -enable-unsafe-fp-math -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s 
+
+
+; rdar://13126763
+; Expression "x + x*x" was mistakenly transformed into "x * 3.0f".
+
+define float @test1(float %x) {
+  %t1 = fmul fast float %x, %x
+  %t2 = fadd fast float %t1, %x
+  ret float %t2
+; CHECK: test1
+; CHECK: vaddss
+}
+
+; (x + x) + x => x * 3.0
+define float @test2(float %x) {
+  %t1 = fadd fast float %x, %x
+  %t2 = fadd fast float %t1, %x
+  ret float %t2
+; CHECK: .long  1077936128
+; CHECK: test2
+; CHECK: vmulss LCPI1_0(%rip), %xmm0, %xmm0
+}
+
+; x + (x + x) => x * 3.0
+define float @test3(float %x) {
+  %t1 = fadd fast float %x, %x    ; inner sum: x + x
+  %t2 = fadd fast float %x, %t1   ; x + (x + x): inner sum is the RIGHT operand, unlike test2
+  ret float %t2
+; CHECK: .long  1077936128
+; CHECK: test3
+; CHECK: vmulss LCPI2_0(%rip), %xmm0, %xmm0
+}
+
+; (y + x) + x != x * 3.0
+define float @test4(float %x, float %y) {
+  %t1 = fadd fast float %x, %y
+  %t2 = fadd fast float %t1, %x
+  ret float %t2
+; CHECK: test4
+; CHECK: vaddss
+}
diff --git a/test/CodeGen/X86/dbg-declare.ll b/test/CodeGen/X86/dbg-declare.ll
index b73e310..9d13de1 100644
--- a/test/CodeGen/X86/dbg-declare.ll
+++ b/test/CodeGen/X86/dbg-declare.ll
@@ -30,10 +30,8 @@ declare void @llvm.stackrestore(i8*) nounwind
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 12, metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm", metadata !"clang version 3.1 (trunk 153698)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, metadata !12} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/CodeGen/X86/dbg-subrange.ll b/test/CodeGen/X86/dbg-subrange.ll
index 0efb50e..c9be972 100644
--- a/test/CodeGen/X86/dbg-subrange.ll
+++ b/test/CodeGen/X86/dbg-subrange.ll
@@ -15,18 +15,15 @@ entry:
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 720913, i32 0, i32 12, metadata !"small.c", metadata !"/private/tmp", metadata !"clang version 3.1 (trunk 144833)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 720937, metadata !"small.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{null}
 !9 = metadata !{metadata !10}
 !10 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!11 = metadata !{metadata !12}
-!12 = metadata !{metadata !13}
+!11 = metadata !{metadata !13}
 !13 = metadata !{i32 720948, i32 0, null, metadata !"s", metadata !"s", metadata !"", metadata !6, i32 2, metadata !14, i32 0, i32 1, [4294967296 x i8]* @s} ; [ DW_TAG_variable ]
 !14 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 34359738368, i64 8, i32 0, i32 0, metadata !15, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ]
 !15 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
index 8e7c13d..9669d97 100644
--- a/test/CodeGen/X86/divide-by-constant.ll
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -56,9 +56,9 @@ entry:
   %div = sdiv i16 %x, 10
   ret i16 %div
 ; CHECK: test6:
-; CHECK: imull	$26215, %eax, %eax
-; CHECK: shrl	$31, %ecx
-; CHECK: sarl	$18, %eax
+; CHECK: imull $26215, %eax, %ecx
+; CHECK: sarl $18, %ecx
+; CHECK: shrl $15, %eax
 }
 
 define i32 @test7(i32 %x) nounwind {
diff --git a/test/CodeGen/X86/dwarf-comp-dir.ll b/test/CodeGen/X86/dwarf-comp-dir.ll
index c64752c..63e6167 100644
--- a/test/CodeGen/X86/dwarf-comp-dir.ll
+++ b/test/CodeGen/X86/dwarf-comp-dir.ll
@@ -1,5 +1,5 @@
 ; RUN: llc %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=line %t | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/fast-isel-args-fail.ll b/test/CodeGen/X86/fast-isel-args-fail.ll
new file mode 100644
index 0000000..4995baa
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-args-fail.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -fast-isel -verify-machineinstrs -mtriple=x86_64-apple-darwin10
+; REQUIRES: asserts
+
+; Previously, running fast-isel on this function with its i31 arguments would trigger an assertion.
+define i31 @t1(i31 %a, i31 %b, i31 %c) {
+entry:
+  %add = add nsw i31 %b, %a
+  %add1 = add nsw i31 %add, %c
+  ret i31 %add1
+}
diff --git a/test/CodeGen/X86/fast-isel-args.ll b/test/CodeGen/X86/fast-isel-args.ll
new file mode 100644
index 0000000..0f36265
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-args.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -fast-isel -fast-isel-abort -fast-isel-abort-args -verify-machineinstrs -mtriple=x86_64-apple-darwin10
+
+; Just make sure these don't abort when lowering the arguments.
+define i32 @t1(i32 %a, i32 %b, i32 %c) {
+entry:
+  %add = add nsw i32 %b, %a
+  %add1 = add nsw i32 %add, %c
+  ret i32 %add1
+}
+
+define i64 @t2(i64 %a, i64 %b, i64 %c) {
+entry:
+  %add = add nsw i64 %b, %a
+  %add1 = add nsw i64 %add, %c
+  ret i64 %add1
+}
+
+define i64 @t3(i32 %a, i64 %b, i32 %c) {
+entry:
+  %conv = sext i32 %a to i64
+  %add = add nsw i64 %conv, %b
+  %conv1 = sext i32 %c to i64
+  %add2 = add nsw i64 %add, %conv1
+  ret i64 %add2
+}
diff --git a/test/CodeGen/X86/fast-isel-constant.ll b/test/CodeGen/X86/fast-isel-constant.ll
new file mode 100644
index 0000000..6f9240a
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-constant.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86-64 -O0 | FileCheck %s
+; Make sure fast-isel doesn't reset the materialised constant map
+; across an intrinsic call.
+
+; CHECK: movl	$100000
+; CHECK-NOT: movl	$100000
+define i1 @test1(i32 %v1, i32 %v2, i32* %X) nounwind {
+entry:
+  %a = shl i32 100000, %v1
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %v2)
+  %ext = extractvalue {i32, i1} %t, 0
+  %sum = shl i32 100000, %ext
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %normal
+
+normal:
+  store i32 %sum, i32* %X
+  br label %overflow
+
+overflow:
+  ret i1 false
+}
+
+declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
diff --git a/test/CodeGen/X86/float-asmprint.ll b/test/CodeGen/X86/float-asmprint.ll
new file mode 100644
index 0000000..4aeae7f
--- /dev/null
+++ b/test/CodeGen/X86/float-asmprint.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mtriple=x86_64-none-linux < %s | FileCheck %s
+
+; Check that all current floating-point types are correctly emitted to assembly
+; on a little-endian target.
+
+@var128 = global fp128 0xL00000000000000008000000000000000, align 16
+@varppc128 = global ppc_fp128 0xM80000000000000000000000000000000, align 16
+@var80 = global x86_fp80 0xK80000000000000000000, align 16
+@var64 = global double -0.0, align 8
+@var32 = global float -0.0, align 4
+@var16 = global half -0.0, align 2
+
+; CHECK: var128:
+; CHECK-NEXT: .quad 0                         # fp128 -0
+; CHECK-NEXT: .quad -9223372036854775808
+; CHECK-NEXT: .size
+
+; CHECK: varppc128:
+; CHECK-NEXT: .quad 0                         # ppc_fp128 -0
+; CHECK-NEXT: .quad -9223372036854775808
+; CHECK-NEXT: .size
+
+; CHECK: var80:
+; CHECK-NEXT: .quad 0                         # x86_fp80 -0
+; CHECK-NEXT: .short 32768
+; CHECK-NEXT: .zero 6
+; CHECK-NEXT: .size
+
+; CHECK: var64:
+; CHECK-NEXT: .quad -9223372036854775808      # double -0
+; CHECK-NEXT: .size
+
+; CHECK: var32:
+; CHECK-NEXT: .long 2147483648                # float -0
+; CHECK-NEXT: .size
+
+; CHECK: var16:
+; CHECK-NEXT: .short 32768                    # half -0
+; CHECK-NEXT: .size
+
diff --git a/test/CodeGen/X86/fp-load-trunc.ll b/test/CodeGen/X86/fp-load-trunc.ll
index 2ae65c9..a973bef 100644
--- a/test/CodeGen/X86/fp-load-trunc.ll
+++ b/test/CodeGen/X86/fp-load-trunc.ll
@@ -49,8 +49,8 @@ define <8 x float> @test4(<8 x double>* %p) nounwind {
 ; CHECK: movlhps
 ; CHECK: ret
 ; AVX:   test4
-; AVX:   vcvtpd2psy {{[0-9]*}}(%{{.*}})
-; AVX:   vcvtpd2psy {{[0-9]*}}(%{{.*}})
+; AVX:   vcvtpd2psy
+; AVX:   vcvtpd2psy
 ; AVX:   vinsertf128
 ; AVX:   ret
   %x = load <8 x double>* %p
diff --git a/test/CodeGen/X86/handle-move.ll b/test/CodeGen/X86/handle-move.ll
index e9f7a96..ba96275 100644
--- a/test/CodeGen/X86/handle-move.ll
+++ b/test/CodeGen/X86/handle-move.ll
@@ -16,7 +16,7 @@
 ;       DL:     [0B,16r:0)[128r,144r:2)[144r,144d:1)  0@0B-phi 1@144r 2@128r
 ;         -->   [0B,16r:0)[128r,180r:2)[180r,180d:1)  0@0B-phi 1@180r 2@128r
 ;
-define i32 @f1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind uwtable readnone ssp {
+define i32 @f1(i32 %a, i32 %b, i32 %c) nounwind uwtable readnone ssp {
 entry:
   %y = add i32 %c, 1
   %x = udiv i32 %b, %a
@@ -50,7 +50,7 @@ entry:
 ;       %vreg5:         [16r,112r:0)  0@16r
 ;            -->        [16r,120r:0)  0@16r
 ;
-define i32 @f3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind uwtable readnone ssp {
+define i32 @f3(i32 %a, i32 %b) nounwind uwtable readnone ssp {
 entry:
   %y = sub i32 %a, %b
   %x = add i32 %a, %b
diff --git a/test/CodeGen/X86/hipe-prologue.ll b/test/CodeGen/X86/hipe-prologue.ll
new file mode 100644
index 0000000..ff3c5c8
--- /dev/null
+++ b/test/CodeGen/X86/hipe-prologue.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
+
+; The HiPE compiler (i.e., the native code compiler of the Erlang/OTP system)
+; adds a custom assembly prologue in order to efficiently manipulate the stack
+; at runtime.
+
+; Just to prevent the alloca from being optimized away.
+declare void @dummy_use(i32*, i32)
+
+define {i32, i32} @test_basic(i32 %hp, i32 %p) {   ; default calling convention
+  ; X32-Linux:       test_basic:
+  ; X32-Linux-NOT:   calll inc_stack_0
+
+  ; X64-Linux:       test_basic:
+  ; X64-Linux-NOT:   callq inc_stack_0
+
+  %mem = alloca i32, i32 10                         ; 10 x i32 of stack
+  call void @dummy_use (i32* %mem, i32 10)          ; keeps %mem from being dropped
+  %1 = insertvalue {i32, i32} undef, i32 %hp, 0
+  %2 = insertvalue {i32, i32} %1, i32 %p, 1
+  ret {i32, i32} %2                                 ; return the fully built pair, not the partial %1
+}
+
+define cc 11 {i32, i32} @test_basic_hipecc(i32 %hp, i32 %p) {   ; cc 11 = HiPE calling convention
+  ; X32-Linux:       test_basic_hipecc:
+  ; X32-Linux:       leal -156(%esp), %ebx
+  ; X32-Linux-NEXT:  cmpl 76(%ebp), %ebx
+  ; X32-Linux-NEXT:  jb .LBB1_1
+
+  ; X32-Linux:       ret
+
+  ; X32-Linux:       .LBB1_1:
+  ; X32-Linux-NEXT:  calll inc_stack_0
+
+  ; X64-Linux:       test_basic_hipecc:
+  ; X64-Linux:       leaq -232(%rsp), %r14
+  ; X64-Linux-NEXT:  cmpq 144(%rbp), %r14
+  ; X64-Linux-NEXT:  jb .LBB1_1
+
+  ; X64-Linux:       ret
+
+  ; X64-Linux:       .LBB1_1:
+  ; X64-Linux-NEXT:  callq inc_stack_0
+
+  %mem = alloca i32, i32 10                         ; 10 x i32 of stack
+  call void @dummy_use (i32* %mem, i32 10)          ; keeps %mem from being dropped
+  %1 = insertvalue {i32, i32} undef, i32 %hp, 0
+  %2 = insertvalue {i32, i32} %1, i32 %p, 1
+  ret {i32, i32} %2                                 ; both fields populated
+}
+
+define cc 11 {i32,i32,i32} @test_nocall_hipecc(i32 %hp,i32 %p,i32 %x,i32 %y) {   ; cc 11 = HiPE calling convention
+  ; X32-Linux:       test_nocall_hipecc:
+  ; X32-Linux-NOT:   calll inc_stack_0
+
+  ; X64-Linux:       test_nocall_hipecc:
+  ; X64-Linux-NOT:   callq inc_stack_0
+
+  %1 = add i32 %x, %y
+  %2 = mul i32 42, %1
+  %3 = sub i32 24, %2                               ; NOTE(review): %3 has no users in this function
+  %4 = insertvalue {i32, i32, i32} undef, i32 %hp, 0
+  %5 = insertvalue {i32, i32, i32} %4, i32 %p, 1
+  %6 = insertvalue {i32, i32, i32} %5, i32 %p, 2    ; NOTE(review): %p inserted twice; was %3 intended? confirm
+  ret {i32, i32, i32} %6
+}
diff --git a/test/CodeGen/X86/imul-lea-2.ll b/test/CodeGen/X86/imul-lea-2.ll
index 1cb54b3..7b79d06 100644
--- a/test/CodeGen/X86/imul-lea-2.ll
+++ b/test/CodeGen/X86/imul-lea-2.ll
@@ -1,15 +1,19 @@
-; RUN: llc < %s -march=x86-64 | grep lea | count 3
-; RUN: llc < %s -march=x86-64 | grep shl | count 1
-; RUN: llc < %s -march=x86-64 | not grep imul
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=NOIMUL
+; NOIMUL-NOT: imul
 
 define i64 @t1(i64 %a) nounwind readnone {
 entry:
-	%0 = mul i64 %a, 81		; <i64> [#uses=1]
-	ret i64 %0
+  %0 = mul i64 %a, 81
+; CHECK: lea
+; CHECK: lea
+  ret i64 %0
 }
 
 define i64 @t2(i64 %a) nounwind readnone {
 entry:
-	%0 = mul i64 %a, 40		; <i64> [#uses=1]
-	ret i64 %0
+  %0 = mul i64 %a, 40
+; CHECK: shl
+; CHECK: lea
+  ret i64 %0
 }
diff --git a/test/CodeGen/X86/imul-lea.ll b/test/CodeGen/X86/imul-lea.ll
index 4e8e2af..d55ece7 100644
--- a/test/CodeGen/X86/imul-lea.ll
+++ b/test/CodeGen/X86/imul-lea.ll
@@ -1,10 +1,12 @@
-; RUN: llc < %s -march=x86 | grep lea
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 declare i32 @foo()
 
 define i32 @test() {
-        %tmp.0 = tail call i32 @foo( )          ; <i32> [#uses=1]
-        %tmp.1 = mul i32 %tmp.0, 9              ; <i32> [#uses=1]
-        ret i32 %tmp.1
+  %tmp.0 = tail call i32 @foo( )
+  %tmp.1 = mul i32 %tmp.0, 9
+; CHECK-NOT: mul
+; CHECK: lea
+  ret i32 %tmp.1
 }
 
diff --git a/test/CodeGen/X86/imul64-lea.ll b/test/CodeGen/X86/imul64-lea.ll
new file mode 100644
index 0000000..047c129
--- /dev/null
+++ b/test/CodeGen/X86/imul64-lea.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 | FileCheck %s
+
+; Test that 64-bit LEAs are generated for both LP64 and ILP32 in 64-bit mode.
+declare i64 @foo64()
+
+define i64 @test64() {
+  %tmp.0 = tail call i64 @foo64( )
+  %tmp.1 = mul i64 %tmp.0, 9
+; CHECK-NOT: mul
+; CHECK: leaq
+  ret i64 %tmp.1
+}
+
+; Test that 32-bit LEAs are generated for both LP64 and ILP32 in 64-bit mode.
+declare i32 @foo32()
+
+define i32 @test32() {
+  %tmp.0 = tail call i32 @foo32( )
+  %tmp.1 = mul i32 %tmp.0, 9
+; CHECK-NOT: mul
+; CHECK: leal
+  ret i32 %tmp.1
+}
+
diff --git a/test/CodeGen/X86/insertelement-copytoregs.ll b/test/CodeGen/X86/insertelement-copytoregs.ll
index 34a29ca..88ff4da 100644
--- a/test/CodeGen/X86/insertelement-copytoregs.ll
+++ b/test/CodeGen/X86/insertelement-copytoregs.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 | grep -v IMPLICIT_DEF
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; CHECK-NOT: IMPLICIT_DEF
 
 define void @foo(<2 x float>* %p) {
   %t = insertelement <2 x float> undef, float 0.0, i32 0
diff --git a/test/CodeGen/X86/lea-2.ll b/test/CodeGen/X86/lea-2.ll
index 43f69b0..2112809 100644
--- a/test/CodeGen/X86/lea-2.ll
+++ b/test/CodeGen/X86/lea-2.ll
@@ -1,13 +1,15 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN:   grep "lea	EAX, DWORD PTR \[... + 4\*... - 5\]"
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN:   not grep add
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s
 
 define i32 @test1(i32 %A, i32 %B) {
-        %tmp1 = shl i32 %A, 2           ; <i32> [#uses=1]
-        %tmp3 = add i32 %B, -5          ; <i32> [#uses=1]
-        %tmp4 = add i32 %tmp3, %tmp1            ; <i32> [#uses=1]
-        ret i32 %tmp4
+  %tmp1 = shl i32 %A, 2
+  %tmp3 = add i32 %B, -5
+  %tmp4 = add i32 %tmp3, %tmp1
+; The above computation of %tmp4 should match a single lea, without using
+; actual add instructions.
+; CHECK-NOT: add
+; CHECK: lea {{[A-Z]+}}, DWORD PTR [{{[A-Z]+}} + 4*{{[A-Z]+}} - 5]
+
+  ret i32 %tmp4
 }
 
 
diff --git a/test/CodeGen/X86/lea-4.ll b/test/CodeGen/X86/lea-4.ll
index 2171204..cef4726 100644
--- a/test/CodeGen/X86/lea-4.ll
+++ b/test/CodeGen/X86/lea-4.ll
@@ -1,19 +1,21 @@
-; RUN: llc < %s -march=x86-64 | grep lea | count 2
+; RUN: llc < %s -march=x86-64 | FileCheck %s
 
 define zeroext i16 @t1(i32 %on_off) nounwind {
 entry:
-	%0 = sub i32 %on_off, 1
-	%1 = mul i32 %0, 2
-	%2 = trunc i32 %1 to i16
-	%3 = zext i16 %2 to i32
-	%4 = trunc i32 %3 to i16
-	ret i16 %4
+  %0 = sub i32 %on_off, 1
+  %1 = mul i32 %0, 2
+  %2 = trunc i32 %1 to i16
+  %3 = zext i16 %2 to i32
+  %4 = trunc i32 %3 to i16
+; CHECK: lea
+  ret i16 %4
 }
 
 define i32 @t2(i32 %on_off) nounwind {
 entry:
-	%0 = sub i32 %on_off, 1
-	%1 = mul i32 %0, 2
-        %2 = and i32 %1, 65535
-	ret i32 %2
+  %0 = sub i32 %on_off, 1
+  %1 = mul i32 %0, 2
+  %2 = and i32 %1, 65535
+; CHECK: lea
+  ret i32 %2
 }
diff --git a/test/CodeGen/X86/legalize-shift-64.ll b/test/CodeGen/X86/legalize-shift-64.ll
index c9f2fc2..71ef2d3 100644
--- a/test/CodeGen/X86/legalize-shift-64.ll
+++ b/test/CodeGen/X86/legalize-shift-64.ll
@@ -54,3 +54,14 @@ define i64 @test4(i64 %xx, i32 %test) nounwind {
 ; CHECK: orl	%esi, %eax
 ; CHECK: sarl	%cl, %edx
 }
+
+; PR14668
+define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) {
+  %shl = shl <2 x i64> %A, %B
+  ret <2 x i64> %shl
+; CHECK: test5
+; CHECK: shl
+; CHECK: shldl
+; CHECK: shl
+; CHECK: shldl
+}
diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll
index 949d6a4..630c0ed 100644
--- a/test/CodeGen/X86/memcpy-2.ll
+++ b/test/CodeGen/X86/memcpy-2.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mattr=+sse2      -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE2
-; RUN: llc < %s -mattr=+sse2      -mtriple=i686-pc-mingw32 -mcpu=core2 | FileCheck %s -check-prefix=SSE2
+; RUN: llc < %s -mattr=+sse2      -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE2-Darwin
+; RUN: llc < %s -mattr=+sse2      -mtriple=i686-pc-mingw32 -mcpu=core2 | FileCheck %s -check-prefix=SSE2-Mingw32
 ; RUN: llc < %s -mattr=+sse,-sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE1
 ; RUN: llc < %s -mattr=-sse       -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=NOSSE
 ; RUN: llc < %s                 -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=X86-64
@@ -9,12 +9,19 @@
 
 define void @t1(i32 %argc, i8** %argv) nounwind  {
 entry:
-; SSE2: t1:
-; SSE2: movsd _.str+16, %xmm0
-; SSE2: movsd %xmm0, 16(%esp)
-; SSE2: movaps _.str, %xmm0
-; SSE2: movaps %xmm0
-; SSE2: movb $0, 24(%esp)
+; SSE2-Darwin: t1:
+; SSE2-Darwin: movsd _.str+16, %xmm0
+; SSE2-Darwin: movsd %xmm0, 16(%esp)
+; SSE2-Darwin: movaps _.str, %xmm0
+; SSE2-Darwin: movaps %xmm0
+; SSE2-Darwin: movb $0, 24(%esp)
+
+; SSE2-Mingw32: t1:
+; SSE2-Mingw32: movsd _.str+16, %xmm0
+; SSE2-Mingw32: movsd %xmm0, 16(%esp)
+; SSE2-Mingw32: movaps _.str, %xmm0
+; SSE2-Mingw32: movups %xmm0
+; SSE2-Mingw32: movb $0, 24(%esp)
 
 ; SSE1: t1:
 ; SSE1: movaps _.str, %xmm0
@@ -48,9 +55,13 @@ entry:
 
 define void @t2(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
 entry:
-; SSE2: t2:
-; SSE2: movaps (%eax), %xmm0
-; SSE2: movaps %xmm0, (%eax)
+; SSE2-Darwin: t2:
+; SSE2-Darwin: movaps (%eax), %xmm0
+; SSE2-Darwin: movaps %xmm0, (%eax)
+
+; SSE2-Mingw32: t2:
+; SSE2-Mingw32: movaps (%eax), %xmm0
+; SSE2-Mingw32: movaps %xmm0, (%eax)
 
 ; SSE1: t2:
 ; SSE1: movaps (%eax), %xmm0
@@ -79,11 +90,17 @@ entry:
 
 define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
 entry:
-; SSE2: t3:
-; SSE2: movsd (%eax), %xmm0
-; SSE2: movsd 8(%eax), %xmm1
-; SSE2: movsd %xmm1, 8(%eax)
-; SSE2: movsd %xmm0, (%eax)
+; SSE2-Darwin: t3:
+; SSE2-Darwin: movsd (%eax), %xmm0
+; SSE2-Darwin: movsd 8(%eax), %xmm1
+; SSE2-Darwin: movsd %xmm1, 8(%eax)
+; SSE2-Darwin: movsd %xmm0, (%eax)
+
+; SSE2-Mingw32: t3:
+; SSE2-Mingw32: movsd (%eax), %xmm0
+; SSE2-Mingw32: movsd 8(%eax), %xmm1
+; SSE2-Mingw32: movsd %xmm1, 8(%eax)
+; SSE2-Mingw32: movsd %xmm0, (%eax)
 
 ; SSE1: t3:
 ; SSE1: movl
@@ -122,15 +139,25 @@ entry:
 
 define void @t4() nounwind {
 entry:
-; SSE2: t4:
-; SSE2: movw $120
-; SSE2: movl $2021161080
-; SSE2: movl $2021161080
-; SSE2: movl $2021161080
-; SSE2: movl $2021161080
-; SSE2: movl $2021161080
-; SSE2: movl $2021161080
-; SSE2: movl $2021161080
+; SSE2-Darwin: t4:
+; SSE2-Darwin: movw $120
+; SSE2-Darwin: movl $2021161080
+; SSE2-Darwin: movl $2021161080
+; SSE2-Darwin: movl $2021161080
+; SSE2-Darwin: movl $2021161080
+; SSE2-Darwin: movl $2021161080
+; SSE2-Darwin: movl $2021161080
+; SSE2-Darwin: movl $2021161080
+
+; SSE2-Mingw32: t4:
+; SSE2-Mingw32: movw $120
+; SSE2-Mingw32: movl $2021161080
+; SSE2-Mingw32: movl $2021161080
+; SSE2-Mingw32: movl $2021161080
+; SSE2-Mingw32: movl $2021161080
+; SSE2-Mingw32: movl $2021161080
+; SSE2-Mingw32: movl $2021161080
+; SSE2-Mingw32: movl $2021161080
 
 ; SSE1: t4:
 ; SSE1: movw $120
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index 39c7fba..3372a4a 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -87,8 +87,34 @@ entry:
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([30 x i8]* @.str, i64 0, i64 0), i64 16, i32 1, i1 false)
   ret void
 
+; DARWIN: test5:
 ; DARWIN: movabsq	$7016996765293437281
 ; DARWIN: movabsq	$7016996765293437184
 }
 
 
+; PR14896
+@.str2 = private unnamed_addr constant [2 x i8] c"x\00", align 1
+
+define void @test6() nounwind uwtable {
+entry:
+; DARWIN: test6
+; DARWIN: movw $0, 8
+; DARWIN: movq $120, 0
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0), i64 10, i32 1, i1 false)
+  ret void
+}
+
+define void @PR15348(i8* %a, i8* %b) {
+; Ensure that alignment of '0' in an @llvm.memcpy intrinsic results in
+; unaligned loads and stores.
+; LINUX: PR15348
+; LINUX: movb
+; LINUX: movb
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 17, i32 0, i1 false)
+  ret void
+}
diff --git a/test/CodeGen/X86/memset.ll b/test/CodeGen/X86/memset.ll
index b35f261..0d479f0 100644
--- a/test/CodeGen/X86/memset.ll
+++ b/test/CodeGen/X86/memset.ll
@@ -20,15 +20,18 @@ entry:
 ; X86: movl $0,
 ; X86: movl $0,
 ; X86-NOT: movl $0,
+; X86: ret
 
 ; XMM: xorps %xmm{{[0-9]+}}, [[Z:%xmm[0-9]+]]
 ; XMM: movaps [[Z]],
 ; XMM: movaps [[Z]],
 ; XMM-NOT: movaps
+; XMM: ret
 
 ; YMM: vxorps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, [[Z:%ymm[0-9]+]]
 ; YMM: vmovaps [[Z]],
 ; YMM-NOT: movaps
+; YMM: ret
 
 	call void @foo( %struct.x* %up_mvd116 ) nounwind 
 	ret void
@@ -37,3 +40,16 @@ entry:
 declare void @foo(%struct.x*)
 
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+define void @PR15348(i8* %a) {
+; Ensure that alignment of '0' in an @llvm.memset intrinsic results in
+; unaligned stores.
+; XMM: PR15348
+; XMM: movb $0,
+; XMM: movl $0,
+; XMM: movl $0,
+; XMM: movl $0,
+; XMM: movl $0,
+  call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 17, i32 0, i1 false)
+  ret void
+}
diff --git a/test/CodeGen/X86/ms-inline-asm.ll b/test/CodeGen/X86/ms-inline-asm.ll
index 24d28ad..5048a93 100644
--- a/test/CodeGen/X86/ms-inline-asm.ll
+++ b/test/CodeGen/X86/ms-inline-asm.ll
@@ -1,10 +1,11 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
 
 define i32 @t1() nounwind {
 entry:
   %0 = tail call i32 asm sideeffect inteldialect "mov eax, $1\0A\09mov $0, eax", "=r,r,~{eax},~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
   ret i32 %0
 ; CHECK: t1
+; CHECK: movl %esp, %ebp
 ; CHECK: {{## InlineAsm Start|#APP}}
 ; CHECK: .intel_syntax
 ; CHECK: mov eax, ecx
@@ -18,6 +19,7 @@ entry:
   call void asm sideeffect inteldialect "mov eax, $$1", "~{eax},~{dirflag},~{fpsr},~{flags}"() nounwind
   ret void
 ; CHECK: t2
+; CHECK: movl %esp, %ebp
 ; CHECK: {{## InlineAsm Start|#APP}}
 ; CHECK: .intel_syntax
 ; CHECK: mov eax, 1
@@ -32,6 +34,7 @@ entry:
   call void asm sideeffect inteldialect "mov eax, DWORD PTR [$0]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %V.addr) nounwind
   ret void
 ; CHECK: t3
+; CHECK: movl %esp, %ebp
 ; CHECK: {{## InlineAsm Start|#APP}}
 ; CHECK: .intel_syntax
 ; CHECK: mov eax, DWORD PTR {{[[esp]}}
@@ -53,6 +56,7 @@ entry:
   %0 = load i32* %b1, align 4
   ret i32 %0
 ; CHECK: t18
+; CHECK: movl %esp, %ebp
 ; CHECK: {{## InlineAsm Start|#APP}}
 ; CHECK: .intel_syntax
 ; CHECK: lea ebx, foo
@@ -61,3 +65,46 @@ entry:
 ; CHECK: .att_syntax
 ; CHECK: {{## InlineAsm End|#NO_APP}}
 }
+
+define void @t19_helper() nounwind {
+entry:
+  ret void
+}
+
+define void @t19() nounwind {
+entry:
+  call void asm sideeffect inteldialect "call $0", "r,~{dirflag},~{fpsr},~{flags}"(void ()* @t19_helper) nounwind
+  ret void
+; CHECK: t19:
+; CHECK: movl %esp, %ebp
+; CHECK: movl ${{_?}}t19_helper, %eax
+; CHECK: {{## InlineAsm Start|#APP}}
+; CHECK: .intel_syntax
+; CHECK: call eax
+; CHECK: .att_syntax
+; CHECK: {{## InlineAsm End|#NO_APP}}
+}
+
+@results = global [2 x i32] [i32 3, i32 2], align 4
+
+define i32* @t30() nounwind ssp {
+entry:
+  %res = alloca i32*, align 4
+  call void asm sideeffect inteldialect "lea edi, dword ptr $0", "*m,~{edi},~{dirflag},~{fpsr},~{flags}"([2 x i32]* @results) nounwind
+  call void asm sideeffect inteldialect "mov dword ptr $0, edi", "=*m,~{dirflag},~{fpsr},~{flags}"(i32** %res) nounwind
+  %0 = load i32** %res, align 4
+  ret i32* %0
+; CHECK: t30:
+; CHECK: movl %esp, %ebp
+; CHECK: {{## InlineAsm Start|#APP}}
+; CHECK: .intel_syntax
+; CHECK: lea edi, dword ptr [{{_?}}results]
+; CHECK: .att_syntax
+; CHECK: {{## InlineAsm End|#NO_APP}}
+; CHECK: {{## InlineAsm Start|#APP}}
+; CHECK: .intel_syntax
+; CHECK: mov dword ptr [esi], edi
+; CHECK: .att_syntax
+; CHECK: {{## InlineAsm End|#NO_APP}}
+; CHECK: movl (%esi), %eax
+}
diff --git a/test/CodeGen/X86/no-cmov.ll b/test/CodeGen/X86/no-cmov.ll
new file mode 100644
index 0000000..62d73b0
--- /dev/null
+++ b/test/CodeGen/X86/no-cmov.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=x86 -mcpu=i486 < %s | FileCheck %s
+
+define i32 @test1(i32 %g, i32* %j) {
+  %tobool = icmp eq i32 %g, 0
+  %cmp = load i32* %j, align 4
+  %retval.0 = select i1 %tobool, i32 1, i32 %cmp   ; 1 when %g == 0, otherwise the value loaded from %j
+  ret i32 %retval.0
+
+; CHECK: test1:
+; CHECK-NOT: cmov
+}
diff --git a/test/CodeGen/X86/pmovsx-inreg.ll b/test/CodeGen/X86/pmovsx-inreg.ll
new file mode 100644
index 0000000..d8c27f2
--- /dev/null
+++ b/test/CodeGen/X86/pmovsx-inreg.ll
@@ -0,0 +1,176 @@
+; RUN: llc < %s -march=x86-64 -mcpu=penryn | FileCheck -check-prefix=SSE41 %s
+; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck -check-prefix=AVX1 %s
+; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck -check-prefix=AVX2 %s
+
+; PR14887
+; These tests inject a store into the chain to test the inreg versions of pmovsx
+
+define void @test1(<2 x i8>* %in, <2 x i64>* %out) nounwind {
+  %wide.load35 = load <2 x i8>* %in, align 1
+  %sext = sext <2 x i8> %wide.load35 to <2 x i64>
+  store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
+  store <2 x i64> %sext, <2 x i64>* %out, align 8
+  ret void
+
+; SSE41: test1:
+; SSE41: pmovsxbq
+
+; AVX1: test1:
+; AVX1: vpmovsxbq
+
+; AVX2: test1:
+; AVX2: vpmovsxbq
+}
+
+define void @test2(<4 x i8>* %in, <4 x i64>* %out) nounwind {
+  %wide.load35 = load <4 x i8>* %in, align 1
+  %sext = sext <4 x i8> %wide.load35 to <4 x i64>
+  store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
+  store <4 x i64> %sext, <4 x i64>* %out, align 8
+  ret void
+
+; AVX2: test2:
+; AVX2: vpmovsxbq
+}
+
+define void @test3(<4 x i8>* %in, <4 x i32>* %out) nounwind {
+  %wide.load35 = load <4 x i8>* %in, align 1
+  %sext = sext <4 x i8> %wide.load35 to <4 x i32>
+  store <4 x i32> zeroinitializer, <4 x i32>* undef, align 8
+  store <4 x i32> %sext, <4 x i32>* %out, align 8
+  ret void
+
+; SSE41: test3:
+; SSE41: pmovsxbd
+
+; AVX1: test3:
+; AVX1: vpmovsxbd
+
+; AVX2: test3:
+; AVX2: vpmovsxbd
+}
+
+define void @test4(<8 x i8>* %in, <8 x i32>* %out) nounwind {
+  %wide.load35 = load <8 x i8>* %in, align 1
+  %sext = sext <8 x i8> %wide.load35 to <8 x i32>
+  store <8 x i32> zeroinitializer, <8 x i32>* undef, align 8
+  store <8 x i32> %sext, <8 x i32>* %out, align 8
+  ret void
+
+; AVX2: test4:
+; AVX2: vpmovsxbd
+}
+
+define void @test5(<8 x i8>* %in, <8 x i16>* %out) nounwind {
+  %wide.load35 = load <8 x i8>* %in, align 1
+  %sext = sext <8 x i8> %wide.load35 to <8 x i16>
+  store <8 x i16> zeroinitializer, <8 x i16>* undef, align 8
+  store <8 x i16> %sext, <8 x i16>* %out, align 8
+  ret void
+
+; SSE41: test5:
+; SSE41: pmovsxbw
+
+; AVX1: test5:
+; AVX1: vpmovsxbw
+
+; AVX2: test5:
+; AVX2: vpmovsxbw
+}
+
+define void @test6(<16 x i8>* %in, <16 x i16>* %out) nounwind {
+  %wide.load35 = load <16 x i8>* %in, align 1
+  %sext = sext <16 x i8> %wide.load35 to <16 x i16>
+  store <16 x i16> zeroinitializer, <16 x i16>* undef, align 8
+  store <16 x i16> %sext, <16 x i16>* %out, align 8
+  ret void
+
+; AVX2: test6:
+; FIXME: v16i8 -> v16i16 is scalarized.
+; AVX2-NOT: pmovsx
+}
+
+define void @test7(<2 x i16>* %in, <2 x i64>* %out) nounwind {
+  %wide.load35 = load <2 x i16>* %in, align 1
+  %sext = sext <2 x i16> %wide.load35 to <2 x i64>
+  store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
+  store <2 x i64> %sext, <2 x i64>* %out, align 8
+  ret void
+
+
+; SSE41: test7:
+; SSE41: pmovsxwq
+
+; AVX1: test7:
+; AVX1: vpmovsxwq
+
+; AVX2: test7:
+; AVX2: vpmovsxwq
+}
+
+define void @test8(<4 x i16>* %in, <4 x i64>* %out) nounwind {
+  %wide.load35 = load <4 x i16>* %in, align 1
+  %sext = sext <4 x i16> %wide.load35 to <4 x i64>
+  store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
+  store <4 x i64> %sext, <4 x i64>* %out, align 8
+  ret void
+
+; AVX2: test8:
+; AVX2: vpmovsxwq
+}
+
+define void @test9(<4 x i16>* %in, <4 x i32>* %out) nounwind {
+  %wide.load35 = load <4 x i16>* %in, align 1
+  %sext = sext <4 x i16> %wide.load35 to <4 x i32>
+  store <4 x i32> zeroinitializer, <4 x i32>* undef, align 8
+  store <4 x i32> %sext, <4 x i32>* %out, align 8
+  ret void
+
+; SSE41: test9:
+; SSE41: pmovsxwd
+
+; AVX1: test9:
+; AVX1: vpmovsxwd
+
+; AVX2: test9:
+; AVX2: vpmovsxwd
+}
+
+define void @test10(<8 x i16>* %in, <8 x i32>* %out) nounwind {
+  %wide.load35 = load <8 x i16>* %in, align 1
+  %sext = sext <8 x i16> %wide.load35 to <8 x i32>
+  store <8 x i32> zeroinitializer, <8 x i32>* undef, align 8
+  store <8 x i32> %sext, <8 x i32>* %out, align 8
+  ret void
+
+; AVX2: test10:
+; AVX2: vpmovsxwd
+}
+
+define void @test11(<2 x i32>* %in, <2 x i64>* %out) nounwind {
+  %wide.load35 = load <2 x i32>* %in, align 1
+  %sext = sext <2 x i32> %wide.load35 to <2 x i64>
+  store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
+  store <2 x i64> %sext, <2 x i64>* %out, align 8
+  ret void
+
+; SSE41: test11:
+; SSE41: pmovsxdq
+
+; AVX1: test11:
+; AVX1: vpmovsxdq
+
+; AVX2: test11:
+; AVX2: vpmovsxdq
+}
+
+define void @test12(<4 x i32>* %in, <4 x i64>* %out) nounwind {
+  %wide.load35 = load <4 x i32>* %in, align 1
+  %sext = sext <4 x i32> %wide.load35 to <4 x i64>
+  store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
+  store <4 x i64> %sext, <4 x i64>* %out, align 8
+  ret void
+
+; AVX2: test12:
+; AVX2: vpmovsxdq
+}
diff --git a/test/CodeGen/X86/pr10499.ll b/test/CodeGen/X86/pr10499.ll
new file mode 100644
index 0000000..f9cc747
--- /dev/null
+++ b/test/CodeGen/X86/pr10499.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7-avx -mattr=-sse2
+
+; No check as PR10499 is a crashing bug.
+
+define void @autogen_24438_500() {
+BB:
+  %I = insertelement <8 x i32> undef, i32 -1, i32 4
+  %BC = bitcast <8 x i32> %I to <8 x float>
+  br label %CF
+
+CF:                                               ; preds = %CF, %BB
+  %ZE = fpext <8 x float> %BC to <8 x double>
+  br label %CF
+}
diff --git a/test/CodeGen/X86/pr14562.ll b/test/CodeGen/X86/pr14562.ll
new file mode 100644
index 0000000..e66f175
--- /dev/null
+++ b/test/CodeGen/X86/pr14562.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+@temp1 = global i64 -77129852189294865, align 8
+
+define void @foo() nounwind {
+  %x = load i64* @temp1, align 8
+  %s = shl i64 %x, 32
+  %t = trunc i64 %s to i32                ; low 32 bits of (%x << 32) are always zero
+  %z = zext i32 %t to i64                 ; so %z is zero in both 32-bit halves
+  store i64 %z, i64* @temp1, align 8
+; CHECK: movl $0, {{_?}}temp1+4
+; CHECK: movl $0, {{_?}}temp1
+  ret void
+}
+
diff --git a/test/CodeGen/X86/pr15267.ll b/test/CodeGen/X86/pr15267.ll
new file mode 100644
index 0000000..c8aaf32
--- /dev/null
+++ b/test/CodeGen/X86/pr15267.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7-avx | FileCheck %s
+
+define <4 x i3> @test1(<4 x i3>* %in) nounwind {
+  %ret = load <4 x i3>* %in, align 1
+  ret <4 x i3> %ret
+}
+
+; CHECK: test1
+; CHECK: movzwl
+; CHECK: shrl $3
+; CHECK: andl $7
+; CHECK: andl $7
+; CHECK: vmovd
+; CHECK: pinsrd $1
+; CHECK: shrl $6
+; CHECK: andl $7
+; CHECK: pinsrd $2
+; CHECK: shrl $9
+; CHECK: andl $7
+; CHECK: pinsrd $3
+; CHECK: ret
+
+define <4 x i1> @test2(<4 x i1>* %in) nounwind {
+  %ret = load <4 x i1>* %in, align 1
+  ret <4 x i1> %ret
+}
+
+; CHECK: test2
+; CHECK: movzbl
+; CHECK: shrl
+; CHECK: andl $1
+; CHECK: andl $1
+; CHECK: vmovd
+; CHECK: pinsrd $1
+; CHECK: shrl $2
+; CHECK: andl $1
+; CHECK: pinsrd $2
+; CHECK: shrl $3
+; CHECK: andl $1
+; CHECK: pinsrd $3
+; CHECK: ret
+
+define <4 x i64> @test3(<4 x i1>* %in) nounwind {
+  %wide.load35 = load <4 x i1>* %in, align 1
+  %sext = sext <4 x i1> %wide.load35 to <4 x i64>
+  ret <4 x i64> %sext
+}
+
+; CHECK: test3
+; CHECK: movzbl
+; CHECK: shrl
+; CHECK: andl $1
+; CHECK: andl $1
+; CHECK: vmovd
+; CHECK: pinsrd $1
+; CHECK: shrl $2
+; CHECK: andl $1
+; CHECK: pinsrd $2
+; CHECK: shrl $3
+; CHECK: andl $1
+; CHECK: pinsrd $3
+; CHECK: pslld
+; CHECK: psrad
+; CHECK: pmovsxdq
+; CHECK: pmovsxdq
+; CHECK: ret
diff --git a/test/CodeGen/X86/pre-ra-sched.ll b/test/CodeGen/X86/pre-ra-sched.ll
new file mode 100644
index 0000000..b792ffa
--- /dev/null
+++ b/test/CodeGen/X86/pre-ra-sched.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -debug-only=pre-RA-sched \
+; RUN:     2>&1 | FileCheck %s
+; REQUIRES: asserts
+;
+; rdar:13279013: pre-RA-sched should not check all interferences and
+; repush them on the ready queue after scheduling each instruction.
+;
+; CHECK: *** List Scheduling
+; CHECK: Interfering reg EFLAGS
+; CHECK: Repushing
+; CHECK: Repushing
+; CHECK: Repushing
+; CHECK-NOT: Repushing
+; CHECK: *** Final schedule
+define i32 @test(i8* %pin) #0 {
+  %g0 = getelementptr inbounds i8* %pin, i64 0
+  %l0 = load i8* %g0, align 1
+
+  %g1a = getelementptr inbounds i8* %pin, i64 1
+  %l1a = load i8* %g1a, align 1
+  %z1a = zext i8 %l1a to i32
+  %g1b = getelementptr inbounds i8* %pin, i64 2
+  %l1b = load i8* %g1b, align 1
+  %z1b = zext i8 %l1b to i32
+  %c1 = icmp ne i8 %l0, 0
+  %x1 = xor i32 %z1a, %z1b
+  %s1 = select i1 %c1, i32 %z1a, i32 %x1
+
+  %g2a = getelementptr inbounds i8* %pin, i64 3
+  %l2a = load i8* %g2a, align 1
+  %z2a = zext i8 %l2a to i32
+  %g2b = getelementptr inbounds i8* %pin, i64 4
+  %l2b = load i8* %g2b, align 1
+  %z2b = zext i8 %l2b to i32
+  %x2 = xor i32 %z2a, %z2b
+  %s2 = select i1 %c1, i32 %z2a, i32 %x2
+
+  %g3a = getelementptr inbounds i8* %pin, i64 5
+  %l3a = load i8* %g3a, align 1
+  %z3a = zext i8 %l3a to i32
+  %g3b = getelementptr inbounds i8* %pin, i64 6
+  %l3b = load i8* %g3b, align 1
+  %z3b = zext i8 %l3b to i32
+  %x3 = xor i32 %z3a, %z3b
+  %s3 = select i1 %c1, i32 %z3a, i32 %x3
+
+  %c3 = icmp ne i8 %l1a, 0
+  %c4 = icmp ne i8 %l2a, 0
+
+  %s4 = select i1 %c3, i32 %s1, i32 %s2
+  %s5 = select i1 %c4, i32 %s4, i32 %s3
+
+  ret i32 %s5
+}
+
+attributes #0 = { nounwind ssp uwtable }
diff --git a/test/CodeGen/X86/rip-rel-lea.ll b/test/CodeGen/X86/rip-rel-lea.ll
new file mode 100644
index 0000000..71dacf6
--- /dev/null
+++ b/test/CodeGen/X86/rip-rel-lea.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=PIC64
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 -relocation-model=pic | FileCheck %s -check-prefix=PICX32
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=PIC32
+
+; With -relocation-model=pic, x86-64 (LP64 and x32) should address @a
+; %rip-relatively, while i686 should address it via @GOTOFF.
+
+@a = internal global double 3.4
+define double* @foo() nounwind {
+  %a = getelementptr double* @a, i64 0
+  ret double* %a
+  
+; PIC64:    leaq	a(%rip)
+; PICX32:   leal	a(%rip)
+; PIC32:    leal	a@GOTOFF(%eax)
+}
diff --git a/test/CodeGen/X86/sandybridge-loads.ll b/test/CodeGen/X86/sandybridge-loads.ll
new file mode 100644
index 0000000..5a23cf1
--- /dev/null
+++ b/test/CodeGen/X86/sandybridge-loads.ll
@@ -0,0 +1,39 @@
+; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -o - < %s | FileCheck %s
+
+;CHECK: wideloads
+;CHECK: vmovaps
+;CHECK: vinsertf128
+;CHECK: vmovaps
+;CHECK-NOT: vinsertf128
+;CHECK: ret
+
+define void @wideloads(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
+  %v0 = load <8 x float>* %a, align 16  ; <---- unaligned!
+  %v1 = load <8 x float>* %b, align 32  ; <---- aligned!
+  %m0 = fcmp olt <8 x float> %v1, %v0
+  %v2 = load <8 x float>* %c, align 32  ; <---- aligned!
+  %m1 = fcmp olt <8 x float> %v2, %v0
+  %mand = and <8 x i1> %m1, %m0
+  %r = zext <8 x i1> %mand to <8 x i32>
+  store <8 x i32> %r, <8 x i32>* undef, align 32
+  ret void
+}
+
+; CHECK: widestores
+; loads:
+; CHECK: vmovaps
+; CHECK: vmovaps
+; stores:
+; CHECK: vmovaps
+; CHECK: vextractf128
+; CHECK: vmovaps
+;CHECK: ret
+
+define void @widestores(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
+  %v0 = load <8 x float>* %a, align 32
+  %v1 = load <8 x float>* %b, align 32
+  store <8 x float> %v0, <8 x float>* %b, align 32 ; <--- aligned
+  store <8 x float> %v1, <8 x float>* %a, align 16 ; <--- unaligned
+  ret void
+}
+
diff --git a/test/CodeGen/X86/sincos-opt.ll b/test/CodeGen/X86/sincos-opt.ll
new file mode 100644
index 0000000..f364d1f
--- /dev/null
+++ b/test/CodeGen/X86/sincos-opt.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9.0 -mcpu=core2 | FileCheck %s --check-prefix=OSX_SINCOS
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=OSX_NOOPT
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mcpu=core2 -enable-unsafe-fp-math | FileCheck %s --check-prefix=GNU_SINCOS
+
+; Combine sin / cos into a single call.
+; rdar://13087969
+
+define float @test1(float %x) nounwind {
+entry:
+; GNU_SINCOS: test1:
+; GNU_SINCOS: callq sincosf
+; GNU_SINCOS: movss 4(%rsp), %xmm0
+; GNU_SINCOS: addss (%rsp), %xmm0
+
+; OSX_SINCOS: test1:
+; OSX_SINCOS: callq ___sincosf_stret
+; OSX_SINCOS: addss %xmm1, %xmm0
+
+; OSX_NOOPT: test1
+; OSX_NOOPT: callq _cosf
+; OSX_NOOPT: callq _sinf
+  %call = tail call float @sinf(float %x) nounwind readnone
+  %call1 = tail call float @cosf(float %x) nounwind readnone
+  %add = fadd float %call, %call1
+  ret float %add
+}
+
+define double @test2(double %x) nounwind {
+entry:
+; GNU_SINCOS: test2:
+; GNU_SINCOS: callq sincos
+; GNU_SINCOS: movsd 16(%rsp), %xmm0
+; GNU_SINCOS: addsd 8(%rsp), %xmm0
+
+; OSX_SINCOS: test2:
+; OSX_SINCOS: callq ___sincos_stret
+; OSX_SINCOS: addsd %xmm1, %xmm0
+
+; OSX_NOOPT: test2
+; OSX_NOOPT: callq _cos
+; OSX_NOOPT: callq _sin
+  %call = tail call double @sin(double %x) nounwind readnone
+  %call1 = tail call double @cos(double %x) nounwind readnone
+  %add = fadd double %call, %call1
+  ret double %add
+}
+
+define x86_fp80 @test3(x86_fp80 %x) nounwind {
+entry:
+; GNU_SINCOS: test3:
+; GNU_SINCOS: callq sinl
+; GNU_SINCOS: callq cosl
+; GNU_SINCOS: ret
+  %call = tail call x86_fp80 @sinl(x86_fp80 %x) nounwind
+  %call1 = tail call x86_fp80 @cosl(x86_fp80 %x) nounwind
+  %add = fadd x86_fp80 %call, %call1
+  ret x86_fp80 %add
+}
+
+declare float  @sinf(float) readonly
+declare double @sin(double) readonly
+declare float @cosf(float) readonly
+declare double @cos(double) readonly
+
+declare x86_fp80 @sinl(x86_fp80)
+declare x86_fp80 @cosl(x86_fp80)
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll
index 67ce1be..30a0fbe 100644
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@@ -29,7 +29,6 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
 ; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_i64
 ; CHECK: andnps
-; CHECK: andps
 ; CHECK: orps
 ; CHECK: ret
 
@@ -44,7 +43,6 @@ define void@vsel_i64(<2 x i64>* %v1, <2 x i64>* %v2) {
 ; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_double
 ; CHECK: andnps
-; CHECK: andps
 ; CHECK: orps
 ; CHECK: ret
 
diff --git a/test/CodeGen/X86/stack-align-memcpy.ll b/test/CodeGen/X86/stack-align-memcpy.ll
new file mode 100644
index 0000000..74945e5
--- /dev/null
+++ b/test/CodeGen/X86/stack-align-memcpy.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -force-align-stack -mtriple i386-apple-darwin -mcpu=i486 | FileCheck %s
+
+%struct.foo = type { [88 x i8] }
+
+; PR15249
+; We can't use rep;movsl here because it clobbers the base pointer in %esi.
+define void @test1(%struct.foo* nocapture %x, i32 %y) nounwind {
+  %dynalloc = alloca i8, i32 %y, align 1
+  call void @bar(i8* %dynalloc, %struct.foo* align 4 byval %x)
+  ret void
+
+; CHECK: test1:
+; CHECK: andl $-16, %esp
+; CHECK: movl %esp, %esi
+; CHECK-NOT: rep;movsl
+}
+
+declare void @bar(i8* nocapture, %struct.foo* align 4 byval) nounwind
diff --git a/test/CodeGen/X86/stack-protector.ll b/test/CodeGen/X86/stack-protector.ll
index c075114..1e9ca1d 100644
--- a/test/CodeGen/X86/stack-protector.ll
+++ b/test/CodeGen/X86/stack-protector.ll
@@ -1,28 +1,3141 @@
-; RUN: llc -mtriple=i386-pc-linux-gnu < %s -o - | grep %gs:
-; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | grep %fs:
-; RUN: llc -code-model=kernel -mtriple=x86_64-pc-linux-gnu < %s -o - | grep %gs:
-; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep "__stack_chk_guard"
-; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep "__stack_chk_fail"
+; RUN: llc -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck --check-prefix=LINUX-I386 %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=LINUX-X64 %s
+; RUN: llc -code-model=kernel -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=LINUX-KERNEL-X64 %s
+; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | FileCheck --check-prefix=DARWIN-X64 %s
 
-@"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00"		; <[11 x i8]*> [#uses=1]
+%struct.foo = type { [16 x i8] }
+%struct.foo.0 = type { [4 x i8] }
+%struct.pair = type { i32, i32 }
+%struct.nest = type { %struct.pair, %struct.pair }
+%struct.vec = type { <4 x i32> }
+%class.A = type { [2 x i8] }
+%struct.deep = type { %union.anon }
+%union.anon = type { %struct.anon }
+%struct.anon = type { %struct.anon.0 }
+%struct.anon.0 = type { %union.anon.1 }
+%union.anon.1 = type { [2 x i8] }
+%struct.small = type { i8 }
 
-define void @test(i8* %a) nounwind ssp {
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; test1a: array of [16 x i8]
+;         no ssp attribute
+; Requires no protector.
+define void @test1a(i8* %a) nounwind uwtable {
+entry:
+; LINUX-I386: test1a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test1a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test1a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test1a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a.addr = alloca i8*, align 8
+  %buf = alloca [16 x i8], align 16
+  store i8* %a, i8** %a.addr, align 8
+  %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+  %0 = load i8** %a.addr, align 8
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+  %arraydecay1 = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+  ret void
+}
+
+; test1b: array of [16 x i8]
+;         ssp attribute
+; Requires protector.
+define void @test1b(i8* %a) nounwind uwtable ssp {
+entry:
+; LINUX-I386: test1b:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test1b:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test1b:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test1b:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a.addr = alloca i8*, align 8
+  %buf = alloca [16 x i8], align 16
+  store i8* %a, i8** %a.addr, align 8
+  %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+  %0 = load i8** %a.addr, align 8
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+  %arraydecay1 = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+  ret void
+}
+
+; test1c: array of [16 x i8]
+;         sspstrong attribute
+; Requires protector.
+define void @test1c(i8* %a) nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test1c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test1c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test1c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test1c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a.addr = alloca i8*, align 8
+  %buf = alloca [16 x i8], align 16
+  store i8* %a, i8** %a.addr, align 8
+  %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+  %0 = load i8** %a.addr, align 8
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+  %arraydecay1 = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+  ret void
+}
+
+; test1d: array of [16 x i8]
+;         sspreq attribute
+; Requires protector.
+define void @test1d(i8* %a) nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test1d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test1d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test1d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test1d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a.addr = alloca i8*, align 8
+  %buf = alloca [16 x i8], align 16
+  store i8* %a, i8** %a.addr, align 8
+  %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+  %0 = load i8** %a.addr, align 8
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+  %arraydecay1 = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+  ret void
+}
+
+; test2a: struct { [16 x i8] }
+;         no ssp attribute
+; Requires no protector.
+define void @test2a(i8* %a) nounwind uwtable {
+entry:
+; LINUX-I386: test2a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test2a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test2a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test2a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a.addr = alloca i8*, align 8
+  %b = alloca %struct.foo, align 1
+  store i8* %a, i8** %a.addr, align 8
+  %buf = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
+  %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+  %0 = load i8** %a.addr, align 8
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+  %buf1 = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
+  %arraydecay2 = getelementptr inbounds [16 x i8]* %buf1, i32 0, i32 0
+  %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+  ret void
+}
+
+; test2b: struct { [16 x i8] }
+;         ssp attribute
+; Requires protector.
+define void @test2b(i8* %a) nounwind uwtable ssp {
+entry:
+; LINUX-I386: test2b:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test2b:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test2b:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test2b:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a.addr = alloca i8*, align 8
+  %b = alloca %struct.foo, align 1
+  store i8* %a, i8** %a.addr, align 8
+  %buf = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
+  %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+  %0 = load i8** %a.addr, align 8
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+  %buf1 = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
+  %arraydecay2 = getelementptr inbounds [16 x i8]* %buf1, i32 0, i32 0
+  %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+  ret void
+}
+
+; test2c: struct { [16 x i8] }
+;         sspstrong attribute
+; Requires protector.
+define void @test2c(i8* %a) nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test2c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test2c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test2c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test2c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a.addr = alloca i8*, align 8
+  %b = alloca %struct.foo, align 1
+  store i8* %a, i8** %a.addr, align 8
+  %buf = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
+  %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+  %0 = load i8** %a.addr, align 8
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+  %buf1 = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
+  %arraydecay2 = getelementptr inbounds [16 x i8]* %buf1, i32 0, i32 0
+  %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+  ret void
+}
+
+; test2d: struct { [16 x i8] }
+;         sspreq attribute
+; Requires protector.
+define void @test2d(i8* %a) nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test2d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test2d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test2d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test2d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a.addr = alloca i8*, align 8
+  %b = alloca %struct.foo, align 1
+  store i8* %a, i8** %a.addr, align 8
+  %buf = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
+  %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+  %0 = load i8** %a.addr, align 8
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+  %buf1 = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
+  %arraydecay2 = getelementptr inbounds [16 x i8]* %buf1, i32 0, i32 0
+  %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+  ret void
+}
+
+; test3a:  array of [4 x i8]
+;          no ssp attribute
+; Requires no protector.
+define void @test3a(i8* %a) nounwind uwtable {
+entry:
+; LINUX-I386: test3a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test3a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test3a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test3a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a.addr = alloca i8*, align 8
+  %buf = alloca [4 x i8], align 1
+  store i8* %a, i8** %a.addr, align 8
+  %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+  %0 = load i8** %a.addr, align 8
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+  %arraydecay1 = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+  ret void
+}
+
+; test3b:  array of [4 x i8]
+;          ssp attribute
+; Requires no protector.
+define void @test3b(i8* %a) nounwind uwtable ssp {
+entry:
+; LINUX-I386: test3b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test3b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test3b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test3b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a.addr = alloca i8*, align 8
+  %buf = alloca [4 x i8], align 1
+  store i8* %a, i8** %a.addr, align 8
+  %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+  %0 = load i8** %a.addr, align 8
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+  %arraydecay1 = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+  ret void
+}
+
+; test3c:  array of [4 x i8]
+;          sspstrong attribute
+; Requires protector.
+define void @test3c(i8* %a) nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test3c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test3c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test3c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test3c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a.addr = alloca i8*, align 8
+  %buf = alloca [4 x i8], align 1
+  store i8* %a, i8** %a.addr, align 8
+  %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+  %0 = load i8** %a.addr, align 8
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+  %arraydecay1 = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+  ret void
+}
+
+; test3d:  array of [4 x i8]
+;          sspreq attribute
+; Requires protector.
+define void @test3d(i8* %a) nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test3d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test3d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test3d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test3d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a.addr = alloca i8*, align 8
+  %buf = alloca [4 x i8], align 1
+  store i8* %a, i8** %a.addr, align 8
+  %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+  %0 = load i8** %a.addr, align 8
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+  %arraydecay1 = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+  ret void
+}
+
+; test4a:  struct { [4 x i8] }
+;          no ssp attribute
+; Requires no protector.
+define void @test4a(i8* %a) nounwind uwtable {
+entry:
+; LINUX-I386: test4a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test4a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test4a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test4a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a.addr = alloca i8*, align 8
+  %b = alloca %struct.foo.0, align 1
+  store i8* %a, i8** %a.addr, align 8
+  %buf = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
+  %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+  %0 = load i8** %a.addr, align 8
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+  %buf1 = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
+  %arraydecay2 = getelementptr inbounds [4 x i8]* %buf1, i32 0, i32 0
+  %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+  ret void
+}
+
+; test4b:  struct { [4 x i8] }
+;          ssp attribute
+; Requires no protector.
+define void @test4b(i8* %a) nounwind uwtable ssp {
+entry:
+; LINUX-I386: test4b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test4b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test4b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test4b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a.addr = alloca i8*, align 8
+  %b = alloca %struct.foo.0, align 1
+  store i8* %a, i8** %a.addr, align 8
+  %buf = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
+  %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+  %0 = load i8** %a.addr, align 8
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+  %buf1 = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
+  %arraydecay2 = getelementptr inbounds [4 x i8]* %buf1, i32 0, i32 0
+  %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+  ret void
+}
+
+; test4c:  struct { [4 x i8] }
+;          sspstrong attribute
+; Requires protector.
+define void @test4c(i8* %a) nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test4c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test4c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test4c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test4c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a.addr = alloca i8*, align 8
+  %b = alloca %struct.foo.0, align 1
+  store i8* %a, i8** %a.addr, align 8
+  %buf = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
+  %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+  %0 = load i8** %a.addr, align 8
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+  %buf1 = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
+  %arraydecay2 = getelementptr inbounds [4 x i8]* %buf1, i32 0, i32 0
+  %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+  ret void
+}
+
+; test4d:  struct { [4 x i8] }
+;          sspreq attribute
+; Requires protector.
+define void @test4d(i8* %a) nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test4d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test4d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test4d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test4d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a.addr = alloca i8*, align 8
+  %b = alloca %struct.foo.0, align 1
+  store i8* %a, i8** %a.addr, align 8
+  %buf = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
+  %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+  %0 = load i8** %a.addr, align 8
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+  %buf1 = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
+  %arraydecay2 = getelementptr inbounds [4 x i8]* %buf1, i32 0, i32 0
+  %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+  ret void
+}
+
+; test5a:  no arrays / no nested arrays
+;          no ssp attribute
+; Requires no protector.
+define void @test5a(i8* %a) nounwind uwtable {
+entry:
+; LINUX-I386: test5a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test5a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test5a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test5a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a.addr = alloca i8*, align 8
+  store i8* %a, i8** %a.addr, align 8
+  %0 = load i8** %a.addr, align 8
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0)
+  ret void
+}
+
+; test5b:  no arrays / no nested arrays
+;          ssp attribute
+; Requires no protector.
+define void @test5b(i8* %a) nounwind uwtable ssp {
+entry:
+; LINUX-I386: test5b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test5b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test5b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test5b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a.addr = alloca i8*, align 8
+  store i8* %a, i8** %a.addr, align 8
+  %0 = load i8** %a.addr, align 8
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0)
+  ret void
+}
+
+; test5c:  no arrays / no nested arrays
+;          sspstrong attribute
+; Requires no protector.
+define void @test5c(i8* %a) nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test5c:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test5c:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test5c:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test5c:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a.addr = alloca i8*, align 8
+  store i8* %a, i8** %a.addr, align 8
+  %0 = load i8** %a.addr, align 8
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0)
+  ret void
+}
+
+; test5d:  no arrays / no nested arrays
+;          sspreq attribute
+; Requires protector.
+define void @test5d(i8* %a) nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test5d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test5d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test5d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test5d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a.addr = alloca i8*, align 8
+  store i8* %a, i8** %a.addr, align 8
+  %0 = load i8** %a.addr, align 8
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0)
+  ret void
+}
+
+; test6a:  Address-of local taken (j = &a)
+;          no ssp attribute
+; Requires no protector.
+define void @test6a() nounwind uwtable {
+entry:
+; LINUX-I386: test6a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test6a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test6a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test6a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %retval = alloca i32, align 4
+  %a = alloca i32, align 4
+  %j = alloca i32*, align 8
+  store i32 0, i32* %retval
+  %0 = load i32* %a, align 4
+  %add = add nsw i32 %0, 1
+  store i32 %add, i32* %a, align 4
+  store i32* %a, i32** %j, align 8
+  ret void
+}
+
+; test6b:  Address-of local taken (j = &a)
+;          ssp attribute
+; Requires no protector.
+define void @test6b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test6b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test6b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test6b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test6b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %retval = alloca i32, align 4
+  %a = alloca i32, align 4
+  %j = alloca i32*, align 8
+  store i32 0, i32* %retval
+  %0 = load i32* %a, align 4
+  %add = add nsw i32 %0, 1
+  store i32 %add, i32* %a, align 4
+  store i32* %a, i32** %j, align 8
+  ret void
+}
+
+; test6c:  Address-of local taken (j = &a)
+;          sspstrong attribute
+; Requires protector.
+define void @test6c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test6c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test6c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test6c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test6c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %retval = alloca i32, align 4
+  %a = alloca i32, align 4
+  %j = alloca i32*, align 8
+  store i32 0, i32* %retval
+  %0 = load i32* %a, align 4
+  %add = add nsw i32 %0, 1
+  store i32 %add, i32* %a, align 4
+  store i32* %a, i32** %j, align 8
+  ret void
+}
+
+; test6d:  Address-of local taken (j = &a)
+;          sspreq attribute
+; Requires protector.
+define void @test6d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test6d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test6d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test6d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test6d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %retval = alloca i32, align 4
+  %a = alloca i32, align 4
+  %j = alloca i32*, align 8
+  store i32 0, i32* %retval
+  %0 = load i32* %a, align 4
+  %add = add nsw i32 %0, 1
+  store i32 %add, i32* %a, align 4
+  store i32* %a, i32** %j, align 8
+  ret void
+}
+
+; test7a:  PtrToInt Cast
+;          no ssp attribute
+; Requires no protector.
+define void @test7a() nounwind uwtable readnone {
+entry:
+; LINUX-I386: test7a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test7a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test7a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test7a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a = alloca i32, align 4
+  %0 = ptrtoint i32* %a to i64
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+  ret void
+}
+
+; test7b:  PtrToInt Cast
+;          ssp attribute
+; Requires no protector.
+define void @test7b() nounwind uwtable readnone ssp {
+entry:
+; LINUX-I386: test7b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test7b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test7b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test7b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a = alloca i32, align 4
+  %0 = ptrtoint i32* %a to i64
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+  ret void
+}
+
+; test7c:  PtrToInt Cast
+;          sspstrong attribute
+; Requires protector.
+define void @test7c() nounwind uwtable readnone sspstrong {
+entry:
+; LINUX-I386: test7c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test7c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test7c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test7c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca i32, align 4
+  %0 = ptrtoint i32* %a to i64
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+  ret void
+}
+
+; test7d:  PtrToInt Cast
+;          sspreq attribute
+; Requires protector.
+define void @test7d() nounwind uwtable readnone sspreq {
+entry:
+; LINUX-I386: test7d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test7d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test7d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test7d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca i32, align 4
+  %0 = ptrtoint i32* %a to i64
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+  ret void
+}
+
+; test8a:  Passing addr-of to function call
+;          no ssp attribute
+; Requires no protector.
+define void @test8a() nounwind uwtable {
+entry:
+; LINUX-I386: test8a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test8a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test8a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test8a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %b = alloca i32, align 4
+  call void @funcall(i32* %b) nounwind
+  ret void
+}
+
+; test8b:  Passing addr-of to function call
+;          ssp attribute
+; Requires no protector.
+define void @test8b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test8b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test8b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test8b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test8b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %b = alloca i32, align 4
+  call void @funcall(i32* %b) nounwind
+  ret void
+}
+
+; test8c:  Passing addr-of to function call
+;          sspstrong attribute
+; Requires protector.
+define void @test8c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test8c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test8c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test8c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test8c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %b = alloca i32, align 4
+  call void @funcall(i32* %b) nounwind
+  ret void
+}
+
+; test8d:  Passing addr-of to function call
+;          sspreq attribute
+; Requires protector.
+define void @test8d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test8d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test8d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test8d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test8d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %b = alloca i32, align 4
+  call void @funcall(i32* %b) nounwind
+  ret void
+}
+
+; test9a:  Addr-of in select instruction
+;          no ssp attribute
+; Requires no protector.
+define void @test9a() nounwind uwtable {
+entry:
+; LINUX-I386: test9a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test9a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test9a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test9a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %x = alloca double, align 8
+  %call = call double @testi_aux() nounwind
+  store double %call, double* %x, align 8
+  %cmp2 = fcmp ogt double %call, 0.000000e+00
+  %y.1 = select i1 %cmp2, double* %x, double* null
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double* %y.1)
+  ret void
+}
+
+; test9b:  Addr-of in select instruction
+;          ssp attribute
+; Requires no protector.
+define void @test9b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test9b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test9b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test9b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test9b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %x = alloca double, align 8
+  %call = call double @testi_aux() nounwind
+  store double %call, double* %x, align 8
+  %cmp2 = fcmp ogt double %call, 0.000000e+00
+  %y.1 = select i1 %cmp2, double* %x, double* null
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double* %y.1)
+  ret void
+}
+
+; test9c:  Addr-of in select instruction
+;          sspstrong attribute
+; Requires protector.
+define void @test9c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test9c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test9c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test9c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test9c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %x = alloca double, align 8
+  %call = call double @testi_aux() nounwind
+  store double %call, double* %x, align 8
+  %cmp2 = fcmp ogt double %call, 0.000000e+00
+  %y.1 = select i1 %cmp2, double* %x, double* null
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double* %y.1)
+  ret void
+}
+
+; test9d:  Addr-of in select instruction
+;          sspreq attribute
+; Requires protector.
+define void @test9d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test9d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test9d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test9d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test9d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %x = alloca double, align 8
+  %call = call double @testi_aux() nounwind
+  store double %call, double* %x, align 8
+  %cmp2 = fcmp ogt double %call, 0.000000e+00
+  %y.1 = select i1 %cmp2, double* %x, double* null
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double* %y.1)
+  ret void
+}
+
+; test10a: Addr-of in phi instruction
+;          no ssp attribute
+; Requires no protector.
+define void @test10a() nounwind uwtable {
+entry:
+; LINUX-I386: test10a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test10a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test10a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test10a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %x = alloca double, align 8
+  %call = call double @testi_aux() nounwind
+  store double %call, double* %x, align 8
+  %cmp = fcmp ogt double %call, 3.140000e+00
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %call1 = call double @testi_aux() nounwind
+  store double %call1, double* %x, align 8
+  br label %if.end4
+
+if.else:                                          ; preds = %entry
+  %cmp2 = fcmp ogt double %call, 1.000000e+00
+  br i1 %cmp2, label %if.then3, label %if.end4
+
+if.then3:                                         ; preds = %if.else
+  br label %if.end4
+
+if.end4:                                          ; preds = %if.else, %if.then3, %if.then
+  %y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
+  %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0) nounwind
+  ret void
+}
+
+; test10b: Addr-of in phi instruction
+;          ssp attribute
+; Requires no protector.
+define void @test10b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test10b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test10b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test10b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test10b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %x = alloca double, align 8
+  %call = call double @testi_aux() nounwind
+  store double %call, double* %x, align 8
+  %cmp = fcmp ogt double %call, 3.140000e+00
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %call1 = call double @testi_aux() nounwind
+  store double %call1, double* %x, align 8
+  br label %if.end4
+
+if.else:                                          ; preds = %entry
+  %cmp2 = fcmp ogt double %call, 1.000000e+00
+  br i1 %cmp2, label %if.then3, label %if.end4
+
+if.then3:                                         ; preds = %if.else
+  br label %if.end4
+
+if.end4:                                          ; preds = %if.else, %if.then3, %if.then
+  %y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
+  %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0) nounwind
+  ret void
+}
+
+; test10c: Addr-of in phi instruction
+;          sspstrong attribute
+; Requires protector.
+define void @test10c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test10c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test10c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test10c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test10c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %x = alloca double, align 8
+  %call = call double @testi_aux() nounwind
+  store double %call, double* %x, align 8
+  %cmp = fcmp ogt double %call, 3.140000e+00
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %call1 = call double @testi_aux() nounwind
+  store double %call1, double* %x, align 8
+  br label %if.end4
+
+if.else:                                          ; preds = %entry
+  %cmp2 = fcmp ogt double %call, 1.000000e+00
+  br i1 %cmp2, label %if.then3, label %if.end4
+
+if.then3:                                         ; preds = %if.else
+  br label %if.end4
+
+if.end4:                                          ; preds = %if.else, %if.then3, %if.then
+  %y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
+  %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0) nounwind
+  ret void
+}
+
+; test10d: Addr-of in phi instruction
+;          sspreq attribute
+; Requires protector.
+define void @test10d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test10d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test10d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test10d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test10d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %x = alloca double, align 8
+  %call = call double @testi_aux() nounwind
+  store double %call, double* %x, align 8
+  %cmp = fcmp ogt double %call, 3.140000e+00
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %call1 = call double @testi_aux() nounwind
+  store double %call1, double* %x, align 8
+  br label %if.end4
+
+if.else:                                          ; preds = %entry
+  %cmp2 = fcmp ogt double %call, 1.000000e+00
+  br i1 %cmp2, label %if.then3, label %if.end4
+
+if.then3:                                         ; preds = %if.else
+  br label %if.end4
+
+if.end4:                                          ; preds = %if.else, %if.then3, %if.then
+  %y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
+  %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0) nounwind
+  ret void
+}
+
+; test11a: Addr-of struct element. (GEP followed by store).
+;          no ssp attribute
+; Requires no protector.
+define void @test11a() nounwind uwtable {
+entry:
+; LINUX-I386: test11a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test11a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test11a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test11a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %c = alloca %struct.pair, align 4
+  %b = alloca i32*, align 8
+  %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+  store i32* %y, i32** %b, align 8
+  %0 = load i32** %b, align 8
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0)
+  ret void
+}
+
+; test11b: Addr-of struct element. (GEP followed by store).
+;          ssp attribute
+; Requires no protector.
+define void @test11b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test11b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test11b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test11b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test11b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %c = alloca %struct.pair, align 4
+  %b = alloca i32*, align 8
+  %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+  store i32* %y, i32** %b, align 8
+  %0 = load i32** %b, align 8
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0)
+  ret void
+}
+
+; test11c: Addr-of struct element. (GEP followed by store).
+;          sspstrong attribute
+; Requires protector.
+define void @test11c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test11c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test11c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test11c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test11c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %c = alloca %struct.pair, align 4
+  %b = alloca i32*, align 8
+  %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+  store i32* %y, i32** %b, align 8
+  %0 = load i32** %b, align 8
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0)
+  ret void
+}
+
+; test11d: Addr-of struct element. (GEP followed by store).
+;          sspreq attribute
+; Requires protector.
+define void @test11d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test11d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test11d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test11d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test11d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %c = alloca %struct.pair, align 4
+  %b = alloca i32*, align 8
+  %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+  store i32* %y, i32** %b, align 8
+  %0 = load i32** %b, align 8
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0)
+  ret void
+}
+
+; test12a: Addr-of struct element, GEP followed by ptrtoint.
+;          no ssp attribute
+; Requires no protector.
+define void @test12a() nounwind uwtable {
+entry:
+; LINUX-I386: test12a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test12a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test12a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test12a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %c = alloca %struct.pair, align 4
+  %b = alloca i32*, align 8
+  %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+  %0 = ptrtoint i32* %y to i64
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+  ret void
+}
+
+; test12b: Addr-of struct element, GEP followed by ptrtoint.
+;          ssp attribute
+; Requires no protector.
+define void @test12b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test12b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test12b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test12b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test12b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %c = alloca %struct.pair, align 4
+  %b = alloca i32*, align 8
+  %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+  %0 = ptrtoint i32* %y to i64
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+  ret void
+}
+
+; test12c: Addr-of struct element, GEP followed by ptrtoint.
+;          sspstrong attribute
+; Requires protector.
+define void @test12c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test12c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test12c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test12c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test12c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %c = alloca %struct.pair, align 4
+  %b = alloca i32*, align 8
+  %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+  %0 = ptrtoint i32* %y to i64
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+  ret void
+}
+
+; test12d: Addr-of struct element, GEP followed by ptrtoint.
+;          sspreq attribute
+; Requires protector.
+define void @test12d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test12d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test12d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test12d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test12d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %c = alloca %struct.pair, align 4
+  %b = alloca i32*, align 8
+  %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+  %0 = ptrtoint i32* %y to i64
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+  ret void
+}
+
+; test13a: Addr-of struct element, GEP followed by callinst.
+;          no ssp attribute
+; Requires no protector.
+define void @test13a() nounwind uwtable {
+entry:
+; LINUX-I386: test13a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test13a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test13a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test13a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %c = alloca %struct.pair, align 4
+  %y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y) nounwind
+  ret void
+}
+
+; test13b: Addr-of struct element, GEP followed by callinst.
+;          ssp attribute
+; Requires no protector.
+define void @test13b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test13b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test13b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test13b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test13b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %c = alloca %struct.pair, align 4
+  %y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y) nounwind
+  ret void
+}
+
+; test13c: Addr-of struct element, GEP followed by callinst.
+;          sspstrong attribute
+; Requires protector.
+define void @test13c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test13c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test13c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test13c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test13c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %c = alloca %struct.pair, align 4
+  %y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y) nounwind
+  ret void
+}
+
+; test13d: Addr-of struct element, GEP followed by callinst.
+;          sspreq attribute
+; Requires protector.
+define void @test13d() nounwind uwtable sspreq {
 entry:
-	%a_addr = alloca i8*		; <i8**> [#uses=2]
-	%buf = alloca [8 x i8]		; <[8 x i8]*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	store i8* %a, i8** %a_addr
-	%buf1 = bitcast [8 x i8]* %buf to i8*		; <i8*> [#uses=1]
-	%0 = load i8** %a_addr, align 4		; <i8*> [#uses=1]
-	%1 = call i8* @strcpy(i8* %buf1, i8* %0) nounwind		; <i8*> [#uses=0]
-	%buf2 = bitcast [8 x i8]* %buf to i8*		; <i8*> [#uses=1]
-	%2 = call i32 (i8*, ...)* @printf(i8* getelementptr ([11 x i8]* @"\01LC", i32 0, i32 0), i8* %buf2) nounwind		; <i32> [#uses=0]
-	br label %return
+; LINUX-I386: test13d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test13d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
 
-return:		; preds = %entry
-	ret void
+; LINUX-KERNEL-X64: test13d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test13d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %c = alloca %struct.pair, align 4
+  %y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y) nounwind
+  ret void
 }
 
-declare i8* @strcpy(i8*, i8*) nounwind
+; test14a: Addr-of a local, optimized into a GEP (e.g., &a - 12)
+;          no ssp attribute
+; Requires no protector.
+define void @test14a() nounwind uwtable {
+entry:
+; LINUX-I386: test14a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test14a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test14a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test14a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a = alloca i32, align 4
+  %add.ptr5 = getelementptr inbounds i32* %a, i64 -12
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5) nounwind
+  ret void
+}
+
+; test14b: Addr-of a local, optimized into a GEP (e.g., &a - 12)
+;          ssp attribute
+; Requires no protector.
+define void @test14b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test14b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test14b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test14b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test14b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a = alloca i32, align 4
+  %add.ptr5 = getelementptr inbounds i32* %a, i64 -12
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5) nounwind
+  ret void
+}
+
+; test14c: Addr-of a local, optimized into a GEP (e.g., &a - 12)
+;          sspstrong attribute
+; Requires protector.
+define void @test14c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test14c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test14c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test14c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test14c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca i32, align 4
+  %add.ptr5 = getelementptr inbounds i32* %a, i64 -12
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5) nounwind
+  ret void
+}
+
+; test14d: Addr-of a local, optimized into a GEP (e.g., &a - 12)
+;          sspreq attribute
+; Requires protector.
+define void @test14d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test14d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test14d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test14d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test14d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca i32, align 4
+  %add.ptr5 = getelementptr inbounds i32* %a, i64 -12
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5) nounwind
+  ret void
+}
+
+; test15a: Addr-of a local cast to a ptr of a different type
+;           (e.g., int a; ... ; float *b = &a;)
+;          no ssp attribute
+; Requires no protector.
+define void @test15a() nounwind uwtable {
+entry:
+; LINUX-I386: test15a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test15a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test15a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test15a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a = alloca i32, align 4
+  %b = alloca float*, align 8
+  store i32 0, i32* %a, align 4
+  %0 = bitcast i32* %a to float*
+  store float* %0, float** %b, align 8
+  %1 = load float** %b, align 8
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1)
+  ret void
+}
+
+; test15b: Addr-of a local cast to a ptr of a different type
+;           (e.g., int a; ... ; float *b = &a;)
+;          ssp attribute
+; Requires no protector.
+define void @test15b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test15b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test15b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test15b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test15b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a = alloca i32, align 4
+  %b = alloca float*, align 8
+  store i32 0, i32* %a, align 4
+  %0 = bitcast i32* %a to float*
+  store float* %0, float** %b, align 8
+  %1 = load float** %b, align 8
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1)
+  ret void
+}
+
+; test15c: Addr-of a local cast to a ptr of a different type
+;           (e.g., int a; ... ; float *b = &a;)
+;          sspstrong attribute
+; Requires protector.
+define void @test15c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test15c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test15c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test15c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test15c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca i32, align 4
+  %b = alloca float*, align 8
+  store i32 0, i32* %a, align 4
+  %0 = bitcast i32* %a to float*
+  store float* %0, float** %b, align 8
+  %1 = load float** %b, align 8
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1)
+  ret void
+}
+
+; test15d: Addr-of a local cast to a ptr of a different type
+;           (e.g., int a; ... ; float *b = &a;)
+;          sspreq attribute
+; Requires protector.
+define void @test15d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test15d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test15d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test15d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test15d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca i32, align 4
+  %b = alloca float*, align 8
+  store i32 0, i32* %a, align 4
+  %0 = bitcast i32* %a to float*
+  store float* %0, float** %b, align 8
+  %1 = load float** %b, align 8
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1)
+  ret void
+}
+
+; test16a: Addr-of a local cast to a ptr of a different type (optimized)
+;           (e.g., int a; ... ; float *b = &a;)
+;          no ssp attribute
+; Requires no protector.
+define void @test16a() nounwind uwtable {
+entry:
+; LINUX-I386: test16a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test16a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test16a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test16a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a = alloca i32, align 4
+  store i32 0, i32* %a, align 4
+  %0 = bitcast i32* %a to float*
+  call void @funfloat(float* %0) nounwind
+  ret void
+}
+
+; test16b: Addr-of a local cast to a ptr of a different type (optimized)
+;           (e.g., int a; ... ; float *b = &a;)
+;          ssp attribute
+; Requires no protector.
+define void @test16b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test16b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test16b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test16b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test16b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a = alloca i32, align 4
+  store i32 0, i32* %a, align 4
+  %0 = bitcast i32* %a to float*
+  call void @funfloat(float* %0) nounwind
+  ret void
+}
+
+; test16c: Addr-of a local cast to a ptr of a different type (optimized)
+;           (e.g., int a; ... ; float *b = &a;)
+;          sspstrong attribute
+; Requires protector.
+define void @test16c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test16c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test16c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test16c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test16c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca i32, align 4
+  store i32 0, i32* %a, align 4
+  %0 = bitcast i32* %a to float*
+  call void @funfloat(float* %0) nounwind
+  ret void
+}
+
+; test16d: Addr-of a local cast to a ptr of a different type (optimized)
+;           (e.g., int a; ... ; float *b = &a;)
+;          sspreq attribute
+; Requires protector.
+define void @test16d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test16d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test16d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test16d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test16d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca i32, align 4
+  store i32 0, i32* %a, align 4
+  %0 = bitcast i32* %a to float*
+  call void @funfloat(float* %0) nounwind
+  ret void
+}
+
+; test17a: Addr-of a vector nested in a struct
+;          no ssp attribute
+; Requires no protector.
+define void @test17a() nounwind uwtable {
+entry:
+; LINUX-I386: test17a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test17a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test17a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test17a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %c = alloca %struct.vec, align 16
+  %y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0
+  %add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr) nounwind
+  ret void
+}
+
+; test17b: Addr-of a vector nested in a struct
+;          ssp attribute
+; Requires no protector.
+define void @test17b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test17b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test17b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test17b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test17b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %c = alloca %struct.vec, align 16
+  %y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0
+  %add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr) nounwind
+  ret void
+}
+
+; test17c: Addr-of a vector nested in a struct
+;          sspstrong attribute
+; Requires protector.
+define void @test17c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test17c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test17c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test17c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test17c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %c = alloca %struct.vec, align 16
+  %y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0
+  %add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr) nounwind
+  ret void
+}
+
+; test17d: Addr-of a vector nested in a struct
+;          sspreq attribute
+; Requires protector.
+define void @test17d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test17d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test17d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test17d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test17d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %c = alloca %struct.vec, align 16
+  %y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0
+  %add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr) nounwind
+  ret void
+}
+
+; test18a: Addr-of a variable passed into an invoke instruction.
+;          no ssp attribute
+; Requires no protector.
+define i32 @test18a() uwtable {
+entry:
+; LINUX-I386: test18a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test18a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test18a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test18a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a = alloca i32, align 4
+  %exn.slot = alloca i8*
+  %ehselector.slot = alloca i32
+  store i32 0, i32* %a, align 4
+  invoke void @_Z3exceptPi(i32* %a)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  ret i32 0
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  ret i32 0
+}
+
+; test18b: Addr-of a variable passed into an invoke instruction.
+;          ssp attribute
+; Requires no protector.
+define i32 @test18b() uwtable ssp {
+entry:
+; LINUX-I386: test18b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test18b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test18b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test18b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a = alloca i32, align 4
+  %exn.slot = alloca i8*
+  %ehselector.slot = alloca i32
+  store i32 0, i32* %a, align 4
+  invoke void @_Z3exceptPi(i32* %a)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  ret i32 0
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  ret i32 0
+}
+
+; test18c: Addr-of a variable passed into an invoke instruction.
+;          sspstrong attribute
+; Requires protector.
+define i32 @test18c() uwtable sspstrong {
+entry:
+; LINUX-I386: test18c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test18c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test18c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test18c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca i32, align 4
+  %exn.slot = alloca i8*
+  %ehselector.slot = alloca i32
+  store i32 0, i32* %a, align 4
+  invoke void @_Z3exceptPi(i32* %a)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  ret i32 0
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  ret i32 0
+}
+
+; test18d: Addr-of a variable passed into an invoke instruction.
+;          sspreq attribute
+; Requires protector.
+define i32 @test18d() uwtable sspreq {
+entry:
+; LINUX-I386: test18d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test18d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test18d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test18d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca i32, align 4
+  %exn.slot = alloca i8*
+  %ehselector.slot = alloca i32
+  store i32 0, i32* %a, align 4
+  invoke void @_Z3exceptPi(i32* %a)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  ret i32 0
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  ret i32 0
+}
+
+; test19a: Addr-of a struct element passed into an invoke instruction.
+;           (GEP followed by an invoke)
+;          no ssp attribute
+; Requires no protector.
+define i32 @test19a() uwtable {
+entry:
+; LINUX-I386: test19a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test19a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test19a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test19a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %c = alloca %struct.pair, align 4
+  %exn.slot = alloca i8*
+  %ehselector.slot = alloca i32
+  %a = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+  store i32 0, i32* %a, align 4
+  %a1 = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+  invoke void @_Z3exceptPi(i32* %a1)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  ret i32 0
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  ret i32 0
+}
+
+; test19b: Addr-of a struct element passed into an invoke instruction.
+;           (GEP followed by an invoke)
+;          ssp attribute
+; Requires no protector.
+define i32 @test19b() uwtable ssp {
+entry:
+; LINUX-I386: test19b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test19b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test19b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test19b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %c = alloca %struct.pair, align 4
+  %exn.slot = alloca i8*
+  %ehselector.slot = alloca i32
+  %a = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+  store i32 0, i32* %a, align 4
+  %a1 = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+  invoke void @_Z3exceptPi(i32* %a1)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  ret i32 0
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  ret i32 0
+}
+
+; test19c: Addr-of a struct element passed into an invoke instruction.
+;           (GEP followed by an invoke)
+;          sspstrong attribute
+; Requires protector.
+define i32 @test19c() uwtable sspstrong {
+entry:
+; LINUX-I386: test19c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test19c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test19c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test19c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %c = alloca %struct.pair, align 4
+  %exn.slot = alloca i8*
+  %ehselector.slot = alloca i32
+  %a = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+  store i32 0, i32* %a, align 4
+  %a1 = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+  invoke void @_Z3exceptPi(i32* %a1)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  ret i32 0
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  ret i32 0
+}
+
+; test19d: Addr-of a struct element passed into an invoke instruction.
+;           (GEP followed by an invoke)
+;          sspreq attribute
+; Requires protector.
+define i32 @test19d() uwtable sspreq {
+entry:
+; LINUX-I386: test19d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test19d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test19d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test19d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %c = alloca %struct.pair, align 4
+  %exn.slot = alloca i8*
+  %ehselector.slot = alloca i32
+  %a = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+  store i32 0, i32* %a, align 4
+  %a1 = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+  invoke void @_Z3exceptPi(i32* %a1)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  ret i32 0
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  ret i32 0
+}
+
+; test20a: Addr-of a pointer
+;          no ssp attribute
+; Requires no protector.
+define void @test20a() nounwind uwtable {
+entry:
+; LINUX-I386: test20a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test20a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test20a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test20a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a = alloca i32*, align 8
+  %b = alloca i32**, align 8
+  %call = call i32* @getp()
+  store i32* %call, i32** %a, align 8
+  store i32** %a, i32*** %b, align 8
+  %0 = load i32*** %b, align 8
+  call void @funcall2(i32** %0)
+  ret void
+}
+
+; test20b: Addr-of a pointer
+;          ssp attribute
+; Requires no protector.
+define void @test20b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test20b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test20b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test20b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test20b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a = alloca i32*, align 8
+  %b = alloca i32**, align 8
+  %call = call i32* @getp()
+  store i32* %call, i32** %a, align 8
+  store i32** %a, i32*** %b, align 8
+  %0 = load i32*** %b, align 8
+  call void @funcall2(i32** %0)
+  ret void
+}
+
+; test20c: Addr-of a pointer
+;          sspstrong attribute
+; Requires protector.
+define void @test20c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test20c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test20c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test20c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test20c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca i32*, align 8
+  %b = alloca i32**, align 8
+  %call = call i32* @getp()
+  store i32* %call, i32** %a, align 8
+  store i32** %a, i32*** %b, align 8
+  %0 = load i32*** %b, align 8
+  call void @funcall2(i32** %0)
+  ret void
+}
+
+; test20d: Addr-of a pointer
+;          sspreq attribute
+; Requires protector.
+define void @test20d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test20d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test20d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test20d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test20d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca i32*, align 8
+  %b = alloca i32**, align 8
+  %call = call i32* @getp()
+  store i32* %call, i32** %a, align 8
+  store i32** %a, i32*** %b, align 8
+  %0 = load i32*** %b, align 8
+  call void @funcall2(i32** %0)
+  ret void
+}
+
+; test21a: Addr-of a casted pointer
+;          no ssp attribute
+; Requires no protector.
+define void @test21a() nounwind uwtable {
+entry:
+; LINUX-I386: test21a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test21a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test21a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test21a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a = alloca i32*, align 8
+  %b = alloca float**, align 8
+  %call = call i32* @getp()
+  store i32* %call, i32** %a, align 8
+  %0 = bitcast i32** %a to float**
+  store float** %0, float*** %b, align 8
+  %1 = load float*** %b, align 8
+  call void @funfloat2(float** %1)
+  ret void
+}
+
+; test21b: Addr-of a casted pointer
+;          ssp attribute
+; Requires no protector.
+define void @test21b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test21b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test21b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test21b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test21b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a = alloca i32*, align 8
+  %b = alloca float**, align 8
+  %call = call i32* @getp()
+  store i32* %call, i32** %a, align 8
+  %0 = bitcast i32** %a to float**
+  store float** %0, float*** %b, align 8
+  %1 = load float*** %b, align 8
+  call void @funfloat2(float** %1)
+  ret void
+}
+
+; test21c: Addr-of a casted pointer
+;          sspstrong attribute
+; Requires protector.
+define void @test21c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test21c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test21c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test21c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test21c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca i32*, align 8
+  %b = alloca float**, align 8
+  %call = call i32* @getp()
+  store i32* %call, i32** %a, align 8
+  %0 = bitcast i32** %a to float**
+  store float** %0, float*** %b, align 8
+  %1 = load float*** %b, align 8
+  call void @funfloat2(float** %1)
+  ret void
+}
+
+; test21d: Addr-of a casted pointer
+;          sspreq attribute
+; Requires protector.
+define void @test21d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test21d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test21d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test21d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test21d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca i32*, align 8
+  %b = alloca float**, align 8
+  %call = call i32* @getp()
+  store i32* %call, i32** %a, align 8
+  %0 = bitcast i32** %a to float**
+  store float** %0, float*** %b, align 8
+  %1 = load float*** %b, align 8
+  call void @funfloat2(float** %1)
+  ret void
+}
+
+; test22a: [2 x i8] in a class
+;          no ssp attribute
+; Requires no protector.
+define signext i8 @test22a() nounwind uwtable {
+entry:
+; LINUX-I386: test22a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test22a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test22a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test22a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a = alloca %class.A, align 1
+  %array = getelementptr inbounds %class.A* %a, i32 0, i32 0
+  %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
+  %0 = load i8* %arrayidx, align 1
+  ret i8 %0
+}
+
+; test22b: [2 x i8] in a class
+;          ssp attribute
+; Requires no protector.
+define signext i8 @test22b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test22b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test22b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test22b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test22b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a = alloca %class.A, align 1
+  %array = getelementptr inbounds %class.A* %a, i32 0, i32 0
+  %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
+  %0 = load i8* %arrayidx, align 1
+  ret i8 %0
+}
+
+; test22c: [2 x i8] in a class
+;          sspstrong attribute
+; Requires protector.
+define signext i8 @test22c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test22c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test22c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test22c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test22c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca %class.A, align 1
+  %array = getelementptr inbounds %class.A* %a, i32 0, i32 0
+  %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
+  %0 = load i8* %arrayidx, align 1
+  ret i8 %0
+}
+
+; test22d: [2 x i8] in a class
+;          sspreq attribute
+; Requires protector.
+define signext i8 @test22d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test22d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test22d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test22d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test22d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca %class.A, align 1
+  %array = getelementptr inbounds %class.A* %a, i32 0, i32 0
+  %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
+  %0 = load i8* %arrayidx, align 1
+  ret i8 %0
+}
+
+; test23a: [2 x i8] nested in several layers of structs and unions
+;          no ssp attribute
+; Requires no protector.
+define signext i8 @test23a() nounwind uwtable {
+entry:
+; LINUX-I386: test23a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test23a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test23a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test23a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %x = alloca %struct.deep, align 1
+  %b = getelementptr inbounds %struct.deep* %x, i32 0, i32 0
+  %c = bitcast %union.anon* %b to %struct.anon*
+  %d = getelementptr inbounds %struct.anon* %c, i32 0, i32 0
+  %e = getelementptr inbounds %struct.anon.0* %d, i32 0, i32 0
+  %array = bitcast %union.anon.1* %e to [2 x i8]*
+  %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
+  %0 = load i8* %arrayidx, align 1
+  ret i8 %0
+}
+
+; test23b: [2 x i8] nested in several layers of structs and unions
+;          ssp attribute
+; Requires no protector.
+define signext i8 @test23b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test23b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test23b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test23b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test23b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %x = alloca %struct.deep, align 1
+  %b = getelementptr inbounds %struct.deep* %x, i32 0, i32 0
+  %c = bitcast %union.anon* %b to %struct.anon*
+  %d = getelementptr inbounds %struct.anon* %c, i32 0, i32 0
+  %e = getelementptr inbounds %struct.anon.0* %d, i32 0, i32 0
+  %array = bitcast %union.anon.1* %e to [2 x i8]*
+  %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
+  %0 = load i8* %arrayidx, align 1
+  ret i8 %0
+}
+
+; test23c: [2 x i8] nested in several layers of structs and unions
+;          sspstrong attribute
+; Requires protector.
+define signext i8 @test23c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test23c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test23c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test23c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test23c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %x = alloca %struct.deep, align 1
+  %b = getelementptr inbounds %struct.deep* %x, i32 0, i32 0
+  %c = bitcast %union.anon* %b to %struct.anon*
+  %d = getelementptr inbounds %struct.anon* %c, i32 0, i32 0
+  %e = getelementptr inbounds %struct.anon.0* %d, i32 0, i32 0
+  %array = bitcast %union.anon.1* %e to [2 x i8]*
+  %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
+  %0 = load i8* %arrayidx, align 1
+  ret i8 %0
+}
+
+; test23d: [2 x i8] nested in several layers of structs and unions
+;          sspreq attribute
+; Requires protector.
+define signext i8 @test23d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test23d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test23d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test23d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test23d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %x = alloca %struct.deep, align 1
+  %b = getelementptr inbounds %struct.deep* %x, i32 0, i32 0
+  %c = bitcast %union.anon* %b to %struct.anon*
+  %d = getelementptr inbounds %struct.anon* %c, i32 0, i32 0
+  %e = getelementptr inbounds %struct.anon.0* %d, i32 0, i32 0
+  %array = bitcast %union.anon.1* %e to [2 x i8]*
+  %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
+  %0 = load i8* %arrayidx, align 1
+  ret i8 %0
+}
+
+; test24a: Variable sized alloca
+;          no ssp attribute
+; Requires no protector.
+define void @test24a(i32 %n) nounwind uwtable {
+entry:
+; LINUX-I386: test24a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test24a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test24a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test24a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %n.addr = alloca i32, align 4
+  %a = alloca i32*, align 8
+  store i32 %n, i32* %n.addr, align 4
+  %0 = load i32* %n.addr, align 4
+  %conv = sext i32 %0 to i64
+  %1 = alloca i8, i64 %conv
+  %2 = bitcast i8* %1 to i32*
+  store i32* %2, i32** %a, align 8
+  ret void
+}
+
+; test24b: Variable sized alloca
+;          ssp attribute
+; Requires protector.
+define void @test24b(i32 %n) nounwind uwtable ssp {
+entry:
+; LINUX-I386: test24b:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test24b:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test24b:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test24b:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %n.addr = alloca i32, align 4
+  %a = alloca i32*, align 8
+  store i32 %n, i32* %n.addr, align 4
+  %0 = load i32* %n.addr, align 4
+  %conv = sext i32 %0 to i64
+  %1 = alloca i8, i64 %conv
+  %2 = bitcast i8* %1 to i32*
+  store i32* %2, i32** %a, align 8
+  ret void
+}
+
+; test24c: Variable sized alloca
+;          sspstrong attribute
+; Requires protector.
+define void @test24c(i32 %n) nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test24c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test24c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test24c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test24c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %n.addr = alloca i32, align 4
+  %a = alloca i32*, align 8
+  store i32 %n, i32* %n.addr, align 4
+  %0 = load i32* %n.addr, align 4
+  %conv = sext i32 %0 to i64
+  %1 = alloca i8, i64 %conv
+  %2 = bitcast i8* %1 to i32*
+  store i32* %2, i32** %a, align 8
+  ret void
+}
+
+; test24d: Variable sized alloca
+;          sspreq attribute
+; Requires protector.
+define void @test24d(i32 %n) nounwind uwtable sspreq  {
+entry:
+; LINUX-I386: test24d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test24d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test24d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test24d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %n.addr = alloca i32, align 4
+  %a = alloca i32*, align 8
+  store i32 %n, i32* %n.addr, align 4
+  %0 = load i32* %n.addr, align 4
+  %conv = sext i32 %0 to i64
+  %1 = alloca i8, i64 %conv
+  %2 = bitcast i8* %1 to i32*
+  store i32* %2, i32** %a, align 8
+  ret void
+}
+
+; test25a: array of [4 x i32]
+;          no ssp attribute
+; Requires no protector.
+define i32 @test25a() nounwind uwtable {
+entry:
+; LINUX-I386: test25a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test25a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test25a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test25a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %a = alloca [4 x i32], align 16
+  %arrayidx = getelementptr inbounds [4 x i32]* %a, i32 0, i64 0
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+}
+
+; test25b: array of [4 x i32]
+;          ssp attribute
+; Requires no protector, except for Darwin which _does_ require a protector.
+define i32 @test25b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test25b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test25b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test25b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test25b:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca [4 x i32], align 16
+  %arrayidx = getelementptr inbounds [4 x i32]* %a, i32 0, i64 0
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+}
+
+; test25c: array of [4 x i32]
+;          sspstrong attribute
+; Requires protector.
+define i32 @test25c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test25c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test25c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test25c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test25c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca [4 x i32], align 16
+  %arrayidx = getelementptr inbounds [4 x i32]* %a, i32 0, i64 0
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+}
+
+; test25d: array of [4 x i32]
+;          sspreq attribute
+; Requires protector.
+define i32 @test25d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test25d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test25d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test25d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test25d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %a = alloca [4 x i32], align 16
+  %arrayidx = getelementptr inbounds [4 x i32]* %a, i32 0, i64 0
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+}
+
+; test26: Nested structure, no arrays, no address-of expressions.
+;         Verify that the resulting gep-of-gep does not incorrectly trigger
+;         a stack protector.
+;         sspstrong attribute
+; Requires no protector.
+define void @test26() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test26:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test26:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test26:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test26:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+  %c = alloca %struct.nest, align 4
+  %b = getelementptr inbounds %struct.nest* %c, i32 0, i32 1
+  %_a = getelementptr inbounds %struct.pair* %b, i32 0, i32 0
+  %0 = load i32* %_a, align 4
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %0)
+  ret void
+}
+
+; test27: Address-of a structure taken in a function with a loop where
+;         the alloca is an incoming value to a PHI node and a use of that PHI
+;         node is also an incoming value.
+;         Verify that the address-of analysis does not get stuck in infinite
+;         recursion when chasing the alloca through the PHI nodes.
+; Requires protector.
+define i32 @test27(i32 %arg) nounwind uwtable sspstrong {
+bb:
+; LINUX-I386: test27:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test27:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test27:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test27:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+  %tmp = alloca %struct.small*, align 8
+  %tmp1 = call i32 (...)* @dummy(%struct.small** %tmp) nounwind
+  %tmp2 = load %struct.small** %tmp, align 8
+  %tmp3 = ptrtoint %struct.small* %tmp2 to i64
+  %tmp4 = trunc i64 %tmp3 to i32
+  %tmp5 = icmp sgt i32 %tmp4, 0
+  br i1 %tmp5, label %bb6, label %bb21
+
+bb6:                                              ; preds = %bb17, %bb
+  %tmp7 = phi %struct.small* [ %tmp19, %bb17 ], [ %tmp2, %bb ]
+  %tmp8 = phi i64 [ %tmp20, %bb17 ], [ 1, %bb ]
+  %tmp9 = phi i32 [ %tmp14, %bb17 ], [ %tmp1, %bb ]
+  %tmp10 = getelementptr inbounds %struct.small* %tmp7, i64 0, i32 0
+  %tmp11 = load i8* %tmp10, align 1
+  %tmp12 = icmp eq i8 %tmp11, 1
+  %tmp13 = add nsw i32 %tmp9, 8
+  %tmp14 = select i1 %tmp12, i32 %tmp13, i32 %tmp9
+  %tmp15 = trunc i64 %tmp8 to i32
+  %tmp16 = icmp eq i32 %tmp15, %tmp4
+  br i1 %tmp16, label %bb21, label %bb17
+
+bb17:                                             ; preds = %bb6
+  %tmp18 = getelementptr inbounds %struct.small** %tmp, i64 %tmp8
+  %tmp19 = load %struct.small** %tmp18, align 8
+  %tmp20 = add i64 %tmp8, 1
+  br label %bb6
+
+bb21:                                             ; preds = %bb6, %bb
+  %tmp22 = phi i32 [ %tmp1, %bb ], [ %tmp14, %bb6 ]
+  %tmp23 = call i32 (...)* @dummy(i32 %tmp22) nounwind
+  ret i32 undef
+}
 
-declare i32 @printf(i8*, ...) nounwind
+declare double @testi_aux()
+declare i8* @strcpy(i8*, i8*)
+declare i32 @printf(i8*, ...)
+declare void @funcall(i32*)
+declare void @funcall2(i32**)
+declare void @funfloat(float*)
+declare void @funfloat2(float**)
+declare void @_Z3exceptPi(i32*)
+declare i32 @__gxx_personality_v0(...)
+declare i32* @getp()
+declare i32 @dummy(...)
diff --git a/test/CodeGen/X86/stack-update-frame-opcode.ll b/test/CodeGen/X86/stack-update-frame-opcode.ll
new file mode 100644
index 0000000..9a5a242
--- /dev/null
+++ b/test/CodeGen/X86/stack-update-frame-opcode.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck -check-prefix=CORE_LP64 %s
+; RUN: llc -mtriple=x86_64-pc-linux -mcpu=atom < %s | FileCheck -check-prefix=ATOM_LP64 %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnux32 -mcpu=corei7 < %s | FileCheck -check-prefix=CORE_ILP32 %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnux32 -mcpu=atom < %s | FileCheck -check-prefix=ATOM_ILP32 %s
+
+define i32 @bar(i32 %a) nounwind {
+entry:
+  %arr = alloca [400 x i32], align 16
+
+; There is a 2x2 variation matrix here (-mcpu x data model), one prefix per cell:
+; Atoms use LEA to update the SP; cores use sub/add to update the SP.
+; Opcode bitness depends on data model ('q' forms for LP64, 'l' forms for gnux32).
+
+; CORE_LP64: subq $1608
+; CORE_ILP32: subl $1608
+; ATOM_LP64: leaq -1608
+; ATOM_ILP32: leal -1608
+
+  %arraydecay = getelementptr inbounds [400 x i32]* %arr, i64 0, i64 0
+  %call = call i32 @foo(i32 %a, i32* %arraydecay) nounwind
+  ret i32 %call
+
+; CORE_LP64: addq $1608
+; CORE_ILP32: addl $1608
+; ATOM_LP64: leaq 1608
+; ATOM_ILP32: leal 1608
+
+}
+
+declare i32 @foo(i32, i32*)
+
diff --git a/test/CodeGen/X86/subtarget-feature-change.ll b/test/CodeGen/X86/subtarget-feature-change.ll
new file mode 100644
index 0000000..cd67729
--- /dev/null
+++ b/test/CodeGen/X86/subtarget-feature-change.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; This should not generate SSE instructions: attribute group #0 turns off every
+; SSE level ("-sse,-sse2,-sse3,..."), so the multiply is expected as x87 code.
+; CHECK: without.sse:
+; CHECK: flds
+; CHECK: fmuls
+; CHECK: fstps
+define void @without.sse(float* nocapture %a, float* nocapture %b, float* nocapture %c, i32 %n) #0 {
+entry:
+  %cmp9 = icmp sgt i32 %n, 0
+  br i1 %cmp9, label %for.body, label %for.end
+
+for.body:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds float* %c, i64 %indvars.iv
+  %1 = load float* %arrayidx2, align 4, !tbaa !0
+  %mul = fmul float %0, %1
+  %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
+  store float %mul, float* %arrayidx4, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+; This should generate SSE instructions: attribute group #1 re-enables
+; "+sse,+sse2,+sse3,+ssse3" on this function while keeping the same target-cpu.
+; CHECK: with.sse:
+; CHECK: movss
+; CHECK: mulss
+; CHECK: movss
+define void @with.sse(float* nocapture %a, float* nocapture %b, float* nocapture %c, i32 %n) #1 {
+entry:
+  %cmp9 = icmp sgt i32 %n, 0
+  br i1 %cmp9, label %for.body, label %for.end
+
+for.body:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds float* %c, i64 %indvars.iv
+  %1 = load float* %arrayidx2, align 4, !tbaa !0
+  %mul = fmul float %0, %1
+  %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
+  store float %mul, float* %arrayidx4, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+attributes #0 = { nounwind optsize ssp uwtable "target-cpu"="core2" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,-sse,-avx,-sse41,-ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,-sse2,-sse3" }
+attributes #1 = { nounwind optsize ssp uwtable "target-cpu"="core2" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,+sse,-avx,-sse41,+ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,+sse2,+sse3" }
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/tailcall-structret.ll b/test/CodeGen/X86/tailcall-structret.ll
index d8be4b2..dcfefe8 100644
--- a/test/CodeGen/X86/tailcall-structret.ll
+++ b/test/CodeGen/X86/tailcall-structret.ll
@@ -1,6 +1,7 @@
-; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -mtriple=i686-unknown-linux -tailcallopt | FileCheck %s
 define fastcc { { i8*, i8* }*, i8*} @init({ { i8*, i8* }*, i8*}, i32) {
 entry:
       %2 = tail call fastcc { { i8*, i8* }*, i8* } @init({ { i8*, i8*}*, i8*} %0, i32 %1)
       ret { { i8*, i8* }*, i8*} %2
+; CHECK: jmp init
 }
diff --git a/test/CodeGen/X86/tailcallbyval.ll b/test/CodeGen/X86/tailcallbyval.ll
index 118eee6..9a0b57c 100644
--- a/test/CodeGen/X86/tailcallbyval.ll
+++ b/test/CodeGen/X86/tailcallbyval.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
-; RUN: llc < %s -march=x86 -tailcallopt | grep "movl[[:space:]]*4(%esp), %eax" | count 1
+; RUN: llc < %s -mtriple=i686-unknown-linux -tailcallopt | FileCheck %s
 %struct.s = type {i32, i32, i32, i32, i32, i32, i32, i32,
                   i32, i32, i32, i32, i32, i32, i32, i32,
                   i32, i32, i32, i32, i32, i32, i32, i32 }
@@ -9,10 +8,14 @@ entry:
         %tmp2 = getelementptr %struct.s* %a, i32 0, i32 0
         %tmp3 = load i32* %tmp2
         ret i32 %tmp3
+; CHECK: tailcallee
+; CHECK: movl 4(%esp), %eax
 }
 
 define  fastcc i32 @tailcaller(%struct.s* byval %a) nounwind {
 entry:
         %tmp4 = tail call fastcc i32 @tailcallee(%struct.s* byval %a )
         ret i32 %tmp4
+; CHECK: tailcaller
+; CHECK: jmp tailcallee
 }
diff --git a/test/CodeGen/X86/tailcallfp.ll b/test/CodeGen/X86/tailcallfp.ll
index c0b609a..22a7930 100644
--- a/test/CodeGen/X86/tailcallfp.ll
+++ b/test/CodeGen/X86/tailcallfp.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -tailcallopt | not grep call
+; RUN: llc < %s -march=x86 -tailcallopt | FileCheck %s
 define fastcc i32 @bar(i32 %X, i32(double, i32) *%FP) {
      %Y = tail call fastcc i32 %FP(double 0.0, i32 %X)
      ret i32 %Y
+; CHECK: jmpl
 }
diff --git a/test/CodeGen/X86/tailcallpic1.ll b/test/CodeGen/X86/tailcallpic1.ll
index 60e3be5..ff590a1 100644
--- a/test/CodeGen/X86/tailcallpic1.ll
+++ b/test/CodeGen/X86/tailcallpic1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s  -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep TAILCALL
+; RUN: llc < %s  -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | FileCheck %s
 
 define protected fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
 entry:
@@ -9,4 +9,5 @@ define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
 entry:
 	%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 )		; <i32> [#uses=1]
 	ret i32 %tmp11
+; CHECK: jmp tailcallee
 }
diff --git a/test/CodeGen/X86/tailcallpic2.ll b/test/CodeGen/X86/tailcallpic2.ll
index eaa7631..1b6bdb7 100644
--- a/test/CodeGen/X86/tailcallpic2.ll
+++ b/test/CodeGen/X86/tailcallpic2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s  -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep -v TAILCALL
+; RUN: llc < %s  -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | FileCheck %s
 
 define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
 entry:
@@ -9,4 +9,7 @@ define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
 entry:
 	%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 )		; <i32> [#uses=1]
 	ret i32 %tmp11
+; CHECK: movl tailcallee@GOT
+; CHECK: jmpl
 }
+
diff --git a/test/CodeGen/X86/v8i1-masks.ll b/test/CodeGen/X86/v8i1-masks.ll
index abb4b39..8cbfb5d 100644
--- a/test/CodeGen/X86/v8i1-masks.ll
+++ b/test/CodeGen/X86/v8i1-masks.ll
@@ -1,12 +1,12 @@
 ; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -o - < %s | FileCheck %s
 
 ;CHECK: and_masks
-;CHECK: vmovups
+;CHECK: vmovaps
 ;CHECK: vcmpltp
 ;CHECK: vcmpltp
 ;CHECK: vandps
 ;CHECK: vandps
-;CHECK: vmovups
+;CHECK: vmovaps
 ;CHECK: ret
 
 define void @and_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
@@ -17,7 +17,7 @@ define void @and_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwi
   %m1 = fcmp olt <8 x float> %v2, %v0
   %mand = and <8 x i1> %m1, %m0
   %r = zext <8 x i1> %mand to <8 x i32>
-  store <8 x i32> %r, <8 x i32>* undef, align 16
+  store <8 x i32> %r, <8 x i32>* undef, align 32
   ret void
 }
 
@@ -25,7 +25,7 @@ define void @and_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwi
 ;CHECK: vcmpltps
 ;CHECK: vxorps
 ;CHECK: vandps
-;CHECK: vmovups
+;CHECK: vmovaps
 ;CHECK: ret
 define void @neg_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
   %v0 = load <8 x float>* %a, align 16
@@ -33,7 +33,7 @@ define void @neg_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwi
   %m0 = fcmp olt <8 x float> %v1, %v0
   %mand = xor <8 x i1> %m0, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
   %r = zext <8 x i1> %mand to <8 x i32>
-  store <8 x i32> %r, <8 x i32>* undef, align 16
+  store <8 x i32> %r, <8 x i32>* undef, align 32
   ret void
 }
 
diff --git a/test/CodeGen/X86/vec_fpext.ll b/test/CodeGen/X86/vec_fpext.ll
index dc0464f..e4a8f46 100644
--- a/test/CodeGen/X86/vec_fpext.ll
+++ b/test/CodeGen/X86/vec_fpext.ll
@@ -29,8 +29,8 @@ entry:
 ; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}}
 ; CHECK: cvtps2pd 16(%{{.+}}), %xmm{{[0-9]+}}
 ; CHECK: cvtps2pd 24(%{{.+}}), %xmm{{[0-9]+}}
-; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
 ; AVX: vcvtps2pd 16(%{{.+}}), %ymm{{[0-9]+}}
+; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
   %0 = load <8 x float>* %in
   %1 = fpext <8 x float> %0 to <8 x double>
   store <8 x double> %1, <8 x double>* %out, align 1
diff --git a/test/CodeGen/X86/vec_sdiv_to_shift.ll b/test/CodeGen/X86/vec_sdiv_to_shift.ll
new file mode 100644
index 0000000..35e052d
--- /dev/null
+++ b/test/CodeGen/X86/vec_sdiv_to_shift.ll
@@ -0,0 +1,72 @@
+; RUN: llc < %s -march=x86-64 -mcpu=penryn -mattr=+avx2 | FileCheck %s
+
+
+define <8 x i16> @sdiv_vec8x16(<8 x i16> %var) {
+entry:
+; CHECK: sdiv_vec8x16
+; CHECK: vpsraw  $15
+; CHECK: vpsrlw  $11
+; CHECK: vpaddw
+; CHECK: vpsraw  $5
+; CHECK: ret
+  %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
+  ret <8 x i16> %0
+}
+
+define <4 x i32> @sdiv_zero(<4 x i32> %var) {
+entry:
+; CHECK: sdiv_zero
+; CHECK-NOT: sra
+; CHECK: ret
+  %0 = sdiv <4 x i32> %var, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i32> %0
+}
+
+define <4 x i32> @sdiv_vec4x32(<4 x i32> %var) {
+entry:
+; CHECK: sdiv_vec4x32
+; CHECK: vpsrad $31
+; CHECK: vpsrld $28
+; CHECK: vpaddd
+; CHECK: vpsrad $4
+; CHECK: ret
+  %0 = sdiv <4 x i32> %var, <i32 16, i32 16, i32 16, i32 16>
+  ret <4 x i32> %0
+}
+
+define <4 x i32> @sdiv_negative(<4 x i32> %var) {
+entry:
+; CHECK: sdiv_negative
+; CHECK: vpsrad $31
+; CHECK: vpsrld $28
+; CHECK: vpaddd
+; CHECK: vpsrad $4
+; CHECK: vpsubd
+; CHECK: ret
+  %0 = sdiv <4 x i32> %var, <i32 -16, i32 -16, i32 -16, i32 -16>
+  ret <4 x i32> %0
+}
+
+define <8 x i32> @sdiv8x32(<8 x i32> %var) {
+entry:
+; CHECK: sdiv8x32
+; CHECK: vpsrad $31
+; CHECK: vpsrld $26
+; CHECK: vpaddd
+; CHECK: vpsrad $6
+; CHECK: ret
+  %0 = sdiv <8 x i32> %var, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
+  ret <8 x i32> %0
+}
+
+define <16 x i16> @sdiv16x16(<16 x i16> %var) {
+entry:
+; CHECK: sdiv16x16
+; CHECK: vpsraw $15
+; CHECK: vpsrlw $14
+; CHECK: vpaddw
+; CHECK: vpsraw $2
+; CHECK: ret
+  %a0 = sdiv <16 x i16> %var, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
+  ret <16 x i16> %a0
+}
diff --git a/test/CodeGen/X86/vec_splat-2.ll b/test/CodeGen/X86/vec_splat-2.ll
index f105de4..5c668b7 100644
--- a/test/CodeGen/X86/vec_splat-2.ll
+++ b/test/CodeGen/X86/vec_splat-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse2 | grep pshufd | count 1
+; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse2 | FileCheck %s
 
 define void @test(<2 x i64>* %P, i8 %x) nounwind {
 	%tmp = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0		; <<16 x i8>> [#uses=1]
@@ -23,4 +23,11 @@ define void @test(<2 x i64>* %P, i8 %x) nounwind {
 	%tmp73.upgrd.1 = bitcast <16 x i8> %tmp73 to <2 x i64>		; <<2 x i64>> [#uses=1]
 	store <2 x i64> %tmp73.upgrd.1, <2 x i64>* %P
 	ret void
+
+; CHECK: test:
+; CHECK-NOT: pshufd
+; CHECK: punpcklbw
+; CHECK: punpcklbw
+; CHECK: pshufd $0
+; CHECK-NOT: pshufd
 }
diff --git a/test/CodeGen/X86/vec_splat-3.ll b/test/CodeGen/X86/vec_splat-3.ll
index feacc42..cf0ecf4 100644
--- a/test/CodeGen/X86/vec_splat-3.ll
+++ b/test/CodeGen/X86/vec_splat-3.ll
@@ -1,55 +1,230 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse41 -o %t
-; RUN: grep punpcklwd %t | count 4
-; RUN: grep punpckhwd %t | count 4
-; RUN: grep "pshufd" %t | count 8
+; RUN: llc <%s -march=x86 -mcpu=penryn -mattr=sse41 | FileCheck %s
 
 ; Splat test for v8i16
-; Should generate with pshufd with masks $0, $85, $170, $255 (each mask is used twice)
 define <8 x i16> @shuf_8i16_0(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef , i32 undef >
+	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef>
 	ret <8 x i16> %tmp6
+
+; CHECK: shuf_8i16_0:
+; CHECK: pshuflw $0
 }
 
 define <8 x i16> @shuf_8i16_1(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 	ret <8 x i16> %tmp6
+
+; CHECK: shuf_8i16_1:
+; CHECK: pshuflw $5
 }
 
 define <8 x i16> @shuf_8i16_2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef , i32 undef >
+	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef, i32 undef>
 	ret <8 x i16> %tmp6
+
+; CHECK: shuf_8i16_2:
+; CHECK: punpcklwd
+; CHECK-NEXT: pshufd $-86
 }
 
 define <8 x i16> @shuf_8i16_3(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 	ret <8 x i16> %tmp6
+
+; CHECK: shuf_8i16_3:
+; CHECK: pshuflw $15
 }
 
 define <8 x i16> @shuf_8i16_4(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef , i32 undef >
+	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef>
 	ret <8 x i16> %tmp6
+
+; CHECK: shuf_8i16_4:
+; CHECK: movhlps
 }
 
 define <8 x i16> @shuf_8i16_5(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 undef, i32 undef , i32 undef >
+	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
 	ret <8 x i16> %tmp6
+
+; CHECK: shuf_8i16_5:
+; CHECK: punpckhwd
+; CHECK-NEXT: pshufd $85
 }
 
 define <8 x i16> @shuf_8i16_6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 6, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 6, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 	ret <8 x i16> %tmp6
-}
 
+; CHECK: shuf_8i16_6:
+; CHECK: punpckhwd
+; CHECK-NEXT: pshufd $-86
+}
 
 define <8 x i16> @shuf_8i16_7(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef , i32 undef >
+	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
 	ret <8 x i16> %tmp6
+
+; CHECK: shuf_8i16_7:
+; CHECK: punpckhwd
+; CHECK-NEXT: pshufd $-1
+}
+
+; Splat test for v16i8
+define <16 x i8> @shuf_16i8_8(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+	ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_8:
+; CHECK: punpcklbw
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: pshufd $0
+}
+
+define <16 x i8> @shuf_16i8_9(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
+	ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_9:
+; CHECK: punpcklbw
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: pshufd $85
+}
+
+define <16 x i8> @shuf_16i8_10(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+	ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_10:
+; CHECK: punpcklbw
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: pshufd $-86
+}
+
+define <16 x i8> @shuf_16i8_11(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 3, i32 undef, i32 undef, i32 3, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+	ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_11:
+; CHECK: punpcklbw
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: pshufd $-1
+}
+
+
+define <16 x i8> @shuf_16i8_12(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
+	ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_12:
+; CHECK: pshufd $5
+}
+
+define <16 x i8> @shuf_16i8_13(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+	ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_13:
+; CHECK: punpcklbw
+; CHECK-NEXT: punpckhbw
+; CHECK-NEXT: pshufd $85
+}
+
+define <16 x i8> @shuf_16i8_14(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 6, i32 undef, i32 undef, i32 6, i32 undef, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
+	ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_14:
+; CHECK: punpcklbw
+; CHECK-NEXT: punpckhbw
+; CHECK-NEXT: pshufd $-86
+}
+
+define <16 x i8> @shuf_16i8_15(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
+	ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_15:
+; CHECK: punpcklbw
+; CHECK-NEXT: punpckhbw
+; CHECK-NEXT: pshufd $-1
+}
+
+define <16 x i8> @shuf_16i8_16(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 8, i32 undef, i32 undef, i32 8, i32 undef, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+	ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_16:
+; CHECK: punpckhbw
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: pshufd $0
+}
+
+define <16 x i8> @shuf_16i8_17(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 9, i32 undef, i32 undef, i32 9, i32 undef, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
+	ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_17:
+; CHECK: punpckhbw
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: pshufd $85
+}
+
+define <16 x i8> @shuf_16i8_18(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 10, i32 undef, i32 undef, i32 10, i32 undef, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
+	ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_18:
+; CHECK: punpckhbw
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: pshufd $-86
+}
+
+define <16 x i8> @shuf_16i8_19(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 11, i32 undef, i32 undef, i32 11, i32 undef, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
+	ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_19:
+; CHECK: punpckhbw
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: pshufd $-1
+}
+
+define <16 x i8> @shuf_16i8_20(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 12, i32 undef, i32 undef, i32 12, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
+	ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_20:
+; CHECK: punpckhbw
+; CHECK-NEXT: punpckhbw
+; CHECK-NEXT: pshufd $0
+}
+
+define <16 x i8> @shuf_16i8_21(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 13, i32 undef, i32 undef, i32 13, i32 undef, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13>
+	ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_21:
+; CHECK: punpckhbw
+; CHECK-NEXT: punpckhbw
+; CHECK-NEXT: pshufd $85
+}
+
+define <16 x i8> @shuf_16i8_22(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 14, i32 undef, i32 undef, i32 14, i32 undef, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14>
+	ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_22:
+; CHECK: punpckhbw
+; CHECK-NEXT: punpckhbw
+; CHECK-NEXT: pshufd $-86
+}
+
+define <16 x i8> @shuf_16i8_23(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 15, i32 undef, i32 undef, i32 15, i32 undef, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+	ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_23:
+; CHECK: punpckhbw
+; CHECK-NEXT: punpckhbw
+; CHECK-NEXT: pshufd $-1
 }
diff --git a/test/CodeGen/X86/vec_splat-4.ll b/test/CodeGen/X86/vec_splat-4.ll
deleted file mode 100644
index 374acfa..0000000
--- a/test/CodeGen/X86/vec_splat-4.ll
+++ /dev/null
@@ -1,104 +0,0 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse41 -o %t
-; RUN: grep punpcklbw %t | count 16
-; RUN: grep punpckhbw %t | count 16
-; RUN: grep "pshufd" %t | count 16
-
-; Should generate with pshufd with masks $0, $85, $170, $255 (each mask is used 4 times)
-
-; Splat test for v16i8
-define <16 x i8 > @shuf_16i8_0(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 0, i32 0 , i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 >
-	ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_1(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef  >
-	ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_2(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 2 , i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2 >
-	ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_3(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 3, i32 undef, i32 undef, i32 3, i32 undef, i32 3, i32 3 , i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3 >
-	ret <16 x i8 > %tmp6
-}
-
-
-define <16 x i8 > @shuf_16i8_4(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef , i32 undef  >
-	ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_5(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 5, i32 5 , i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5 >
-	ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_6(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 6, i32 undef, i32 undef, i32 6, i32 undef, i32 6, i32 6 , i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6 >
-	ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_7(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef , i32 undef , i32 undef, i32 undef, i32 undef , i32 undef  >
-	ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_8(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 8, i32 undef, i32 undef, i32 8, i32 undef, i32 8, i32 8 , i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8 >
-	ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_9(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 9, i32 undef, i32 undef, i32 9, i32 undef, i32 9, i32 9 , i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9 >
-	ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_10(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 10, i32 undef, i32 undef, i32 10, i32 undef, i32 10, i32 10 , i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10 >
-	ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_11(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 11, i32 undef, i32 undef, i32 11, i32 undef, i32 11, i32 11 , i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11 >
-	ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_12(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 12, i32 undef, i32 undef, i32 12, i32 undef, i32 12, i32 12 , i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12 >
-	ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_13(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 13, i32 undef, i32 undef, i32 13, i32 undef, i32 13, i32 13 , i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13 >
-	ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_14(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 14, i32 undef, i32 undef, i32 14, i32 undef, i32 14, i32 14 , i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14 >
-	ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_15(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
-	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 15, i32 undef, i32 undef, i32 15, i32 undef, i32 15, i32 15 , i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15 >
-	ret <16 x i8 > %tmp6
-}
diff --git a/test/CodeGen/X86/vec_splat.ll b/test/CodeGen/X86/vec_splat.ll
index 24d8487..deedee8 100644
--- a/test/CodeGen/X86/vec_splat.ll
+++ b/test/CodeGen/X86/vec_splat.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse2 | grep pshufd
-; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse3 | grep movddup
+; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse2 | FileCheck %s -check-prefix=SSE2
+; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse3 | FileCheck %s -check-prefix=SSE3
 
 define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {
 	%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0		; <<4 x float>> [#uses=1]
@@ -10,6 +10,12 @@ define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {
 	%tmp10 = fmul <4 x float> %tmp8, %tmp6		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp10, <4 x float>* %P
 	ret void
+
+; SSE2: test_v4sf:
+; SSE2: pshufd $0
+
+; SSE3: test_v4sf:
+; SSE3: pshufd $0
 }
 
 define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) nounwind {
@@ -19,4 +25,10 @@ define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) nounwind {
 	%tmp6 = fmul <2 x double> %tmp4, %tmp2		; <<2 x double>> [#uses=1]
 	store <2 x double> %tmp6, <2 x double>* %P
 	ret void
+
+; SSE2: test_v2sd:
+; SSE2: shufpd $0
+
+; SSE3: test_v2sd:
+; SSE3: movddup
 }
diff --git a/test/CodeGen/X86/vector-gep.ll b/test/CodeGen/X86/vector-gep.ll
index d08e2a0..ec93ce0 100644
--- a/test/CodeGen/X86/vector-gep.ll
+++ b/test/CodeGen/X86/vector-gep.ll
@@ -8,10 +8,8 @@ entry:
   %vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32 1
   %vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32 2
   %vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32 3
-;CHECK: pslld $2
 ;CHECK: padd
   %A2 = getelementptr <4 x i32*> %vecinit6.i, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
-;CHECK: pslld $2
 ;CHECK: padd
   %A3 = getelementptr <4 x i32*> %A2, <4 x i32> <i32 10, i32 14, i32 19, i32 233>
   ret <4 x i32*> %A3
@@ -21,7 +19,6 @@ entry:
 ;CHECK: AGEP1:
 define i32 @AGEP1(<4 x i32*> %param) nounwind {
 entry:
-;CHECK: pslld $2
 ;CHECK: padd
   %A2 = getelementptr <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
   %k = extractelement <4 x i32*> %A2, i32 3
diff --git a/test/CodeGen/X86/win_ftol2.ll b/test/CodeGen/X86/win_ftol2.ll
index 596b426..7f8ae07 100644
--- a/test/CodeGen/X86/win_ftol2.ll
+++ b/test/CodeGen/X86/win_ftol2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL
+; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=generic | FileCheck %s -check-prefix=FTOL
 ; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
 ; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=COMPILERRT
 ; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=COMPILERRT
diff --git a/test/CodeGen/X86/x86-64-ptr-arg-simple.ll b/test/CodeGen/X86/x86-64-ptr-arg-simple.ll
new file mode 100644
index 0000000..6d46663
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-ptr-arg-simple.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnux32 < %s | FileCheck -check-prefix=X32ABI %s
+
+; %in is kept in %esi for both ABIs. But the pointer will be passed in %edi
+; for x32, not %rdi
+
+; CHECK: movl %esi, (%rdi)
+; X32ABI: movl %esi, (%edi)
+
+define void @foo(i32* nocapture %out, i32 %in) nounwind {
+entry:
+  store i32 %in, i32* %out, align 4
+  ret void
+}
+
+; CHECK: bar
+; CHECK: movl (%rsi), %eax
+
+; Similarly here, but for loading
+; X32ABI: bar
+; X32ABI: movl (%esi), %eax
+
+define void @bar(i32* nocapture %pOut, i32* nocapture %pIn) nounwind {
+entry:
+  %0 = load i32* %pIn, align 4
+  store i32 %0, i32* %pOut, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/X86/x86-64-sret-return.ll b/test/CodeGen/X86/x86-64-sret-return.ll
index 7b5f189..bc8a543 100644
--- a/test/CodeGen/X86/x86-64-sret-return.ll
+++ b/test/CodeGen/X86/x86-64-sret-return.ll
@@ -1,11 +1,16 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin8 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnux32 < %s | FileCheck -check-prefix=X32ABI %s
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-apple-darwin8"
-	%struct.foo = type { [4 x i64] }
+%struct.foo = type { [4 x i64] }
 
 ; CHECK: bar:
 ; CHECK: movq %rdi, %rax
+
+; For the x32 ABI, pointers are 32-bit so 32-bit instructions will be used
+; X32ABI: bar:
+; X32ABI: movl %edi, %eax
+
 define void @bar(%struct.foo* noalias sret  %agg.result, %struct.foo* %d) nounwind  {
 entry:
 	%d_addr = alloca %struct.foo*		; <%struct.foo**> [#uses=2]
@@ -57,6 +62,11 @@ return:		; preds = %entry
 
 ; CHECK: foo:
 ; CHECK: movq %rdi, %rax
+
+; For the x32 ABI, pointers are 32-bit so 32-bit instructions will be used
+; X32ABI: foo:
+; X32ABI: movl %edi, %eax
+
 define void @foo({ i64 }* noalias nocapture sret %agg.result) nounwind {
   store { i64 } { i64 0 }, { i64 }* %agg.result
   ret void
diff --git a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
index e0371d6..6efce1a 100644
--- a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
+++ b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
@@ -10,10 +10,8 @@ entry:
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 720913, i32 0, i32 12, metadata !"fb.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 139632)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !10} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 720937, metadata !"fb.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/DebugInfo/2009-11-10-CurrentFn.ll b/test/DebugInfo/2009-11-10-CurrentFn.ll
index 01db617..19be3f2 100644
--- a/test/DebugInfo/2009-11-10-CurrentFn.ll
+++ b/test/DebugInfo/2009-11-10-CurrentFn.ll
@@ -13,10 +13,8 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 720913, i32 0, i32 12, metadata !"cf.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 139632)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (i32)* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 720937, metadata !"cf.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
index 2557c9c..c1a88e1 100644
--- a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
+++ b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
@@ -49,6 +49,5 @@ entry:
 !21 = metadata !{i32 9, i32 0, metadata !11, metadata !17}
 !22 = metadata !{i32 11, i32 0, metadata !11, metadata !17}
 !23 = metadata !{i32 16, i32 0, metadata !18, null}
-!24 = metadata !{metadata !25}
-!25 = metadata !{metadata !9, metadata !10}
+!24 = metadata !{metadata !9, metadata !10}
 
diff --git a/test/DebugInfo/AArch64/cfi-frame.ll b/test/DebugInfo/AArch64/cfi-frame.ll
new file mode 100644
index 0000000..7290ddf
--- /dev/null
+++ b/test/DebugInfo/AArch64/cfi-frame.ll
@@ -0,0 +1,58 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-WITH-FP
+
+@bigspace = global [8 x i64] zeroinitializer
+
+declare void @use_addr(i8*)
+
+define void @test_frame([8 x i64] %val) {
+; CHECK: test_frame:
+; CHECK: .cfi_startproc
+
+  %var = alloca i8, i32 1000000
+; CHECK: sub sp, sp, #[[SP_INIT_ADJ:[0-9]+]]
+; CHECK-NEXT: .Ltmp
+; CHECK-NEXT: .cfi_def_cfa sp, [[SP_INIT_ADJ]]
+
+; Make sure the prologue is reasonably efficient
+; CHECK-NEXT: stp x29, x30, [sp,
+; CHECK-NEXT: stp x25, x26, [sp,
+; CHECK-NEXT: stp x23, x24, [sp,
+; CHECK-NEXT: stp x21, x22, [sp,
+; CHECK-NEXT: stp x19, x20, [sp,
+; CHECK-NEXT: sub sp, sp, #160
+; CHECK-NEXT: sub sp, sp, #244, lsl #12
+; CHECK-NEXT: .Ltmp
+; CHECK-NEXT: .cfi_def_cfa sp, 1000080
+; CHECK-NEXT: .Ltmp
+; CHECK-NEXT: .cfi_offset x30, -8
+; CHECK-NEXT: .Ltmp
+; CHECK-NEXT: .cfi_offset x29, -16
+; [...]
+; CHECK: .cfi_offset x19, -80
+
+; CHECK: bl use_addr
+  call void @use_addr(i8* %var)
+
+  store [8 x i64] %val, [8 x i64]* @bigspace
+  ret void
+; CHECK: ret
+; CHECK: .cfi_endproc
+}
+
+; CHECK-WITH-FP: test_frame:
+
+; CHECK-WITH-FP: sub sp, sp, #[[SP_INIT_ADJ:[0-9]+]]
+; CHECK-WITH-FP-NEXT: .Ltmp
+; CHECK-WITH-FP-NEXT: .cfi_def_cfa sp, [[SP_INIT_ADJ]]
+
+; CHECK-WITH-FP: stp x29, x30, [sp, [[OFFSET:#[0-9]+]]]
+; CHECK-WITH-FP-NEXT: add x29, sp, [[OFFSET]]
+; CHECK-WITH-FP-NEXT: .Ltmp
+; CHECK-WITH-FP-NEXT: .cfi_def_cfa x29, 16
+
+  ; We shouldn't emit any kind of update for the second stack adjustment if the
+  ; FP is in use.
+; CHECK-WITH-FP-NOT: .cfi_def_cfa_offset
+
+; CHECK-WITH-FP: bl use_addr
diff --git a/test/DebugInfo/AArch64/dwarfdump.ll b/test/DebugInfo/AArch64/dwarfdump.ll
new file mode 100644
index 0000000..b94f775
--- /dev/null
+++ b/test/DebugInfo/AArch64/dwarfdump.ll
@@ -0,0 +1,33 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -filetype=obj | llvm-dwarfdump - | FileCheck %s
+
+; We're mostly checking that relocations are applied correctly
+; here. Currently R_AARCH64_ABS32 is used for references to debug data
+; and R_AARCH64_ABS64 is used for program addresses.
+
+; A couple of ABS32s, both at 0 and elsewhere, interpreted correctly:
+
+; CHECK: DW_AT_producer [DW_FORM_strp] ( .debug_str[0x00000000] = "clang version 3.3 ")
+; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000013] = "tmp.c")
+
+; A couple of ABS64s similarly:
+
+; CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
+; CHECK: DW_AT_high_pc [DW_FORM_addr] (0x0000000000000008)
+
+define i32 @main() nounwind {
+  ret i32 0, !dbg !8
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"tmp.c", metadata !"/home/tim/llvm/build", metadata !"clang version 3.3 ", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !1} ; [ DW_TAG_compile_unit ] [/home/tim/llvm/build/tmp.c] [DW_LANG_C99]
+!1 = metadata !{i32 0}
+!2 = metadata !{metadata !3}
+!3 = metadata !{i32 786478, i32 0, metadata !4, metadata !"main", metadata !"main", metadata !"", metadata !4, i32 1, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
+!4 = metadata !{i32 786473, metadata !"tmp.c", metadata !"/home/tim/llvm/build", null} ; [ DW_TAG_file_type ]
+!5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = metadata !{metadata !7}
+!7 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = metadata !{i32 2, i32 0, metadata !3, null}
diff --git a/test/DebugInfo/AArch64/eh_frame.ll b/test/DebugInfo/AArch64/eh_frame.ll
new file mode 100644
index 0000000..2539c56
--- /dev/null
+++ b/test/DebugInfo/AArch64/eh_frame.ll
@@ -0,0 +1,51 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu %s -filetype=obj -o %t
+; RUN: llvm-objdump -s %t | FileCheck %s
+@var = global i32 0
+
+declare void @bar()
+
+define i64 @check_largest_class(i32 %in)  {
+  %res = load i32* @var
+  call void @bar()
+  %ext = zext i32 %res to i64
+  ret i64 %ext
+}
+
+; The really key points we're checking here are:
+;  * Return register is x30.
+;  * Pointer format is 0x1b (GNU doesn't appear to understand others).
+
+; The rest is largely incidental, but not expected to change regularly.
+
+; Output is:
+
+; CHECK: Contents of section .eh_frame:
+; CHECK-NEXT: 0000 10000000 00000000 017a5200 017c1e01  .........zR..|..
+; CHECK-NEXT: 0010 1b0c1f00 18000000 18000000 00000000  ................
+
+
+; Won't check the rest, it's rather incidental.
+; 0020 24000000 00440c1f 10449e02 93040000  $....D...D......
+
+
+; The first CIE:
+; -------------------
+; 10000000: length of first CIE = 0x10
+; 00000000: This is a CIE
+; 01: version = 0x1
+; 7a 52 00: augmentation string "zR" -- pointer format is specified
+; 01: code alignment factor 1
+; 7c: data alignment factor -4
+; 1e: return address register 30 (== x30).
+; 01: 1 byte of augmentation
+; 1b: pointer format 1b: DW_EH_PE_pcrel | DW_EH_PE_sdata4
+; 0c 1f 00: initial instructions: "DW_CFA_def_cfa x31 ofs 0" in this case
+
+; Next the FDE:
+; -------------
+; 18000000: FDE length 0x18
+; 18000000: Uses CIE 0x18 backwards (only coincidentally same as above)
+; 00000000: PC begin for this FDE is at 00000000 (relocation is applied here)
+; 24000000: FDE applies up to PC begin+0x24
+; 00: Augmentation data length 0 for this FDE
+; Rest: call frame instructions
diff --git a/test/DebugInfo/AArch64/eh_frame_personality.ll b/test/DebugInfo/AArch64/eh_frame_personality.ll
new file mode 100644
index 0000000..d35f2a2
--- /dev/null
+++ b/test/DebugInfo/AArch64/eh_frame_personality.ll
@@ -0,0 +1,46 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu %s -filetype=obj -o %t
+; RUN: llvm-objdump -s %t | FileCheck %s
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @bar()
+
+define i64 @foo(i64 %lhs, i64 %rhs) {
+  invoke void @bar() to label %end unwind label %clean
+end:
+ ret i64 0
+
+clean:
+  %tst = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) cleanup
+  ret i64 42
+}
+
+; CHECK: Contents of section .eh_frame:
+; CHECK: 0000 1c000000 00000000 017a504c 5200017c  .........zPLR..|
+; CHECK: 0010 1e0b0000 00000000 00000000 1b0c1f00  ................
+
+; Don't really care about the rest:
+
+; 0020 1c000000 24000000 00000000 24000000  ....$.......$...
+; 0030 08000000 00000000 00440c1f 10449e02  .........D...D..
+
+; The key test here is that the personality routine is sanely encoded (under the
+; small memory model it must be an 8-byte value for full generality: code+data <
+; 4GB, but you might need both +4GB and -4GB depending on where things end
+; up). However, for completeness:
+
+; First CIE:
+; ----------
+; 1c000000: Length = 0x1c
+; 00000000: This is a CIE
+; 01: Version 1
+; 7a 50 4c 52 00: Augmentation string "zPLR" (personality routine, language-specific data, pointer format)
+; 01: Code alignment factor 1
+; 7c: Data alignment factor: -4
+; 1e: Return address in x30
+; 0b: Augmentation data 0xb bytes (this is key!)
+; 00: Personality encoding is DW_EH_PE_absptr
+; 00 00 00 00 00 00 00 00: First part of aug (personality routine). Relocated, obviously
+; 00: Second part of aug (language-specific data): absolute pointer format used
+; 1b: pointer format: pc-relative signed 4-byte. Just like GNU.
+; 0c 1f 00: Initial instructions ("DW_CFA_def_cfa x31 ofs 0" in this case)
diff --git a/test/DebugInfo/AArch64/lit.local.cfg b/test/DebugInfo/AArch64/lit.local.cfg
new file mode 100644
index 0000000..c5ce241
--- /dev/null
+++ b/test/DebugInfo/AArch64/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'AArch64' in targets:
+    config.unsupported = True
+
diff --git a/test/DebugInfo/AArch64/variable-loc.ll b/test/DebugInfo/AArch64/variable-loc.ll
new file mode 100644
index 0000000..9a7f7db
--- /dev/null
+++ b/test/DebugInfo/AArch64/variable-loc.ll
@@ -0,0 +1,97 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s
+
+; This is a regression test making sure the location of variables is correct in
+; debugging information, even if they're addressed via the frame pointer.
+
+; In case it needs regenerating, the following suffices:
+; int printf(const char *, ...);
+; void populate_array(int *, int);
+; int sum_array(int *, int);
+
+; int main() {
+;     int main_arr[100], val;
+;     populate_array(main_arr, 100);
+;     val = sum_array(main_arr, 100);
+;     printf("Total is %d\n", val);
+;     return 0;
+; }
+
+  ; First make sure main_arr is where we expect it: sp + 12 == x29 - 420:
+; CHECK: main:
+; CHECK: sub sp, sp, #448
+; CHECK: stp x29, x30, [sp, #432]
+; CHECK: add x29, sp, #432
+; CHECK: add {{x[0-9]+}}, sp, #12
+
+  ; Now check the debugging information reflects this:
+; CHECK: DW_TAG_variable
+; CHECK-NEXT: .word .Linfo_string7
+
+  ; Rather hard-coded, but 145 => DW_OP_fbreg and the .ascii is LEB128 encoded -420.
+; CHECK: DW_AT_location
+; CHECK-NEXT: .byte 145
+; CHECK-NEXT: .ascii "\334|"
+
+; CHECK: .Linfo_string7:
+; CHECK-NEXT: main_arr
+
+
+target datalayout = "e-p:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-f128:128:128-n32:64-S128"
+target triple = "aarch64-none-linux-gnu"
+
+@.str = private unnamed_addr constant [13 x i8] c"Total is %d\0A\00", align 1
+
+declare void @populate_array(i32*, i32) nounwind
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i32 @sum_array(i32*, i32) nounwind
+
+define i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  %main_arr = alloca [100 x i32], align 4
+  %val = alloca i32, align 4
+  store i32 0, i32* %retval
+  call void @llvm.dbg.declare(metadata !{[100 x i32]* %main_arr}, metadata !17), !dbg !22
+  call void @llvm.dbg.declare(metadata !{i32* %val}, metadata !23), !dbg !24
+  %arraydecay = getelementptr inbounds [100 x i32]* %main_arr, i32 0, i32 0, !dbg !25
+  call void @populate_array(i32* %arraydecay, i32 100), !dbg !25
+  %arraydecay1 = getelementptr inbounds [100 x i32]* %main_arr, i32 0, i32 0, !dbg !26
+  %call = call i32 @sum_array(i32* %arraydecay1, i32 100), !dbg !26
+  store i32 %call, i32* %val, align 4, !dbg !26
+  %0 = load i32* %val, align 4, !dbg !27
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), i32 %0), !dbg !27
+  ret i32 0, !dbg !28
+}
+
+declare i32 @printf(i8*, ...)
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"simple.c", metadata !"/home/timnor01/a64-trunk/build", metadata !"clang version 3.2 ", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !11, metadata !14}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array]
+!6 = metadata !{i32 786473, metadata !"simple.c", metadata !"/home/timnor01/a64-trunk/build", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{null, metadata !9, metadata !10}
+!9 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!11 = metadata !{i32 786478, i32 0, metadata !6, metadata !"sum_array", metadata !"sum_array", metadata !"", metadata !6, i32 9, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*, i32)* @sum_array, null, null, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array]
+!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = metadata !{metadata !10, metadata !9, metadata !10}
+!14 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 18, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main]
+!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{metadata !10}
+!17 = metadata !{i32 786688, metadata !18, metadata !"main_arr", metadata !6, i32 19, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [main_arr] [line 19]
+!18 = metadata !{i32 786443, metadata !14, i32 18, i32 16, metadata !6, i32 4} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c]
+!19 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int]
+!20 = metadata !{i32 786465, i64 0, i64 99}       ; [ DW_TAG_subrange_type ] [0, 99]
+!22 = metadata !{i32 19, i32 7, metadata !18, null}
+!23 = metadata !{i32 786688, metadata !18, metadata !"val", metadata !6, i32 20, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [val] [line 20]
+!24 = metadata !{i32 20, i32 7, metadata !18, null}
+!25 = metadata !{i32 22, i32 3, metadata !18, null}
+!26 = metadata !{i32 23, i32 9, metadata !18, null}
+!27 = metadata !{i32 24, i32 3, metadata !18, null}
+!28 = metadata !{i32 26, i32 3, metadata !18, null}
diff --git a/test/DebugInfo/Inputs/dwarfdump-inl-test.cc b/test/DebugInfo/Inputs/dwarfdump-inl-test.cc
new file mode 100644
index 0000000..8ffbb52
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-inl-test.cc
@@ -0,0 +1,15 @@
+#include "dwarfdump-inl-test.h"
+static inline int inlined_f() {
+  volatile int x = inlined_g();
+  return x;
+}
+
+int main() {
+  return inlined_f();
+}
+
+// Built with Clang 3.2
+// $ mkdir -p /tmp/dbginfo
+// $ cp dwarfdump-inl-test.* /tmp/dbginfo
+// $ cd /tmp/dbginfo
+// $ clang++ -O2 -gline-tables-only -fsanitize=address -fPIC -shared dwarfdump-inl-test.cc -o <output>
diff --git a/test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64
index 9a1d538..6df03da 100755
--- a/test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64
+++ b/test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64
diff --git a/test/DebugInfo/Inputs/dwarfdump-inl-test.h b/test/DebugInfo/Inputs/dwarfdump-inl-test.h
new file mode 100644
index 0000000..ecc2aaa
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-inl-test.h
@@ -0,0 +1,9 @@
+inline int inlined_h() {
+  volatile int z = 0;
+  return z;
+}
+
+inline int inlined_g() {
+  volatile int y = inlined_h();
+  return y;
+}
diff --git a/test/DebugInfo/Inputs/dwarfdump-pubnames.cc b/test/DebugInfo/Inputs/dwarfdump-pubnames.cc
new file mode 100644
index 0000000..284755b
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-pubnames.cc
@@ -0,0 +1,32 @@
+// Object file built using:
+// clang -g -mllvm -generate-dwarf-pubnames -o dwarfdump-pubnames.elf-x86_64 \
+//    dwarfdump-pubnames.cc  -c
+
+struct C {
+  void member_function();
+  static int static_member_function();
+  static int static_member_variable;
+};
+
+int C::static_member_variable = 0;
+
+void C::member_function() {
+  static_member_variable = 0;
+}
+
+int C::static_member_function() {
+  return static_member_variable;
+}
+
+C global_variable;
+
+int global_function() {
+  return -1;
+}
+
+namespace ns {
+  void global_namespace_function() {
+    global_variable.member_function();
+  }
+  int global_namespace_variable = 1;
+}
diff --git a/test/DebugInfo/Inputs/dwarfdump-pubnames.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-pubnames.elf-x86-64
new file mode 100644
index 0000000..3c9c1ad
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-pubnames.elf-x86-64
diff --git a/test/DebugInfo/Inputs/dwarfdump-test-32bit.elf.c b/test/DebugInfo/Inputs/dwarfdump-test-32bit.elf.c
new file mode 100644
index 0000000..708e037
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test-32bit.elf.c
@@ -0,0 +1,14 @@
+// clang -c -g -o dwarfdump-test-32bit.elf.o -m32 dwarfdump-test-32bit.elf.c
+
+extern int glob;
+
+int foo(int arg) {
+  int a = arg * 2;
+  return a + glob;
+}
+
+int bar(int arg) {
+  int a = foo(arg) * foo(arg * 2);
+  return glob - foo(a);
+}
+
diff --git a/test/DebugInfo/Inputs/dwarfdump-test-32bit.elf.o b/test/DebugInfo/Inputs/dwarfdump-test-32bit.elf.o
new file mode 100644
index 0000000..817665e
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test-32bit.elf.o
diff --git a/test/DebugInfo/Inputs/dwarfdump-test.cc b/test/DebugInfo/Inputs/dwarfdump-test.cc
new file mode 100644
index 0000000..4089998
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test.cc
@@ -0,0 +1,23 @@
+class DummyClass {
+  int a_;
+ public:
+  DummyClass(int a) : a_(a) {}
+  int add(int b) {
+    return a_ + b;
+  }
+};
+
+int f(int a, int b) {
+  DummyClass c(a);
+  return c.add(b);
+}
+
+int main() {
+  return f(2, 3);
+}
+
+// Built with Clang 3.2:
+// $ mkdir -p /tmp/dbginfo
+// $ cp dwarfdump-test.cc /tmp/dbginfo
+// $ cd /tmp/dbginfo
+// $ clang++ -g dwarfdump-test.cc -o <output>
diff --git a/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64
index fe20c8e..455dd1c 100755
--- a/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64
+++ b/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64
diff --git a/test/DebugInfo/Inputs/dwarfdump-test2-helper.cc b/test/DebugInfo/Inputs/dwarfdump-test2-helper.cc
new file mode 100644
index 0000000..7d92640
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test2-helper.cc
@@ -0,0 +1,3 @@
+extern "C" int a() {
+  return 0;
+}
diff --git a/test/DebugInfo/Inputs/dwarfdump-test2-main.cc b/test/DebugInfo/Inputs/dwarfdump-test2-main.cc
new file mode 100644
index 0000000..b327674
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test2-main.cc
@@ -0,0 +1,11 @@
+extern "C" int a();
+
+int main() {
+  return a();
+}
+
+// Built with gcc 4.6.3
+// $ mkdir -p /tmp/dbginfo
+// $ cp dwarfdump-test2-helper.cc dwarfdump-test2-main.cc /tmp/dbginfo/
+// $ cd /tmp/dbginfo
+// $ g++ -g dwarfdump-test2-helper.cc dwarfdump-test2-main.cc -o <output>
diff --git a/test/DebugInfo/Inputs/dwarfdump-test2.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test2.elf-x86-64
index ce4af7f..6f362ad 100755
--- a/test/DebugInfo/Inputs/dwarfdump-test2.elf-x86-64
+++ b/test/DebugInfo/Inputs/dwarfdump-test2.elf-x86-64
diff --git a/test/DebugInfo/Inputs/dwarfdump-test3-decl.h b/test/DebugInfo/Inputs/dwarfdump-test3-decl.h
new file mode 100644
index 0000000..4a79e95
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test3-decl.h
@@ -0,0 +1,7 @@
+#include "dwarfdump-test3-decl2.h"
+
+class C {
+  explicit C(bool a = false, bool b = false);
+};
+
+void do1() {}
diff --git a/test/DebugInfo/Inputs/dwarfdump-test3-decl2.h b/test/DebugInfo/Inputs/dwarfdump-test3-decl2.h
new file mode 100644
index 0000000..9c92d56
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test3-decl2.h
@@ -0,0 +1 @@
+void do2() { }
diff --git a/test/DebugInfo/Inputs/dwarfdump-test3.cc b/test/DebugInfo/Inputs/dwarfdump-test3.cc
new file mode 100644
index 0000000..7b4d7ea
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test3.cc
@@ -0,0 +1,12 @@
+#include "dwarfdump-test3-decl.h"
+
+C::C(bool a, bool b) {}
+
+// Built with gcc 4.6.3
+// $ mkdir -p /tmp/dbginfo/include
+// $ mkdir -p /tmp/include
+// $ cp dwarfdump-test3.cc /tmp/dbginfo
+// $ cp dwarfdump-test3-decl.h /tmp/include
+// $ cp dwarfdump-test3-decl2.h /tmp/dbginfo/include
+// $ cd /tmp/dbginfo
+// $ gcc dwarfdump-test3.cc -g -I/tmp/include -Iinclude -fPIC -shared -o <output>
diff --git a/test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64
index 7c17304..7330cd8 100755
--- a/test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64
+++ b/test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64
diff --git a/test/DebugInfo/Inputs/dwarfdump-test4-decl.h b/test/DebugInfo/Inputs/dwarfdump-test4-decl.h
new file mode 100644
index 0000000..9abd875
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test4-decl.h
@@ -0,0 +1 @@
+inline void a(){}
diff --git a/test/DebugInfo/Inputs/dwarfdump-test4-part1.cc b/test/DebugInfo/Inputs/dwarfdump-test4-part1.cc
new file mode 100644
index 0000000..94a818c
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test4-part1.cc
@@ -0,0 +1,8 @@
+#include "dwarfdump-test4-decl.h"
+int c(){a();}
+
+// Built with gcc 4.6.3
+// $ mkdir -p /tmp/dbginfo
+// $ cp dwarfdump-test4-*.* /tmp/dbginfo
+// $ cd /tmp/dbginfo
+// $ gcc -fPIC -shared -g dwarfdump-test4-part*.cc -o <output>
diff --git a/test/DebugInfo/Inputs/dwarfdump-test4-part2.cc b/test/DebugInfo/Inputs/dwarfdump-test4-part2.cc
new file mode 100644
index 0000000..2a1936f
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test4-part2.cc
@@ -0,0 +1,2 @@
+#include "dwarfdump-test4-decl.h"
+int d(){a();}
diff --git a/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64
index 8848708..a1dd8b9 100755
--- a/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64
+++ b/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64
diff --git a/test/DebugInfo/Inputs/lit.local.cfg b/test/DebugInfo/Inputs/lit.local.cfg
new file mode 100644
index 0000000..e6f55ee
--- /dev/null
+++ b/test/DebugInfo/Inputs/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = []
diff --git a/test/DebugInfo/Inputs/test-inline.o b/test/DebugInfo/Inputs/test-inline.o
new file mode 100644
index 0000000..a650c91
--- /dev/null
+++ b/test/DebugInfo/Inputs/test-inline.o
diff --git a/test/DebugInfo/Inputs/test-parameters.o b/test/DebugInfo/Inputs/test-parameters.o
new file mode 100644
index 0000000..7f4b670
--- /dev/null
+++ b/test/DebugInfo/Inputs/test-parameters.o
diff --git a/test/DebugInfo/X86/2010-08-10-DbgConstant.ll b/test/DebugInfo/X86/2010-08-10-DbgConstant.ll
index 78f8750..e440df7 100644
--- a/test/DebugInfo/X86/2010-08-10-DbgConstant.ll
+++ b/test/DebugInfo/X86/2010-08-10-DbgConstant.ll
@@ -1,5 +1,5 @@
 ; RUN: llc  -mtriple=i686-linux -O0 -filetype=obj -o %t %s
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 ; CHECK: DW_TAG_constant [4]
 ; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000002c] = "ro")
 
diff --git a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
index e514493..d682ab2 100644
--- a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
+++ b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-pc-linux-gnu %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; ModuleID = 'test.c'
 
@@ -19,10 +19,8 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 720913, i32 0, i32 12, metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo", metadata !"clang version 3.0 (trunk)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @f, null, null, metadata !10} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 720937, metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
@@ -30,8 +28,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !10 = metadata !{metadata !11}
 !11 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{metadata !14}
+!12 = metadata !{metadata !14}
 !14 = metadata !{i32 720948, i32 0, null, metadata !"GLB", metadata !"GLB", metadata !"", metadata !6, i32 1, metadata !9, i32 0, i32 1, i32* @GLB} ; [ DW_TAG_variable ]
 !15 = metadata !{i32 721152, metadata !16, metadata !"LOC", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
 !16 = metadata !{i32 720907, metadata !5, i32 3, i32 9, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
index 6e20169..ba8a763 100644
--- a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
+++ b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-apple-macosx10.7 %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; CHECK: b_ref
 ; CHECK-NOT: AT_bit_size
@@ -89,10 +89,8 @@ entry:
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 720913, i32 0, i32 4, metadata !"main.cpp", metadata !"/Users/echristo/tmp/bad-struct-ref", metadata !"clang version 3.1 (trunk 146596)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !3, metadata !27, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !9}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !9}
 !5 = metadata !{i32 720898, null, metadata !"bar", metadata !6, i32 9, i64 128, i64 64, i32 0, i32 0, null, metadata !7, i32 0, null, null} ; [ DW_TAG_class_type ]
 !6 = metadata !{i32 720937, metadata !"main.cpp", metadata !"/Users/echristo/tmp/bad-struct-ref", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{metadata !8, metadata !19, metadata !21}
@@ -115,8 +113,7 @@ entry:
 !24 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !5} ; [ DW_TAG_pointer_type ]
 !25 = metadata !{metadata !26}
 !26 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!27 = metadata !{metadata !28}
-!28 = metadata !{metadata !29, metadata !37, metadata !40, metadata !43, metadata !46}
+!27 = metadata !{metadata !29, metadata !37, metadata !40, metadata !43, metadata !46}
 !29 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 17, metadata !30, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !35} ; [ DW_TAG_subprogram ]
 !30 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !31, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !31 = metadata !{metadata !12, metadata !12, metadata !32}
diff --git a/test/DebugInfo/X86/DW_AT_byte_size.ll b/test/DebugInfo/X86/DW_AT_byte_size.ll
index 25b5f00..ce3cf00 100644
--- a/test/DebugInfo/X86/DW_AT_byte_size.ll
+++ b/test/DebugInfo/X86/DW_AT_byte_size.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck %s
 
 ; Checks that we don't emit a size for a pointer type.
 ; CHECK: DW_TAG_pointer_type
@@ -25,10 +25,8 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 150996)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooP1A", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.A*)* @_Z3fooP1A, null, null, metadata !14} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/DebugInfo/X86/DW_AT_object_pointer.ll b/test/DebugInfo/X86/DW_AT_object_pointer.ll
index b1fbbf7..789fceb 100644
--- a/test/DebugInfo/X86/DW_AT_object_pointer.ll
+++ b/test/DebugInfo/X86/DW_AT_object_pointer.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; CHECK: DW_TAG_formal_parameter [
 ; CHECK: DW_TAG_class_type
@@ -48,10 +48,8 @@ entry:
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 4, metadata !"bar.cpp", metadata !"/Users/echristo/debug-tests", metadata !"clang version 3.2 (trunk 163586) (llvm/trunk 163570)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/Users/echristo/debug-tests/bar.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !10, metadata !20}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !10, metadata !20}
 !5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", metadata !6, i32 7, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3fooi, null, null, metadata !1, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [foo]
 !6 = metadata !{i32 786473, metadata !"bar.cpp", metadata !"/Users/echristo/debug-tests", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
diff --git a/test/DebugInfo/X86/DW_AT_specification.ll b/test/DebugInfo/X86/DW_AT_specification.ll
index 078b740..67cfb47 100644
--- a/test/DebugInfo/X86/DW_AT_specification.ll
+++ b/test/DebugInfo/X86/DW_AT_specification.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; test that the DW_AT_specification is a back edge in the file.
 
@@ -17,10 +17,8 @@ entry:
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 720913, i32 0, i32 4, metadata !"<unknown>", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @_ZN3foo3barEv, null, metadata !11, metadata !16} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 720937, metadata !"nsNativeAppSupportBase.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
@@ -34,8 +32,7 @@ entry:
 !15 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
 !16 = metadata !{metadata !17}
 !17 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!18 = metadata !{metadata !19}
-!19 = metadata !{metadata !20}
+!18 = metadata !{metadata !20}
 !20 = metadata !{i32 720948, i32 0, metadata !5, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 5, metadata !21, i32 1, i32 1, i32* @_ZZN3foo3barEvE1x} ; [ DW_TAG_variable ]
 !21 = metadata !{i32 720934, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ]
 !22 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/X86/DW_TAG_friend.ll b/test/DebugInfo/X86/DW_TAG_friend.ll
index a0dcec3..0671a40 100644
--- a/test/DebugInfo/X86/DW_TAG_friend.ll
+++ b/test/DebugInfo/X86/DW_TAG_friend.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; Check that the friend tag is there and is followed by a DW_AT_friend that has a reference back.
 
@@ -18,10 +18,8 @@
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 153413) (llvm/trunk 153428)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !17}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !17}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 10, metadata !7, i32 0, i32 1, %class.A* @a} ; [ DW_TAG_variable ]
 !6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ]
diff --git a/test/DebugInfo/X86/aligned_stack_var.ll b/test/DebugInfo/X86/aligned_stack_var.ll
index 9e6c7ff..85027aa 100644
--- a/test/DebugInfo/X86/aligned_stack_var.ll
+++ b/test/DebugInfo/X86/aligned_stack_var.ll
@@ -1,5 +1,5 @@
 ; RUN: llc %s -mtriple=x86_64-pc-linux-gnu -O0 -filetype=obj -o %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; If stack is realigned, we shouldn't describe locations of local
 ; variables by giving offset from the frame pointer (%rbp):
@@ -27,10 +27,8 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test.cc", metadata !"/home/samsonov/debuginfo", metadata !"clang version 3.2 (trunk 155696:155697) (llvm/trunk 155696)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"run", metadata !"run", metadata !"_Z3runv", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !"test.cc", metadata !"/home/samsonov/debuginfo", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/DebugInfo/X86/block-capture.ll b/test/DebugInfo/X86/block-capture.ll
index 4953c42..a1ff6ac 100644
--- a/test/DebugInfo/X86/block-capture.ll
+++ b/test/DebugInfo/X86/block-capture.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; Checks that we emit debug info for the block variable declare.
 ; CHECK: 0x00000030:   DW_TAG_subprogram [3]
@@ -63,10 +63,8 @@ declare i32 @__objc_personality_v0(...)
 !llvm.module.flags = !{!35, !36, !37, !38}
 
 !0 = metadata !{i32 786449, i32 0, i32 16, metadata !"foo.m", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 151227)", i1 true, i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !28, metadata !31, metadata !34}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !28, metadata !31, metadata !34}
 !5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !"foo.m", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll
index 58fb055..ef83a51 100644
--- a/test/DebugInfo/X86/concrete_out_of_line.ll
+++ b/test/DebugInfo/X86/concrete_out_of_line.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-linux %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; test that we add DW_AT_inline even when we only have concrete out of line
 ; instances.
@@ -35,10 +35,8 @@ declare void @_Z8moz_freePv(i8*)
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 720913, i32 0, i32 4, metadata !"nsAutoRefCnt.cpp", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/netwerk/base/src", metadata !"clang version 3.1 ()", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !47} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !23, metadata !27, metadata !31}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !23, metadata !27, metadata !31}
 !5 = metadata !{i32 720942, i32 0, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", metadata !6, i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !12, metadata !20} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 720937, metadata !"nsAutoRefCnt.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/netwerk/base/src", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
@@ -52,18 +50,14 @@ declare void @_Z8moz_freePv(i8*)
 !15 = metadata !{i32 720942, i32 0, metadata !13, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"", metadata !6, i32 12, metadata !16, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18} ; [ DW_TAG_subprogram ]
 !16 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !17 = metadata !{null, metadata !10}
-!18 = metadata !{metadata !19}
-!19 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!20 = metadata !{metadata !21}
-!21 = metadata !{metadata !22}
+!18 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!20 = metadata !{metadata !22}
 !22 = metadata !{i32 721153, metadata !5, metadata !"this", metadata !6, i32 16777230, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
 !23 = metadata !{i32 720942, i32 0, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", metadata !6, i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !24} ; [ DW_TAG_subprogram ]
-!24 = metadata !{metadata !25}
-!25 = metadata !{metadata !26}
+!24 = metadata !{metadata !26}
 !26 = metadata !{i32 721153, metadata !23, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
 !27 = metadata !{i32 720942, i32 0, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD2Ev", metadata !6, i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !28} ; [ DW_TAG_subprogram ]
-!28 = metadata !{metadata !29}
-!29 = metadata !{metadata !30}
+!28 = metadata !{metadata !30}
 !30 = metadata !{i32 721153, metadata !27, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
 !31 = metadata !{i32 720942, i32 0, null, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", metadata !6, i32 4, metadata !32, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, metadata !36, metadata !43} ; [ DW_TAG_subprogram ]
 !32 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !33, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
@@ -77,12 +71,10 @@ declare void @_Z8moz_freePv(i8*)
 !40 = metadata !{i32 720942, i32 0, metadata !37, metadata !"nsAutoRefCnt", metadata !"nsAutoRefCnt", metadata !"", metadata !6, i32 3, metadata !41, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18} ; [ DW_TAG_subprogram ]
 !41 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !42, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !42 = metadata !{null, metadata !34}
-!43 = metadata !{metadata !44}
-!44 = metadata !{metadata !45, metadata !46}
+!43 = metadata !{metadata !45, metadata !46}
 !45 = metadata !{i32 721153, metadata !31, metadata !"this", metadata !6, i32 16777220, metadata !34, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
 !46 = metadata !{i32 721153, metadata !31, metadata !"aValue", metadata !6, i32 33554436, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!47 = metadata !{metadata !48}
-!48 = metadata !{metadata !49}
+!47 = metadata !{metadata !49}
 !49 = metadata !{i32 720948, i32 0, null, metadata !"mRefCnt", metadata !"mRefCnt", metadata !"", metadata !6, i32 9, metadata !37, i32 0, i32 1, i32* null} ; [ DW_TAG_variable ]
 !50 = metadata !{i32 5, i32 5, metadata !51, metadata !52}
 !51 = metadata !{i32 720907, metadata !31, i32 4, i32 29, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
index a09a7ea..c6052b1 100644
--- a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
+++ b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin %s -filetype=obj -o %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 ; RUN: llc -mtriple=x86_64-apple-darwin -regalloc=basic %s -filetype=obj -o %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ;CHECK: DW_TAG_inlined_subroutine [12]
 ;CHECK-NEXT: DW_AT_abstract_origin
diff --git a/test/DebugInfo/X86/debug-info-static-member.ll b/test/DebugInfo/X86/debug-info-static-member.ll
new file mode 100644
index 0000000..d7a6578
--- /dev/null
+++ b/test/DebugInfo/X86/debug-info-static-member.ll
@@ -0,0 +1,171 @@
+; RUN: llc %s -o %t -filetype=obj -O0 -mtriple=x86_64-unknown-linux-gnu
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s -check-prefix=PRESENT
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s -check-prefix=ABSENT
+; Verify that attributes we do want are PRESENT;
+; verify that attributes we don't want are ABSENT.
+; It's a lot easier to do this in two passes than in one.
+; PR14471
+
+; LLVM IR generated using: clang -emit-llvm -S -g
+; (with the Clang part of this patch applied).
+;
+; class C
+; {
+;   static int a;
+;   const static bool const_a = true;
+; protected:
+;   static int b;
+;   const static float const_b = 3.14;
+; public:
+;   static int c;
+;   const static int const_c = 18;
+;   int d;
+; };
+; 
+; int C::a = 4;
+; int C::b = 2;
+; int C::c = 1;
+; 
+; int main()
+; {
+;         C instance_C;
+;         instance_C.d = 8;
+;         return C::c;
+; }
+
+%class.C = type { i32 }                ; layout of class C: one i32 field (accessed below as %d)
+
+@_ZN1C1aE = global i32 4, align 4      ; definition of C::a (source: int C::a = 4)
+@_ZN1C1bE = global i32 2, align 4      ; definition of C::b (source: int C::b = 2)
+@_ZN1C1cE = global i32 1, align 4      ; definition of C::c (source: int C::c = 1)
+
+define i32 @main() nounwind uwtable {  ; IR for the C++ main() quoted in the header comment
+entry:
+  %retval = alloca i32, align 4
+  %instance_C = alloca %class.C, align 4  ; stack slot for the local `C instance_C`
+  store i32 0, i32* %retval
+  call void @llvm.dbg.declare(metadata !{%class.C* %instance_C}, metadata !29), !dbg !30  ; bind debug variable !29 (instance_C) to its alloca
+  %d = getelementptr inbounds %class.C* %instance_C, i32 0, i32 0, !dbg !31  ; address of field 0 of %class.C, i.e. instance_C.d
+  store i32 8, i32* %d, align 4, !dbg !31  ; instance_C.d = 8 (source line 21 per !31)
+  %0 = load i32* @_ZN1C1cE, align 4, !dbg !32  ; read static member C::c
+  ret i32 %0, !dbg !32  ; return C::c (source line 22 per !32)
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"debug-info-static-member.cpp", metadata !"/home/probinson/projects/upstream/static-member/test", metadata !"clang version 3.3 (trunk 171914)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !10} ; [ DW_TAG_compile_unit ] [/home/probinson/projects/upstream/static-member/test/debug-info-static-member.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 18, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 23} ; [ DW_TAG_subprogram ] [line 18] [def] [scope 23] [main]
+!6 = metadata !{i32 786473, metadata !"/usr/local/google/home/blaikie/Development/llvm/src/tools/clang/test/CodeGenCXX/debug-info-static-member.cpp", metadata !"/home/blaikie/local/Development/llvm/build/clang/x86-64/Debug/llvm", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{metadata !12, metadata !27, metadata !28}
+!12 = metadata !{i32 786484, i32 0, metadata !13, metadata !"a", metadata !"a", metadata !"_ZN1C1aE", metadata !6, i32 14, metadata !9, i32 0, i32 1, i32* @_ZN1C1aE, metadata !15} ; [ DW_TAG_variable ] [a] [line 14] [def]
+!13 = metadata !{i32 786434, null, metadata !"C", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !14, i32 0, null, null} ; [ DW_TAG_class_type ] [C] [line 1, size 32, align 32, offset 0] [from ]
+!14 = metadata !{metadata !15, metadata !16, metadata !19, metadata !20, metadata !23, metadata !24, metadata !26}
+!15 = metadata !{i32 786445, metadata !13, metadata !"a", metadata !6, i32 3, i64 0, i64 0, i64 0, i32 4097, metadata !9, null} ; [ DW_TAG_member ] [a] [line 3, size 0, align 0, offset 0] [private] [static] [from int]
+!16 = metadata !{i32 786445, metadata !13, metadata !"const_a", metadata !6, i32 4, i64 0, i64 0, i64 0, i32 4097, metadata !17, i1 true} ; [ DW_TAG_member ] [const_a] [line 4, size 0, align 0, offset 0] [private] [static] [from ]
+!17 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from bool]
+!18 = metadata !{i32 786468, null, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
+!19 = metadata !{i32 786445, metadata !13, metadata !"b", metadata !6, i32 6, i64 0, i64 0, i64 0, i32 4098, metadata !9, null} ; [ DW_TAG_member ] [b] [line 6, size 0, align 0, offset 0] [protected] [static] [from int]
+!20 = metadata !{i32 786445, metadata !13, metadata !"const_b", metadata !6, i32 7, i64 0, i64 0, i64 0, i32 4098, metadata !21, float 0x40091EB860000000} ; [ DW_TAG_member ] [const_b] [line 7, size 0, align 0, offset 0] [protected] [static] [from ]
+!21 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from float]
+!22 = metadata !{i32 786468, null, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
+!23 = metadata !{i32 786445, metadata !13, metadata !"c", metadata !6, i32 9, i64 0, i64 0, i64 0, i32 4096, metadata !9, null} ; [ DW_TAG_member ] [c] [line 9, size 0, align 0, offset 0] [static] [from int]
+!24 = metadata !{i32 786445, metadata !13, metadata !"const_c", metadata !6, i32 10, i64 0, i64 0, i64 0, i32 4096, metadata !25, i32 18} ; [ DW_TAG_member ] [const_c] [line 10, size 0, align 0, offset 0] [static] [from ]
+!25 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from int]
+!26 = metadata !{i32 786445, metadata !13, metadata !"d", metadata !6, i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [d] [line 11, size 32, align 32, offset 0] [from int]
+!27 = metadata !{i32 786484, i32 0, metadata !13, metadata !"b", metadata !"b", metadata !"_ZN1C1bE", metadata !6, i32 15, metadata !9, i32 0, i32 1, i32* @_ZN1C1bE, metadata !19} ; [ DW_TAG_variable ] [b] [line 15] [def]
+!28 = metadata !{i32 786484, i32 0, metadata !13, metadata !"c", metadata !"c", metadata !"_ZN1C1cE", metadata !6, i32 16, metadata !9, i32 0, i32 1, i32* @_ZN1C1cE, metadata !23} ; [ DW_TAG_variable ] [c] [line 16] [def]
+!29 = metadata !{i32 786688, metadata !5, metadata !"instance_C", metadata !6, i32 20, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [instance_C] [line 20]
+!30 = metadata !{i32 20, i32 0, metadata !5, null}
+!31 = metadata !{i32 21, i32 0, metadata !5, null}
+!32 = metadata !{i32 22, i32 0, metadata !5, null}
+; PRESENT verifies that static member declarations have these attributes:
+; external, declaration, accessibility, and either DW_AT_MIPS_linkage_name
+; (for variables) or DW_AT_const_value (for constants).
+;
+; PRESENT:      .debug_info contents:
+; PRESENT:      DW_TAG_class_type
+; PRESENT-NEXT: DW_AT_name {{.*}} "C"
+; PRESENT:      0x[[DECL_A:[0-9a-f]+]]: DW_TAG_member
+; PRESENT-NEXT: DW_AT_name {{.*}} "a"
+; PRESENT:      DW_AT_external
+; PRESENT:      DW_AT_declaration
+; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (0x03)
+; PRESENT:      DW_AT_MIPS_linkage_name {{.*}} "_ZN1C1aE"
+; PRESENT:      DW_TAG_member
+; PRESENT-NEXT: DW_AT_name {{.*}} "const_a"
+; PRESENT:      DW_AT_external
+; PRESENT:      DW_AT_declaration
+; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (0x03)
+; PRESENT:      DW_AT_const_value {{.*}} (1)
+; PRESENT:      0x[[DECL_B:[0-9a-f]+]]: DW_TAG_member
+; PRESENT-NEXT: DW_AT_name {{.*}} "b"
+; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (0x02)
+; PRESENT:      DW_AT_MIPS_linkage_name {{.*}} "_ZN1C1bE"
+; PRESENT:      DW_TAG_member
+; PRESENT-NEXT: DW_AT_name {{.*}} "const_b"
+; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (0x02)
+; PRESENT:      DW_AT_const_value {{.*}} (0x4048f5c3)
+; PRESENT:      0x[[DECL_C:[0-9a-f]+]]: DW_TAG_member
+; PRESENT-NEXT: DW_AT_name {{.*}} "c"
+; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (0x01)
+; PRESENT:      DW_AT_MIPS_linkage_name {{.*}} "_ZN1C1cE"
+; PRESENT:      DW_TAG_member
+; PRESENT-NEXT: DW_AT_name {{.*}} "const_c"
+; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (0x01)
+; PRESENT:      DW_AT_const_value {{.*}} (0x00000012)
+; While we're here, a normal member has data_member_location and
+; accessibility attributes.
+; PRESENT:      DW_TAG_member
+; PRESENT-NEXT: DW_AT_name {{.*}} "d"
+; PRESENT:      DW_AT_data_member_location
+; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (0x01)
+; PRESENT:      NULL
+; Definitions point back to their declarations, and have a location.
+; PRESENT:      DW_TAG_variable
+; PRESENT-NEXT: DW_AT_specification {{.*}} {0x[[DECL_A]]}
+; PRESENT-NEXT: DW_AT_location
+; PRESENT:      DW_TAG_variable
+; PRESENT-NEXT: DW_AT_specification {{.*}} {0x[[DECL_B]]}
+; PRESENT-NEXT: DW_AT_location
+; PRESENT:      DW_TAG_variable
+; PRESENT-NEXT: DW_AT_specification {{.*}} {0x[[DECL_C]]}
+; PRESENT-NEXT: DW_AT_location
+
+; ABSENT verifies that static member declarations do not have either
+; DW_AT_location or DW_AT_data_member_location; also, variables do not
+; have DW_AT_const_value and constants do not have DW_AT_MIPS_linkage_name.
+;
+; ABSENT:      .debug_info contents:
+; ABSENT:      DW_TAG_member
+; ABSENT:      DW_AT_name {{.*}} "a"
+; ABSENT-NOT:  DW_AT_const_value
+; ABSENT-NOT:  location
+; ABSENT:      DW_AT_name {{.*}} "const_a"
+; ABSENT-NOT:  DW_AT_MIPS_linkage_name
+; ABSENT-NOT:  location
+; ABSENT:      DW_AT_name {{.*}} "b"
+; ABSENT-NOT:  DW_AT_const_value
+; ABSENT-NOT:  location
+; ABSENT:      DW_AT_name {{.*}} "const_b"
+; ABSENT-NOT:  DW_AT_MIPS_linkage_name
+; ABSENT-NOT:  location
+; ABSENT:      DW_AT_name {{.*}} "c"
+; ABSENT-NOT:  DW_AT_const_value
+; ABSENT-NOT:  location
+; ABSENT:      DW_AT_name {{.*}} "const_c"
+; ABSENT-NOT:  DW_AT_MIPS_linkage_name
+; ABSENT-NOT:  location
+; While we're here, a normal member does not have a linkage name, constant
+; value, or DW_AT_location.
+; ABSENT:      DW_AT_name {{.*}} "d"
+; ABSENT-NOT:  DW_AT_MIPS_linkage_name
+; ABSENT-NOT:  DW_AT_const_value
+; ABSENT-NOT:  DW_AT_location
+; ABSENT:      NULL
diff --git a/test/DebugInfo/X86/elf-names.ll b/test/DebugInfo/X86/elf-names.ll
index b6a263d..f4df0b7 100644
--- a/test/DebugInfo/X86/elf-names.ll
+++ b/test/DebugInfo/X86/elf-names.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 ; RUN: llvm-as < %s | llvm-dis | FileCheck --check-prefix=CHECK-DIS %s
 
 ; CHECK: 0x0000000b: DW_TAG_compile_unit
@@ -59,10 +59,8 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/usr/local/google/home/echristo", metadata !"clang version 3.2 (trunk 167506) (llvm/trunk 167505)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !31}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !31}
 !5 = metadata !{i32 786478, i32 0, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2Ev", metadata !6, i32 12, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*)* @_ZN1DC2Ev, null, metadata !17, metadata !27, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [D]
 !6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/usr/local/google/home/echristo", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
diff --git a/test/DebugInfo/X86/empty-and-one-elem-array.ll b/test/DebugInfo/X86/empty-and-one-elem-array.ll
index 0744c6b..c3bdbc4 100644
--- a/test/DebugInfo/X86/empty-and-one-elem-array.ll
+++ b/test/DebugInfo/X86/empty-and-one-elem-array.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin -O0 -filetype=obj -o %t < %s
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 ; <rdar://problem/12566646>
 
 %struct.foo = type { i32, [1 x i32] }
@@ -60,10 +60,8 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 12, metadata !"test.c", metadata !"/Volumes/Sandbox/llvm", metadata !"clang version 3.3 (trunk 169136)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/test.c] [DW_LANG_C99]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"func", metadata !"func", metadata !"", metadata !6, i32 11, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @func, null, null, metadata !1, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [func]
 !6 = metadata !{i32 786473, metadata !"test.c", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
diff --git a/test/DebugInfo/X86/empty-array.ll b/test/DebugInfo/X86/empty-array.ll
index dd5c636..b4621fb 100644
--- a/test/DebugInfo/X86/empty-array.ll
+++ b/test/DebugInfo/X86/empty-array.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin -O0 -filetype=obj -o %t < %s
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 ; <rdar://problem/12566646>
 
 %class.A = type { [0 x i32] }
@@ -25,10 +25,8 @@
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 4, metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm", metadata !"clang version 3.3 (trunk 169136)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %class.A* @a} ; [ DW_TAG_variable ] [a] [line 1] [def]
 !6 = metadata !{i32 786473, metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [from ]
diff --git a/test/DebugInfo/X86/ending-run.ll b/test/DebugInfo/X86/ending-run.ll
index 6935c47..3813e1f 100644
--- a/test/DebugInfo/X86/ending-run.ll
+++ b/test/DebugInfo/X86/ending-run.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=line %t | FileCheck %s
 
 ; Check that the line table starts at 7, not 4, but that the first
 ; statement isn't until line 8.
@@ -29,10 +29,8 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 12, metadata !"ending-run.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 153921) (llvm/trunk 153916)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"callee", metadata !"callee", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (i32)* @callee, null, null, metadata !10, i32 7} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !"ending-run.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/DebugInfo/X86/enum-class.ll b/test/DebugInfo/X86/enum-class.ll
index 6eb715d..d129603 100644
--- a/test/DebugInfo/X86/enum-class.ll
+++ b/test/DebugInfo/X86/enum-class.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 @a = global i32 0, align 4
 @b = global i64 0, align 8
@@ -8,8 +8,7 @@
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.2 (trunk 157269) (llvm/trunk 157264)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !15, metadata !15, metadata !17} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{metadata !3, metadata !8, metadata !12}
+!1 = metadata !{metadata !3, metadata !8, metadata !12}
 !3 = metadata !{i32 786436, null, metadata !"A", metadata !4, i32 1, i64 32, i64 32, i32 0, i32 0, metadata !5, metadata !6, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
 !4 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
 !5 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
@@ -22,10 +21,8 @@
 !12 = metadata !{i32 786436, null, metadata !"C", metadata !4, i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
 !13 = metadata !{metadata !14}
 !14 = metadata !{i32 786472, metadata !"C1", i64 1} ; [ DW_TAG_enumerator ]
-!15 = metadata !{metadata !16}
-!16 = metadata !{i32 0}
-!17 = metadata !{metadata !18}
-!18 = metadata !{metadata !19, metadata !20, metadata !21}
+!15 = metadata !{i32 0}
+!17 = metadata !{metadata !19, metadata !20, metadata !21}
 !19 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !4, i32 4, metadata !3, i32 0, i32 1, i32* @a} ; [ DW_TAG_variable ]
 !20 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !4, i32 5, metadata !8, i32 0, i32 1, i64* @b} ; [ DW_TAG_variable ]
 !21 = metadata !{i32 786484, i32 0, null, metadata !"c", metadata !"c", metadata !"", metadata !4, i32 6, metadata !12, i32 0, i32 1, i32* @c} ; [ DW_TAG_variable ]
diff --git a/test/DebugInfo/X86/enum-fwd-decl.ll b/test/DebugInfo/X86/enum-fwd-decl.ll
index 0902430..04ac8e4 100644
--- a/test/DebugInfo/X86/enum-fwd-decl.ll
+++ b/test/DebugInfo/X86/enum-fwd-decl.ll
@@ -1,15 +1,13 @@
 ; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 @e = global i16 0, align 2
 
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/tmp", metadata !"clang version 3.2 (trunk 165274) (llvm/trunk 165272)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [/tmp/foo.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"e", metadata !"e", metadata !"", metadata !6, i32 2, metadata !7, i32 0, i32 1, i16* @e} ; [ DW_TAG_variable ] [e] [line 2] [def]
 !6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786436, null, metadata !"E", metadata !6, i32 1, i64 16, i64 16, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_enumeration_type ] [E] [line 1, size 16, align 16, offset 0] [fwd] [from ]
diff --git a/test/DebugInfo/X86/fission-cu.ll b/test/DebugInfo/X86/fission-cu.ll
index 3ada3ef..d0ae6c7 100644
--- a/test/DebugInfo/X86/fission-cu.ll
+++ b/test/DebugInfo/X86/fission-cu.ll
@@ -1,15 +1,13 @@
 ; RUN: llc -split-dwarf=Enable -O0 %s -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck %s
 
 @a = common global i32 0, align 4
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"baz.c", metadata !"/usr/local/google/home/echristo/tmp", metadata !"clang version 3.3 (trunk 169021) (llvm/trunk 169020)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/baz.c] [DW_LANG_C99]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"baz.c", metadata !"/usr/local/google/home/echristo/tmp", metadata !"clang version 3.3 (trunk 169021) (llvm/trunk 169020)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !"baz.dwo"} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/baz.c] [DW_LANG_C99]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, i32* @a} ; [ DW_TAG_variable ] [a] [line 1] [def]
 !6 = metadata !{i32 786473, metadata !"baz.c", metadata !"/usr/local/google/home/echristo/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
@@ -19,21 +17,83 @@
 ; DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id,
 ; DW_AT_ranges_base, DW_AT_addr_base.
 
+; CHECK: .debug_abbrev contents:
+; CHECK: Abbrev table for offset: 0x00000000
+; CHECK: [1] DW_TAG_compile_unit DW_CHILDREN_no
+; CHECK: DW_AT_GNU_dwo_name      DW_FORM_strp
+; CHECK: DW_AT_GNU_dwo_id        DW_FORM_data8
+; CHECK: DW_AT_GNU_addr_base     DW_FORM_sec_offset
+; CHECK: DW_AT_low_pc    DW_FORM_addr
+; CHECK: DW_AT_stmt_list DW_FORM_sec_offset
+; CHECK: DW_AT_comp_dir  DW_FORM_strp
+
 ; CHECK: .debug_info contents:
 ; CHECK: DW_TAG_compile_unit
-; CHECK: DW_AT_GNU_dwo_name [DW_FORM_strp] ( .debug_str[0x00000000] = "baz.c")
+; CHECK: DW_AT_GNU_dwo_name [DW_FORM_strp] ( .debug_str[0x00000000] = "baz.dwo")
+; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8]  (0x0000000000000000)
+; CHECK: DW_AT_GNU_addr_base [DW_FORM_sec_offset]                   (0x00000000)
 ; CHECK: DW_AT_low_pc [DW_FORM_addr]       (0x0000000000000000)
-; CHECK: DW_AT_stmt_list [DW_FORM_data4]   (0x00000000)
-; CHECK: DW_AT_comp_dir [DW_FORM_strp]     ( .debug_str[0x00000006] = "/usr/local/google/home/echristo/tmp")
+; CHECK: DW_AT_stmt_list [DW_FORM_sec_offset]   (0x00000000)
+; CHECK: DW_AT_comp_dir [DW_FORM_strp]     ( .debug_str[0x00000008] = "/usr/local/google/home/echristo/tmp")
+
+; CHECK: .debug_str contents:
+; CHECK: 0x00000000: "baz.dwo"
+; CHECK: 0x00000008: "/usr/local/google/home/echristo/tmp"
+
+; Check that we're using the right forms.
+; CHECK: .debug_abbrev.dwo contents:
+; CHECK: Abbrev table for offset: 0x00000000
+; CHECK: [1] DW_TAG_compile_unit DW_CHILDREN_yes
+; CHECK: DW_AT_producer  DW_FORM_GNU_str_index
+; CHECK: DW_AT_language  DW_FORM_data2
+; CHECK: DW_AT_name      DW_FORM_GNU_str_index
+; CHECK: DW_AT_low_pc    DW_FORM_GNU_addr_index
+; CHECK: DW_AT_stmt_list DW_FORM_data4
+; CHECK: DW_AT_comp_dir  DW_FORM_GNU_str_index
+; CHECK: DW_AT_GNU_dwo_id        DW_FORM_data8
+
+; CHECK: [2] DW_TAG_base_type    DW_CHILDREN_no
+; CHECK: DW_AT_name      DW_FORM_GNU_str_index
+; CHECK: DW_AT_encoding  DW_FORM_data1
+; CHECK: DW_AT_byte_size DW_FORM_data1
+
+; CHECK: [3] DW_TAG_variable     DW_CHILDREN_no
+; CHECK: DW_AT_name      DW_FORM_GNU_str_index
+; CHECK: DW_AT_type      DW_FORM_ref4
+; CHECK: DW_AT_external  DW_FORM_flag_present
+; CHECK: DW_AT_decl_file DW_FORM_data1
+; CHECK: DW_AT_decl_line DW_FORM_data1
+; CHECK: DW_AT_location  DW_FORM_block1
 
 ; Check that the rest of the compile units have information.
-; FIXME: Strings will ultimately be a different form.
 ; CHECK: .debug_info.dwo contents:
 ; CHECK: DW_TAG_compile_unit
 ; CHECK: DW_AT_producer [DW_FORM_GNU_str_index] ( indexed (00000000) string = "clang version 3.3 (trunk 169021) (llvm/trunk 169020)")
 ; CHECK: DW_AT_language [DW_FORM_data2]        (0x000c)
 ; CHECK: DW_AT_name [DW_FORM_GNU_str_index]    ( indexed (00000001) string = "baz.c")
+; CHECK: DW_AT_low_pc [DW_FORM_GNU_addr_index]     ( indexed (00000000) address = 0x0000000000000000)
+; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8]  (0x0000000000000000)
 ; CHECK: DW_TAG_base_type
 ; CHECK: DW_AT_name [DW_FORM_GNU_str_index]     ( indexed (00000004) string = "int")
 ; CHECK: DW_TAG_variable
 ; CHECK: DW_AT_name [DW_FORM_GNU_str_index]     ( indexed (00000003) string = "a")
+; CHECK: DW_AT_type [DW_FORM_ref4]       (cu + 0x001e => {0x0000001e})
+; CHECK: DW_AT_external [DW_FORM_flag_present]   (true)
+; CHECK: DW_AT_decl_file [DW_FORM_data1] (0x01)
+; CHECK: DW_AT_decl_line [DW_FORM_data1] (0x01)
+; CHECK: DW_AT_location [DW_FORM_block1] (<0x02> fb 01 )
+
+
+; CHECK: .debug_str.dwo contents:
+; CHECK: 0x00000000: "clang version 3.3 (trunk 169021) (llvm/trunk 169020)"
+; CHECK: 0x00000035: "baz.c"
+; CHECK: 0x0000003b: "/usr/local/google/home/echristo/tmp"
+; CHECK: 0x0000005f: "a"
+; CHECK: 0x00000061: "int"
+
+; CHECK: .debug_str_offsets.dwo contents:
+; CHECK: 0x00000000: 00000000
+; CHECK: 0x00000004: 00000035
+; CHECK: 0x00000008: 0000003b
+; CHECK: 0x0000000c: 0000005f
+; CHECK: 0x00000010: 00000061
diff --git a/test/DebugInfo/X86/line-info.ll b/test/DebugInfo/X86/line-info.ll
new file mode 100644
index 0000000..92dd072
--- /dev/null
+++ b/test/DebugInfo/X86/line-info.ll
@@ -0,0 +1,52 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -filetype=obj -O0 < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; CHECK: 2      0      1   0  is_stmt
+
+; IR generated from clang -g -emit-llvm with the following source:
+; list0.h:
+; int foo (int x) {
+;     return ++x;
+; }
+; list0.c:
+; #include "list0.h"
+; int main() {
+; }
+
+define i32 @foo(i32 %x) nounwind uwtable {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !14), !dbg !15
+  %0 = load i32* %x.addr, align 4, !dbg !16
+  %inc = add nsw i32 %0, 1, !dbg !16
+  store i32 %inc, i32* %x.addr, align 4, !dbg !16
+  ret i32 %inc, !dbg !16
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @main() nounwind uwtable {
+entry:
+  ret i32 0, !dbg !17
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"list0.c", metadata !"/usr/local/google/home/blaikie/dev/scratch/pr14566", metadata !"clang version 3.3 ", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/pr14566/list0.c] [DW_LANG_C99]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !10}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!6 = metadata !{i32 786473, metadata !"./list0.h", metadata !"/usr/local/google/home/blaikie/dev/scratch/pr14566", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786478, i32 0, metadata !11, metadata !"main", metadata !"main", metadata !"", metadata !11, i32 2, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [main]
+!11 = metadata !{i32 786473, metadata !"list0.c", metadata !"/usr/local/google/home/blaikie/dev/scratch/pr14566", null} ; [ DW_TAG_file_type ]
+!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = metadata !{metadata !9}
+!14 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 1]
+!15 = metadata !{i32 1, i32 0, metadata !5, null}
+!16 = metadata !{i32 2, i32 0, metadata !5, null}
+!17 = metadata !{i32 3, i32 0, metadata !18, null}
+!18 = metadata !{i32 786443, metadata !10, metadata !11} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/blaikie/dev/scratch/pr14566/list0.c]
diff --git a/test/DebugInfo/X86/linkage-name.ll b/test/DebugInfo/X86/linkage-name.ll
index b984923..a3c2a9b 100644
--- a/test/DebugInfo/X86/linkage-name.ll
+++ b/test/DebugInfo/X86/linkage-name.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-macosx -darwin-gdb-compat=Disable %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; CHECK: DW_TAG_subprogram [9] *
 ; CHECK-NOT: DW_AT_MIPS_linkage_name
@@ -27,10 +27,8 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 152691) (llvm/trunk 152692)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, i32 0, null, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%class.A*, i32)* @_ZN1A1aEi, null, metadata !13, metadata !16} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
@@ -44,8 +42,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !15 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
 !16 = metadata !{metadata !17}
 !17 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!18 = metadata !{metadata !19}
-!19 = metadata !{metadata !20}
+!18 = metadata !{metadata !20}
 !20 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 9, metadata !11, i32 0, i32 1, %class.A* @a} ; [ DW_TAG_variable ]
 !21 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777221, metadata !22, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
 !22 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
diff --git a/test/DebugInfo/X86/low-pc-cu.ll b/test/DebugInfo/X86/low-pc-cu.ll
index f9d9b91..2240f36 100644
--- a/test/DebugInfo/X86/low-pc-cu.ll
+++ b/test/DebugInfo/X86/low-pc-cu.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; Check that we use DW_AT_low_pc
 
@@ -15,10 +15,8 @@ entry:
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 153454) (llvm/trunk 153471)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !12}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !12}
 !5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"q", metadata !"q", metadata !"_Z1qv", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z1qv, null, null, metadata !10} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/DebugInfo/X86/main-file-name.s b/test/DebugInfo/X86/main-file-name.s
index 6817c9e..0369c61 100644
--- a/test/DebugInfo/X86/main-file-name.s
+++ b/test/DebugInfo/X86/main-file-name.s
@@ -1,5 +1,5 @@
 // RUN: llvm-mc -triple x86_64-unknown-linux-gnu -filetype obj -main-file-name foo.S -g -o %t %s
-// RUN: llvm-dwarfdump %t | FileCheck %s
+// RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 // CHECK: DW_TAG_compile_unit [1]
 // CHECK-NOT: DW_TAG_
diff --git a/test/DebugInfo/X86/misched-dbg-value.ll b/test/DebugInfo/X86/misched-dbg-value.ll
new file mode 100644
index 0000000..6c1032e
--- /dev/null
+++ b/test/DebugInfo/X86/misched-dbg-value.ll
@@ -0,0 +1,173 @@
+; RUN: llc %s -mtriple=x86_64-apple-darwin -filetype=obj -o %t -enable-misched
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; rdar://13183203
+; Make sure when misched is enabled, we still have location information for
+; function parameters.
+; CHECK: .debug_info contents:
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_TAG_subprogram
+; CHECK: Proc8
+; CHECK: DW_TAG_formal_parameter
+; CHECK: Array1Par
+; CHECK: DW_AT_location
+; CHECK: DW_TAG_formal_parameter
+; CHECK: Array2Par
+; CHECK: DW_AT_location
+; CHECK: DW_TAG_formal_parameter
+; CHECK: IntParI1
+; CHECK: DW_AT_location
+; CHECK: DW_TAG_formal_parameter
+; CHECK: IntParI2
+; CHECK: DW_AT_location
+
+%struct.Record = type { %struct.Record*, i32, i32, i32, [31 x i8] }
+
+@Version = global [4 x i8] c"1.1\00", align 1
+@IntGlob = common global i32 0, align 4
+@BoolGlob = common global i32 0, align 4
+@Char1Glob = common global i8 0, align 1
+@Char2Glob = common global i8 0, align 1
+@Array1Glob = common global [51 x i32] zeroinitializer, align 16
+@Array2Glob = common global [51 x [51 x i32]] zeroinitializer, align 16
+@PtrGlb = common global %struct.Record* null, align 8
+@PtrGlbNext = common global %struct.Record* null, align 8
+
+define void @Proc8(i32* nocapture %Array1Par, [51 x i32]* nocapture %Array2Par, i32 %IntParI1, i32 %IntParI2) nounwind optsize {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32* %Array1Par}, i64 0, metadata !23), !dbg !64
+  tail call void @llvm.dbg.value(metadata !{[51 x i32]* %Array2Par}, i64 0, metadata !24), !dbg !65
+  tail call void @llvm.dbg.value(metadata !{i32 %IntParI1}, i64 0, metadata !25), !dbg !66
+  tail call void @llvm.dbg.value(metadata !{i32 %IntParI2}, i64 0, metadata !26), !dbg !67
+  %add = add i32 %IntParI1, 5, !dbg !68
+  tail call void @llvm.dbg.value(metadata !{i32 %add}, i64 0, metadata !27), !dbg !68
+  %idxprom = sext i32 %add to i64, !dbg !69
+  %arrayidx = getelementptr inbounds i32* %Array1Par, i64 %idxprom, !dbg !69
+  store i32 %IntParI2, i32* %arrayidx, align 4, !dbg !69, !tbaa !70
+  %add3 = add nsw i32 %IntParI1, 6, !dbg !73
+  %idxprom4 = sext i32 %add3 to i64, !dbg !73
+  %arrayidx5 = getelementptr inbounds i32* %Array1Par, i64 %idxprom4, !dbg !73
+  store i32 %IntParI2, i32* %arrayidx5, align 4, !dbg !73, !tbaa !70
+  %add6 = add nsw i32 %IntParI1, 35, !dbg !74
+  %idxprom7 = sext i32 %add6 to i64, !dbg !74
+  %arrayidx8 = getelementptr inbounds i32* %Array1Par, i64 %idxprom7, !dbg !74
+  store i32 %add, i32* %arrayidx8, align 4, !dbg !74, !tbaa !70
+  tail call void @llvm.dbg.value(metadata !{i32 %add}, i64 0, metadata !28), !dbg !75
+  br label %for.body, !dbg !75
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %idxprom, %entry ], [ %indvars.iv.next, %for.body ]
+  %IntIndex.046 = phi i32 [ %add, %entry ], [ %inc, %for.body ]
+  %arrayidx13 = getelementptr inbounds [51 x i32]* %Array2Par, i64 %idxprom, i64 %indvars.iv, !dbg !77
+  store i32 %add, i32* %arrayidx13, align 4, !dbg !77, !tbaa !70
+  %inc = add nsw i32 %IntIndex.046, 1, !dbg !75
+  tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !28), !dbg !75
+  %cmp = icmp sgt i32 %inc, %add3, !dbg !75
+  %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !75
+  br i1 %cmp, label %for.end, label %for.body, !dbg !75
+
+for.end:                                          ; preds = %for.body
+  %sub = add nsw i32 %IntParI1, 4, !dbg !78
+  %idxprom14 = sext i32 %sub to i64, !dbg !78
+  %arrayidx17 = getelementptr inbounds [51 x i32]* %Array2Par, i64 %idxprom, i64 %idxprom14, !dbg !78
+  %0 = load i32* %arrayidx17, align 4, !dbg !78, !tbaa !70
+  %inc18 = add nsw i32 %0, 1, !dbg !78
+  store i32 %inc18, i32* %arrayidx17, align 4, !dbg !78, !tbaa !70
+  %1 = load i32* %arrayidx, align 4, !dbg !79, !tbaa !70
+  %add22 = add nsw i32 %IntParI1, 25, !dbg !79
+  %idxprom23 = sext i32 %add22 to i64, !dbg !79
+  %arrayidx25 = getelementptr inbounds [51 x i32]* %Array2Par, i64 %idxprom23, i64 %idxprom, !dbg !79
+  store i32 %1, i32* %arrayidx25, align 4, !dbg !79, !tbaa !70
+  store i32 5, i32* @IntGlob, align 4, !dbg !80, !tbaa !70
+  ret void, !dbg !81
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+attributes #0 = { nounwind optsize ssp uwtable }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"dry.c", metadata !"/Users/manmanren/test-Nov/rdar_13183203/test2", metadata !"clang version 3.3 (trunk 175015)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !10, metadata !11, metadata !29} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c] [DW_LANG_C99]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 786436, null, metadata !"", metadata !3, i32 128, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] [line 128, size 32, align 32, offset 0] [from ]
+!3 = metadata !{i32 786473, metadata !"dry.c", metadata !"/Users/manmanren/test-Nov/rdar_13183203/test2", null} ; [ DW_TAG_file_type ]
+!4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8, metadata !9}
+!5 = metadata !{i32 786472, metadata !"Ident1", i64 0} ; [ DW_TAG_enumerator ] [Ident1 :: 0]
+!6 = metadata !{i32 786472, metadata !"Ident2", i64 10000} ; [ DW_TAG_enumerator ] [Ident2 :: 10000]
+!7 = metadata !{i32 786472, metadata !"Ident3", i64 10001} ; [ DW_TAG_enumerator ] [Ident3 :: 10001]
+!8 = metadata !{i32 786472, metadata !"Ident4", i64 10002} ; [ DW_TAG_enumerator ] [Ident4 :: 10002]
+!9 = metadata !{i32 786472, metadata !"Ident5", i64 10003} ; [ DW_TAG_enumerator ] [Ident5 :: 10003]
+!10 = metadata !{i32 0}
+!11 = metadata !{metadata !12}
+!12 = metadata !{i32 786478, i32 0, metadata !3, metadata !"Proc8", metadata !"Proc8", metadata !"", metadata !3, i32 180, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void (i32*, [51 x i32]*, i32, i32)* @Proc8, null, null, metadata !22, i32 185} ; [ DW_TAG_subprogram ] [line 180] [def] [scope 185] [Proc8]
+!13 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = metadata !{null, metadata !15, metadata !17, metadata !21, metadata !21}
+!15 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!16 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!17 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!18 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 1632, i64 32, i32 0, i32 0, metadata !16, metadata !19, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 1632, align 32, offset 0] [from int]
+!19 = metadata !{metadata !20}
+!20 = metadata !{i32 786465, i64 0, i64 51}       ; [ DW_TAG_subrange_type ] [0, 50]
+!21 = metadata !{i32 786454, null, metadata !"OneToFifty", metadata !3, i32 132, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [OneToFifty] [line 132, size 0, align 0, offset 0] [from int]
+!22 = metadata !{metadata !23, metadata !24, metadata !25, metadata !26, metadata !27, metadata !28}
+!23 = metadata !{i32 786689, metadata !12, metadata !"Array1Par", metadata !3, i32 16777397, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [Array1Par] [line 181]
+!24 = metadata !{i32 786689, metadata !12, metadata !"Array2Par", metadata !3, i32 33554614, metadata !17, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [Array2Par] [line 182]
+!25 = metadata !{i32 786689, metadata !12, metadata !"IntParI1", metadata !3, i32 50331831, metadata !21, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [IntParI1] [line 183]
+!26 = metadata !{i32 786689, metadata !12, metadata !"IntParI2", metadata !3, i32 67109048, metadata !21, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [IntParI2] [line 184]
+!27 = metadata !{i32 786688, metadata !12, metadata !"IntLoc", metadata !3, i32 186, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [IntLoc] [line 186]
+!28 = metadata !{i32 786688, metadata !12, metadata !"IntIndex", metadata !3, i32 187, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [IntIndex] [line 187]
+!29 = metadata !{metadata !30, metadata !35, metadata !36, metadata !38, metadata !39, metadata !40, metadata !42, metadata !46, metadata !63}
+!30 = metadata !{i32 786484, i32 0, null, metadata !"Version", metadata !"Version", metadata !"", metadata !3, i32 111, metadata !31, i32 0, i32 1, [4 x i8]* @Version, null} ; [ DW_TAG_variable ] [Version] [line 111] [def]
+!31 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 32, i64 8, i32 0, i32 0, metadata !32, metadata !33, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 32, align 8, offset 0] [from char]
+!32 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!33 = metadata !{metadata !34}
+!34 = metadata !{i32 786465, i64 0, i64 4}        ; [ DW_TAG_subrange_type ] [0, 3]
+!35 = metadata !{i32 786484, i32 0, null, metadata !"IntGlob", metadata !"IntGlob", metadata !"", metadata !3, i32 171, metadata !16, i32 0, i32 1, i32* @IntGlob, null} ; [ DW_TAG_variable ] [IntGlob] [line 171] [def]
+!36 = metadata !{i32 786484, i32 0, null, metadata !"BoolGlob", metadata !"BoolGlob", metadata !"", metadata !3, i32 172, metadata !37, i32 0, i32 1, i32* @BoolGlob, null} ; [ DW_TAG_variable ] [BoolGlob] [line 172] [def]
+!37 = metadata !{i32 786454, null, metadata !"boolean", metadata !3, i32 149, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [boolean] [line 149, size 0, align 0, offset 0] [from int]
+!38 = metadata !{i32 786484, i32 0, null, metadata !"Char1Glob", metadata !"Char1Glob", metadata !"", metadata !3, i32 173, metadata !32, i32 0, i32 1, i8* @Char1Glob, null} ; [ DW_TAG_variable ] [Char1Glob] [line 173] [def]
+!39 = metadata !{i32 786484, i32 0, null, metadata !"Char2Glob", metadata !"Char2Glob", metadata !"", metadata !3, i32 174, metadata !32, i32 0, i32 1, i8* @Char2Glob, null} ; [ DW_TAG_variable ] [Char2Glob] [line 174] [def]
+!40 = metadata !{i32 786484, i32 0, null, metadata !"Array1Glob", metadata !"Array1Glob", metadata !"", metadata !3, i32 175, metadata !41, i32 0, i32 1, [51 x i32]* @Array1Glob, null} ; [ DW_TAG_variable ] [Array1Glob] [line 175] [def]
+!41 = metadata !{i32 786454, null, metadata !"Array1Dim", metadata !3, i32 135, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ] [Array1Dim] [line 135, size 0, align 0, offset 0] [from ]
+!42 = metadata !{i32 786484, i32 0, null, metadata !"Array2Glob", metadata !"Array2Glob", metadata !"", metadata !3, i32 176, metadata !43, i32 0, i32 1, [51 x [51 x i32]]* @Array2Glob, null} ; [ DW_TAG_variable ] [Array2Glob] [line 176] [def]
+!43 = metadata !{i32 786454, null, metadata !"Array2Dim", metadata !3, i32 136, i64 0, i64 0, i64 0, i32 0, metadata !44} ; [ DW_TAG_typedef ] [Array2Dim] [line 136, size 0, align 0, offset 0] [from ]
+!44 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 83232, i64 32, i32 0, i32 0, metadata !16, metadata !45, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 83232, align 32, offset 0] [from int]
+!45 = metadata !{metadata !20, metadata !20}
+!46 = metadata !{i32 786484, i32 0, null, metadata !"PtrGlb", metadata !"PtrGlb", metadata !"", metadata !3, i32 177, metadata !47, i32 0, i32 1, %struct.Record** @PtrGlb, null} ; [ DW_TAG_variable ] [PtrGlb] [line 177] [def]
+!47 = metadata !{i32 786454, null, metadata !"RecordPtr", metadata !3, i32 148, i64 0, i64 0, i64 0, i32 0, metadata !48} ; [ DW_TAG_typedef ] [RecordPtr] [line 148, size 0, align 0, offset 0] [from ]
+!48 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from RecordType]
+!49 = metadata !{i32 786454, null, metadata !"RecordType", metadata !3, i32 147, i64 0, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_typedef ] [RecordType] [line 147, size 0, align 0, offset 0] [from Record]
+!50 = metadata !{i32 786451, null, metadata !"Record", metadata !3, i32 138, i64 448, i64 64, i32 0, i32 0, null, metadata !51, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [Record] [line 138, size 448, align 64, offset 0] [from ]
+!51 = metadata !{metadata !52, metadata !54, metadata !56, metadata !57, metadata !58}
+!52 = metadata !{i32 786445, metadata !50, metadata !"PtrComp", metadata !3, i32 140, i64 64, i64 64, i64 0, i32 0, metadata !53} ; [ DW_TAG_member ] [PtrComp] [line 140, size 64, align 64, offset 0] [from ]
+!53 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Record]
+!54 = metadata !{i32 786445, metadata !50, metadata !"Discr", metadata !3, i32 141, i64 32, i64 32, i64 64, i32 0, metadata !55} ; [ DW_TAG_member ] [Discr] [line 141, size 32, align 32, offset 64] [from Enumeration]
+!55 = metadata !{i32 786454, null, metadata !"Enumeration", metadata !3, i32 128, i64 0, i64 0, i64 0, i32 0, metadata !2} ; [ DW_TAG_typedef ] [Enumeration] [line 128, size 0, align 0, offset 0] [from ]
+!56 = metadata !{i32 786445, metadata !50, metadata !"EnumComp", metadata !3, i32 142, i64 32, i64 32, i64 96, i32 0, metadata !55} ; [ DW_TAG_member ] [EnumComp] [line 142, size 32, align 32, offset 96] [from Enumeration]
+!57 = metadata !{i32 786445, metadata !50, metadata !"IntComp", metadata !3, i32 143, i64 32, i64 32, i64 128, i32 0, metadata !21} ; [ DW_TAG_member ] [IntComp] [line 143, size 32, align 32, offset 128] [from OneToFifty]
+!58 = metadata !{i32 786445, metadata !50, metadata !"StringComp", metadata !3, i32 144, i64 248, i64 8, i64 160, i32 0, metadata !59} ; [ DW_TAG_member ] [StringComp] [line 144, size 248, align 8, offset 160] [from String30]
+!59 = metadata !{i32 786454, null, metadata !"String30", metadata !3, i32 134, i64 0, i64 0, i64 0, i32 0, metadata !60} ; [ DW_TAG_typedef ] [String30] [line 134, size 0, align 0, offset 0] [from ]
+!60 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 248, i64 8, i32 0, i32 0, metadata !32, metadata !61, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 248, align 8, offset 0] [from char]
+!61 = metadata !{metadata !62}
+!62 = metadata !{i32 786465, i64 0, i64 31}       ; [ DW_TAG_subrange_type ] [0, 30]
+!63 = metadata !{i32 786484, i32 0, null, metadata !"PtrGlbNext", metadata !"PtrGlbNext", metadata !"", metadata !3, i32 178, metadata !47, i32 0, i32 1, %struct.Record** @PtrGlbNext, null} ; [ DW_TAG_variable ] [PtrGlbNext] [line 178] [def]
+!64 = metadata !{i32 181, i32 0, metadata !12, null}
+!65 = metadata !{i32 182, i32 0, metadata !12, null}
+!66 = metadata !{i32 183, i32 0, metadata !12, null}
+!67 = metadata !{i32 184, i32 0, metadata !12, null}
+!68 = metadata !{i32 189, i32 0, metadata !12, null}
+!69 = metadata !{i32 190, i32 0, metadata !12, null}
+!70 = metadata !{metadata !"int", metadata !71}
+!71 = metadata !{metadata !"omnipotent char", metadata !72}
+!72 = metadata !{metadata !"Simple C/C++ TBAA"}
+!73 = metadata !{i32 191, i32 0, metadata !12, null}
+!74 = metadata !{i32 192, i32 0, metadata !12, null}
+!75 = metadata !{i32 193, i32 0, metadata !76, null}
+!76 = metadata !{i32 786443, metadata !12, i32 193, i32 0, metadata !3, i32 0} ; [ DW_TAG_lexical_block ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c]
+!77 = metadata !{i32 194, i32 0, metadata !76, null}
+!78 = metadata !{i32 195, i32 0, metadata !12, null}
+!79 = metadata !{i32 196, i32 0, metadata !12, null}
+!80 = metadata !{i32 197, i32 0, metadata !12, null}
+!81 = metadata !{i32 198, i32 0, metadata !12, null}
diff --git a/test/DebugInfo/X86/multiple-at-const-val.ll b/test/DebugInfo/X86/multiple-at-const-val.ll
new file mode 100644
index 0000000..5f3e0d9
--- /dev/null
+++ b/test/DebugInfo/X86/multiple-at-const-val.ll
@@ -0,0 +1,59 @@
+; RUN: llc -O0 %s -mtriple=x86_64-apple-darwin -filetype=obj -o %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; rdar://13071590
+; Check we are not emitting multiple AT_const_value for a single member.
+; CHECK: .debug_info contents:
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_TAG_class_type
+; CHECK: DW_TAG_member
+; CHECK: badbit
+; CHECK: DW_AT_const_value [DW_FORM_data4]	(0x00000001)
+; CHECK-NOT: DW_AT_const_value
+; CHECK: NULL
+
+%"class.std::basic_ostream" = type { i32 (...)**, %"class.std::basic_os" }
+%"class.std::basic_os" = type { %"class.std::os_base", %"class.std::basic_ostream"*, i8, i8 }
+%"class.std::os_base" = type { i32 (...)**, i64, i64, i32, i32, i32 }
+
+@_ZSt4cout = external global %"class.std::basic_ostream"
+@.str = private unnamed_addr constant [6 x i8] c"c is \00", align 1
+
+define i32 @main() { ; Single-block function: one call to the external @test, then return 0.
+entry:
+  %call1.i = tail call %"class.std::basic_ostream"* @test(%"class.std::basic_ostream"* @_ZSt4cout, i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i64 5) ; Args: @_ZSt4cout, address of the first byte of @.str, constant 5.
+  ret i32 0 ; %call1.i is not used after the call.
+}
+
+declare %"class.std::basic_ostream"* @test(%"class.std::basic_ostream"*, i8*, i64)
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"student2.cpp", metadata !"/privite/tmp", metadata !"clang version 3.3 (trunk 174207)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !955, metadata !956, metadata !1786} ; [ DW_TAG_compile_unit ] [/privite/tmp/student2.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !26}
+!4 = metadata !{i32 786489, null, metadata !"std", metadata !5, i32 48} ; [ DW_TAG_namespace ]
+!5 = metadata !{i32 786473, metadata !"os_base.h", metadata !"/privite/tmp", null} ; [ DW_TAG_file_type ]
+!25 = metadata !{i32 786472, metadata !"_S_os_fmtflags_end", i64 65536} ; [ DW_TAG_enumerator ]
+!26 = metadata !{i32 786436, metadata !4, metadata !"_Ios_Iostate", metadata !5, i32 146, i64 32, i64 32, i32 0, i32 0, null, metadata !27, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!27 = metadata !{metadata !28, metadata !29, metadata !30, metadata !31, metadata !32}
+!28 = metadata !{i32 786472, metadata !"_S_goodbit", i64 0} ; [ DW_TAG_enumerator ] [_S_goodbit :: 0]
+!29 = metadata !{i32 786472, metadata !"_S_badbit", i64 1} ; [ DW_TAG_enumerator ] [_S_badbit :: 1]
+!30 = metadata !{i32 786472, metadata !"_S_eofbit", i64 2} ; [ DW_TAG_enumerator ] [_S_eofbit :: 2]
+!31 = metadata !{i32 786472, metadata !"_S_failbit", i64 4} ; [ DW_TAG_enumerator ] [_S_failbit :: 4]
+!32 = metadata !{i32 786472, metadata !"_S_os_ostate_end", i64 65536} ; [ DW_TAG_enumerator ] [_S_os_ostate_end :: 65536]
+!49 = metadata !{i32 786434, metadata !4, metadata !"os_base", metadata !5, i32 200, i64 1728, i64 64, i32 0, i32 0, null, metadata !50, i32 0, metadata !49, null} ; [ DW_TAG_class_type ]
+!50 = metadata !{metadata !77}
+!54 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !55, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!55 = metadata !{metadata !56}
+!56 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!77 = metadata !{i32 786445, metadata !49, metadata !"badbit", metadata !5, i32 331, i64 0, i64 0, i64 0, i32 4096, metadata !78, i32 1} ; [ DW_TAG_member ]
+!78 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !79} ; [ DW_TAG_const_type ]
+!79 = metadata !{i32 786454, metadata !49, metadata !"ostate", metadata !5, i32 327, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_typedef ]
+!955 = metadata !{i32 0}
+!956 = metadata !{metadata !960}
+!960 = metadata !{i32 786478, i32 0, metadata !961, metadata !"main", metadata !"main", metadata !"", metadata !961, i32 73, metadata !54, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !955, i32 73} ; [ DW_TAG_subprogram ]
+!961 = metadata !{i32 786473, metadata !"student2.cpp", metadata !"/privite/tmp", null} ; [ DW_TAG_file_type ]
+!1786 = metadata !{metadata !1800}
+!1800 = metadata !{i32 786484, i32 0, metadata !5, metadata !"badbit", metadata !"badbit", metadata !"badbit", metadata !5, i32 331, metadata !78, i32 1, i32 1, i32 1, metadata !77} ; [ DW_TAG_variable ]
diff --git a/test/DebugInfo/X86/nondefault-subrange-array.ll b/test/DebugInfo/X86/nondefault-subrange-array.ll
index 6247cc3..fcc2912 100644
--- a/test/DebugInfo/X86/nondefault-subrange-array.ll
+++ b/test/DebugInfo/X86/nondefault-subrange-array.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin -O0 -filetype=obj -o %t < %s
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 %class.A = type { [42 x i32] }
 
@@ -28,10 +28,8 @@
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 4, metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm", metadata !"clang version 3.3 (trunk 169136)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %class.A* @a} ; [ DW_TAG_variable ] [a] [line 1] [def]
 !6 = metadata !{i32 786473, metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [from ]
diff --git a/test/DebugInfo/X86/objc-fwd-decl.ll b/test/DebugInfo/X86/objc-fwd-decl.ll
index 1a815f9..eead9e1 100644
--- a/test/DebugInfo/X86/objc-fwd-decl.ll
+++ b/test/DebugInfo/X86/objc-fwd-decl.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-macosx %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; CHECK: 0x00000027:   DW_TAG_structure_type
 ; CHECK: 0x0000002c:     DW_AT_declaration
@@ -13,10 +13,8 @@
 !llvm.module.flags = !{!9, !10, !11, !12}
 
 !0 = metadata !{i32 786449, i32 0, i32 16, metadata !"foo.m", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 152054 trunk 152094)", i1 true, i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, %0** @a} ; [ DW_TAG_variable ]
 !6 = metadata !{i32 786473, metadata !"foo.m", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
diff --git a/test/DebugInfo/X86/op_deref.ll b/test/DebugInfo/X86/op_deref.ll
index b0b09e7..13efe21 100644
--- a/test/DebugInfo/X86/op_deref.ll
+++ b/test/DebugInfo/X86/op_deref.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; CHECK: DW_AT_name [DW_FORM_strp]  ( .debug_str[0x00000067] = "vla")
 ; FIXME: The location here needs to be fixed, but llvm-dwarfdump doesn't handle
@@ -60,10 +60,8 @@ declare void @llvm.stackrestore(i8*) nounwind
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 12, metadata !"bar.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.2 (trunk 156005) (llvm/trunk 156000)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"testVLAwithSize", metadata !"testVLAwithSize", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @testVLAwithSize, null, null, metadata !1, i32 2} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !"bar.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/DebugInfo/X86/pointer-type-size.ll b/test/DebugInfo/X86/pointer-type-size.ll
index f11fbe4..8e203c9 100644
--- a/test/DebugInfo/X86/pointer-type-size.ll
+++ b/test/DebugInfo/X86/pointer-type-size.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-apple-macosx10.7 %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; CHECK: ptr
 ; CHECK-NOT: AT_bit_size
@@ -11,10 +11,8 @@
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 720913, i32 0, i32 12, metadata !"foo.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 147882)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 720948, i32 0, null, metadata !"crass", metadata !"crass", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %struct.crass* @crass} ; [ DW_TAG_variable ]
 !6 = metadata !{i32 720937, metadata !"foo.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720915, null, metadata !"crass", metadata !6, i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
diff --git a/test/DebugInfo/X86/pr11300.ll b/test/DebugInfo/X86/pr11300.ll
index 5a001ee..b0c8f37 100644
--- a/test/DebugInfo/X86/pr11300.ll
+++ b/test/DebugInfo/X86/pr11300.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; test that the DW_AT_specification is a back edge in the file.
 
@@ -32,10 +32,8 @@ entry:
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 720913, i32 0, i32 4, metadata !"/home/espindola/llvm/test.cc", metadata !"/home/espindola/tmpfs/build", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !20}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !20}
 !5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"zed", metadata !"zed", metadata !"_Z3zedP3foo", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_Z3zedP3foo, null, null, metadata !18} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 720937, metadata !"/home/espindola/llvm/test.cc", metadata !"/home/espindola/tmpfs/build", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/DebugInfo/X86/pr12831.ll b/test/DebugInfo/X86/pr12831.ll
index abb946d..3970583 100644
--- a/test/DebugInfo/X86/pr12831.ll
+++ b/test/DebugInfo/X86/pr12831.ll
@@ -78,10 +78,8 @@ entry:
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 4, metadata !"BPLFunctionWriter.cpp", metadata !"/home/peter/crashdelta", metadata !"clang version 3.2 ", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !128} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !106, metadata !107, metadata !126, metadata !127}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !106, metadata !107, metadata !126, metadata !127}
 !5 = metadata !{i32 786478, i32 0, null, metadata !"writeExpr", metadata !"writeExpr", metadata !"_ZN17BPLFunctionWriter9writeExprEv", metadata !6, i32 19, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.BPLFunctionWriter*)* @_ZN17BPLFunctionWriter9writeExprEv, null, metadata !103, metadata !1, i32 19} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !"BPLFunctionWriter2.ii", metadata !"/home/peter/crashdelta", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
@@ -205,8 +203,7 @@ entry:
 !125 = metadata !{i32 786468}                     ; [ DW_TAG_base_type ]
 !126 = metadata !{i32 786478, i32 0, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", metadata !6, i32 8, metadata !23, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", metadata !47, metadata !22, metadata !1, i32 8} ; [ DW_TAG_subprogram ]
 !127 = metadata !{i32 786478, i32 0, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !6, i32 3, metadata !117, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !120, metadata !116, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
-!128 = metadata !{metadata !129}
-!129 = metadata !{metadata !130}
+!128 = metadata !{metadata !130}
 !130 = metadata !{i32 786484, i32 0, metadata !114, metadata !"__stored_locally", metadata !"__stored_locally", metadata !"__stored_locally", metadata !6, i32 2, metadata !131, i32 1, i32 1, i1 true} ; [ DW_TAG_variable ]
 !131 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !132} ; [ DW_TAG_const_type ]
 !132 = metadata !{i32 786468, null, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/X86/pr13303.ll b/test/DebugInfo/X86/pr13303.ll
index e820cb5..a72bc9b 100644
--- a/test/DebugInfo/X86/pr13303.ll
+++ b/test/DebugInfo/X86/pr13303.ll
@@ -1,5 +1,5 @@
 ; RUN: llc %s -o %t -filetype=obj -mtriple=x86_64-unknown-linux-gnu
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=line %t | FileCheck %s
 ; PR13303
 
 ; Check that the prologue ends with is_stmt here.
@@ -15,10 +15,8 @@ entry:
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 12, metadata !"PR13303.c", metadata !"/home/probinson", metadata !"clang version 3.2 (trunk 160143)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/home/probinson/PR13303.c] [DW_LANG_C99]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
 !6 = metadata !{i32 786473, metadata !"PR13303.c", metadata !"/home/probinson", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
diff --git a/test/DebugInfo/X86/prologue-stack.ll b/test/DebugInfo/X86/prologue-stack.ll
index 929db51..0651e59 100644
--- a/test/DebugInfo/X86/prologue-stack.ll
+++ b/test/DebugInfo/X86/prologue-stack.ll
@@ -21,10 +21,8 @@ declare i32 @callme(i32)
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 12, metadata !"bar.c", metadata !"/usr/local/google/home/echristo/tmp", metadata !"clang version 3.2 (trunk 164980) (llvm/trunk 164979)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/bar.c] [DW_LANG_C99]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"isel_line_test2", metadata !"isel_line_test2", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @isel_line_test2, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [isel_line_test2]
 !6 = metadata !{i32 786473, metadata !"bar.c", metadata !"/usr/local/google/home/echristo/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
diff --git a/test/DebugInfo/X86/rvalue-ref.ll b/test/DebugInfo/X86/rvalue-ref.ll
index e73869d..136db0e 100644
--- a/test/DebugInfo/X86/rvalue-ref.ll
+++ b/test/DebugInfo/X86/rvalue-ref.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj -O0
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; CHECK: DW_TAG_rvalue_reference_type
 
@@ -23,10 +23,8 @@ declare i32 @printf(i8*, ...)
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.2 (trunk 157054) (llvm/trunk 157060)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooOi", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @_Z3fooOi, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
new file mode 100644
index 0000000..601d08f
--- /dev/null
+++ b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
@@ -0,0 +1,62 @@
+; RUN: llc -O0 %s -mtriple=x86_64-apple-darwin -filetype=obj -o %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; rdar://13067005
+; CHECK: .debug_info contents:
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_AT_low_pc [DW_FORM_addr]       (0x0000000000000000)
+; CHECK: DW_AT_stmt_list [DW_FORM_data4]   (0x00000000)
+
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_AT_low_pc [DW_FORM_addr]       (0x0000000000000000)
+; CHECK: DW_AT_stmt_list [DW_FORM_data4]   (0x00000049)
+
+; CHECK: .debug_line contents:
+; CHECK-NEXT: Line table prologue:
+; CHECK-NEXT: total_length: 0x00000045
+; CHECK: Line table prologue:
+; CHECK: total_length: 0x00000047
+
+define i32 @test(i32 %a) nounwind uwtable ssp { ; Returns @fn(%a); described by subprogram !5, listed under compile unit !0 (simple.c).
+entry:
+  %a.addr = alloca i32, align 4 ; Stack slot for the argument.
+  store i32 %a, i32* %a.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !15), !dbg !16 ; !15 is the DW_TAG_arg_variable record for "a".
+  %0 = load i32* %a.addr, align 4, !dbg !17 ; Reload the argument from its slot.
+  %call = call i32 @fn(i32 %0), !dbg !17 ; @fn is described by subprogram !13, listed under the second compile unit !10.
+  ret i32 %call, !dbg !17
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @fn(i32 %a) nounwind uwtable ssp { ; Returns its argument unchanged; described by subprogram !13, listed under compile unit !10 (simple2.c).
+entry:
+  %a.addr = alloca i32, align 4 ; Stack slot for the argument.
+  store i32 %a, i32* %a.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !19), !dbg !20 ; !19 is the DW_TAG_arg_variable record for "a".
+  %0 = load i32* %a.addr, align 4, !dbg !21 ; Reload the argument from its slot.
+  ret i32 %0, !dbg !21
+}
+
+!llvm.dbg.cu = !{!0, !10}
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"simple.c", metadata !"/private/tmp", metadata !"clang version 3.3", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"test", metadata !"test", metadata !"", metadata !6, i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @test, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [test]
+!6 = metadata !{i32 786473, metadata !"simple.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786449, i32 0, i32 12, metadata !"simple2.c", metadata !"/private/tmp", metadata !"clang version 3.3 (trunk 172862)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !11, metadata !1} ; [ DW_TAG_compile_unit ]
+!11 = metadata !{metadata !13}
+!13 = metadata !{i32 786478, i32 0, metadata !14, metadata !"fn", metadata !"fn", metadata !"", metadata !14, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @fn, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [fn]
+!14 = metadata !{i32 786473, metadata !"simple2.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
+!15 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777218, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 2]
+!16 = metadata !{i32 2, i32 0, metadata !5, null}
+!17 = metadata !{i32 4, i32 0, metadata !18, null}
+!18 = metadata !{i32 786443, metadata !5, i32 3, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 786689, metadata !13, metadata !"a", metadata !14, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 1]
+!20 = metadata !{i32 1, i32 0, metadata !13, null}
+!21 = metadata !{i32 2, i32 0, metadata !22, null}
+!22 = metadata !{i32 786443, metadata !13, i32 1, i32 0, metadata !14, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/X86/stringpool.ll b/test/DebugInfo/X86/stringpool.ll
index 21b0d09..1e08d54 100644
--- a/test/DebugInfo/X86/stringpool.ll
+++ b/test/DebugInfo/X86/stringpool.ll
@@ -6,10 +6,8 @@
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 720913, i32 0, i32 12, metadata !"z.c", metadata !"/home/nicholas", metadata !"clang version 3.1 (trunk 143009)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 720948, i32 0, null, metadata !"yyyy", metadata !"yyyy", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, i32* @yyyy} ; [ DW_TAG_variable ]
 !6 = metadata !{i32 720937, metadata !"z.c", metadata !"/home/nicholas", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/X86/struct-loc.ll b/test/DebugInfo/X86/struct-loc.ll
index 9a04738..485aa61 100644
--- a/test/DebugInfo/X86/struct-loc.ll
+++ b/test/DebugInfo/X86/struct-loc.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; Make sure that structures have a decl file and decl line attached.
 ; CHECK: DW_TAG_structure_type [3]
@@ -14,10 +14,8 @@
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 12, metadata !"struct_bug.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 152837) (llvm/trunk 152845)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 5, metadata !7, i32 0, i32 1, %struct.foo* @f} ; [ DW_TAG_variable ]
 !6 = metadata !{i32 786473, metadata !"struct_bug.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786451, null, metadata !"foo", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
diff --git a/test/DebugInfo/X86/subrange-type.ll b/test/DebugInfo/X86/subrange-type.ll
index 15202fb..e9d3e77 100644
--- a/test/DebugInfo/X86/subrange-type.ll
+++ b/test/DebugInfo/X86/subrange-type.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -O0 %s -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; Make sure that the base type from the subrange type has a name.
 ; CHECK: 0x0000006b:   DW_TAG_base_type [6]
@@ -21,10 +21,8 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 12, metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp", metadata !"clang version 3.3 (trunk 171472) (llvm/trunk 171487)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [main]
 !6 = metadata !{i32 786473, metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
diff --git a/test/DebugInfo/X86/vector.ll b/test/DebugInfo/X86/vector.ll
index 7b61e76..0ff99cc 100644
--- a/test/DebugInfo/X86/vector.ll
+++ b/test/DebugInfo/X86/vector.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-linux-gnu -O0 -filetype=obj -o %t %s
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; Generated from:
 ; clang -g -S -emit-llvm -o foo.ll foo.c
@@ -12,10 +12,8 @@
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 12, metadata !"foo.c", metadata !"/Users/echristo", metadata !"clang version 3.3 (trunk 171825) (llvm/trunk 171822)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [/Users/echristo/foo.c] [DW_LANG_C99]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, <4 x i32>* @a} ; [ DW_TAG_variable ] [a] [line 3] [def]
 !6 = metadata !{i32 786473, metadata !"foo.c", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786454, null, metadata !"v4si", metadata !6, i32 1, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] [v4si] [line 1, size 0, align 0, offset 0] [from ]
diff --git a/test/DebugInfo/debuglineinfo.test b/test/DebugInfo/debuglineinfo.test
new file mode 100644
index 0000000..14d2f82
--- /dev/null
+++ b/test/DebugInfo/debuglineinfo.test
@@ -0,0 +1,49 @@
+RUN: llvm-rtdyld -printline %p/Inputs/test-inline.o \
+RUN:   | FileCheck %s -check-prefix TEST_INLINE
+RUN: llvm-rtdyld -printline %p/Inputs/test-parameters.o \
+RUN:   | FileCheck %s -check-prefix TEST_PARAMETERS
+
+; This test verifies that relocations are correctly applied to the
+; .debug_line section and exercises DIContext::getLineInfoForAddressRange().
+; If relocations are not applied the first two functions will be reported as
+; both starting at address zero in the line number table.
+TEST_INLINE:      Function: _Z15test_parametersPfPA2_dR11char_structPPitm, Size = 170
+TEST_INLINE-NEXT: Line info @ 0: test-inline.cpp, line:33
+TEST_INLINE-NEXT: Line info @ 35: test-inline.cpp, line:34
+TEST_INLINE-NEXT: Line info @ 165: test-inline.cpp, line:35
+TEST_INLINE-NEXT: Function: _Z3foov, Size = 3
+TEST_INLINE-NEXT: Line info @ 0: test-inline.cpp, line:28
+TEST_INLINE-NEXT: Line info @ 2: test-inline.cpp, line:29
+TEST_INLINE-NEXT: Line info @ 3: test-inline.cpp, line:29
+TEST_INLINE-NEXT: Function: main, Size = 146
+TEST_INLINE-NEXT: Line info @ 0: test-inline.cpp, line:39
+TEST_INLINE-NEXT: Line info @ 21: test-inline.cpp, line:41
+TEST_INLINE-NEXT: Line info @ 39: test-inline.cpp, line:42
+TEST_INLINE-NEXT: Line info @ 60: test-inline.cpp, line:44
+TEST_INLINE-NEXT: Line info @ 80: test-inline.cpp, line:48
+TEST_INLINE-NEXT: Line info @ 90: test-inline.cpp, line:45
+TEST_INLINE-NEXT: Line info @ 95: test-inline.cpp, line:46
+TEST_INLINE-NEXT: Line info @ 114: test-inline.cpp, line:48 
+TEST_INLINE-NEXT: Line info @ 141: test-inline.cpp, line:49
+TEST_INLINE-NEXT: Line info @ 146: test-inline.cpp, line:49
+
+; This test checks the case where all code is in a single section.
+TEST_PARAMETERS:      Function: _Z15test_parametersPfPA2_dR11char_structPPitm, Size = 170
+TEST_PARAMETERS-NEXT: Line info @ 0: test-parameters.cpp, line:33
+TEST_PARAMETERS-NEXT: Line info @ 35: test-parameters.cpp, line:34
+TEST_PARAMETERS-NEXT: Line info @ 165: test-parameters.cpp, line:35
+TEST_PARAMETERS-NEXT: Function: _Z3foov, Size = 3
+TEST_PARAMETERS-NEXT: Line info @ 0: test-parameters.cpp, line:28
+TEST_PARAMETERS-NEXT: Line info @ 2: test-parameters.cpp, line:29
+TEST_PARAMETERS-NEXT: Function: main, Size = 146
+TEST_PARAMETERS-NEXT: Line info @ 0: test-parameters.cpp, line:39
+TEST_PARAMETERS-NEXT: Line info @ 21: test-parameters.cpp, line:41
+TEST_PARAMETERS-NEXT: Line info @ 39: test-parameters.cpp, line:42
+TEST_PARAMETERS-NEXT: Line info @ 60: test-parameters.cpp, line:44
+TEST_PARAMETERS-NEXT: Line info @ 80: test-parameters.cpp, line:48
+TEST_PARAMETERS-NEXT: Line info @ 90: test-parameters.cpp, line:45
+TEST_PARAMETERS-NEXT: Line info @ 95: test-parameters.cpp, line:46
+TEST_PARAMETERS-NEXT: Line info @ 114: test-parameters.cpp, line:48 
+TEST_PARAMETERS-NEXT: Line info @ 141: test-parameters.cpp, line:49
+TEST_PARAMETERS-NEXT: Line info @ 146: test-parameters.cpp, line:49
+
diff --git a/test/DebugInfo/dwarf-public-names.ll b/test/DebugInfo/dwarf-public-names.ll
new file mode 100644
index 0000000..4ef4197
--- /dev/null
+++ b/test/DebugInfo/dwarf-public-names.ll
@@ -0,0 +1,124 @@
+; RUN: llc -generate-dwarf-pubnames -filetype=obj -o %t.o < %s
+; RUN: llvm-dwarfdump -debug-dump=pubnames %t.o | FileCheck %s
+;
+; ModuleID = 'dwarf-public-names.cpp'
+;
+; Generated from:
+;
+; struct C {
+;   void member_function();
+;   static int static_member_function();
+;   static int static_member_variable;
+; };
+;
+; int C::static_member_variable = 0;
+;
+; void C::member_function() {
+;   static_member_variable = 0;
+; }
+;
+; int C::static_member_function() {
+;   return static_member_variable;
+; }
+;
+; C global_variable;
+;
+; int global_function() {
+;   return -1;
+; }
+;
+; namespace ns {
+;   void global_namespace_function() {
+;     global_variable.member_function();
+;   }
+;   int global_namespace_variable = 1;
+; }
+
+; Skip the output to the header of the pubnames section.
+; CHECK: debug_pubnames
+
+; Check for each name in the output.
+; CHECK: global_namespace_variable
+; CHECK: global_namespace_function
+; CHECK: static_member_function
+; CHECK: global_variable
+; CHECK: global_function
+; CHECK: member_function
+
+%struct.C = type { i8 }
+
+@_ZN1C22static_member_variableE = global i32 0, align 4
+@global_variable = global %struct.C zeroinitializer, align 1
+@_ZN2ns25global_namespace_variableE = global i32 1, align 4
+
+define void @_ZN1C15member_functionEv(%struct.C* %this) nounwind uwtable align 2 { ; C::member_function() from the "Generated from" source above
+entry:
+  %this.addr = alloca %struct.C*, align 8 ; stack slot for the incoming 'this' pointer
+  store %struct.C* %this, %struct.C** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr}, metadata !28), !dbg !30 ; !28 describes the 'this' argument variable
+  %this1 = load %struct.C** %this.addr
+  store i32 0, i32* @_ZN1C22static_member_variableE, align 4, !dbg !31 ; static_member_variable = 0
+  ret void, !dbg !32
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @_ZN1C22static_member_functionEv() nounwind uwtable align 2 { ; C::static_member_function() from the "Generated from" source above
+entry:
+  %0 = load i32* @_ZN1C22static_member_variableE, align 4, !dbg !33 ; read C::static_member_variable
+  ret i32 %0, !dbg !33 ; return the value just loaded
+}
+
+define i32 @_Z15global_functionv() nounwind uwtable { ; global_function() from the "Generated from" source above
+entry:
+  ret i32 -1, !dbg !34 ; always returns -1
+}
+
+define void @_ZN2ns25global_namespace_functionEv() nounwind uwtable { ; ns::global_namespace_function() from the "Generated from" source above
+entry:
+  call void @_ZN1C15member_functionEv(%struct.C* @global_variable), !dbg !35 ; global_variable.member_function()
+  ret void, !dbg !36
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"dwarf-public-names.cpp", metadata !"/usr2/kparzysz/s.hex/t", metadata !"clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !24} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 0}
+!2 = metadata !{metadata !3, metadata !18, metadata !19, metadata !20}
+!3 = metadata !{i32 786478, i32 0, null, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", metadata !4, i32 9, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C15member_functionEv, null, metadata !12, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function]
+!4 = metadata !{i32 786473, metadata !"dwarf-public-names.cpp", metadata !"/usr2/kparzysz/s.hex/t", null} ; [ DW_TAG_file_type ]
+!5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = metadata !{null, metadata !7}
+!7 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from C]
+!8 = metadata !{i32 786451, null, metadata !"C", metadata !4, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [from ]
+!9 = metadata !{metadata !10, metadata !12, metadata !14}
+!10 = metadata !{i32 786445, metadata !8, metadata !"static_member_variable", metadata !4, i32 4, i64 0, i64 0, i64 0, i32 4096, metadata !11, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int]
+!11 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = metadata !{i32 786478, i32 0, metadata !8, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", metadata !4, i32 2, metadata !5, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !13, i32 2} ; [ DW_TAG_subprogram ] [line 2] [member_function]
+!13 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!14 = metadata !{i32 786478, i32 0, metadata !8, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", metadata !4, i32 3, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17, i32 3} ; [ DW_TAG_subprogram ] [line 3] [static_member_function]
+!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{metadata !11}
+!17 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!18 = metadata !{i32 786478, i32 0, null, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", metadata !4, i32 13, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !14, metadata !1, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function]
+!19 = metadata !{i32 786478, i32 0, metadata !4, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", metadata !4, i32 19, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !1, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function]
+!20 = metadata !{i32 786478, i32 0, metadata !21, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", metadata !4, i32 24, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !1, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function]
+!21 = metadata !{i32 786489, null, metadata !"ns", metadata !4, i32 23} ; [ DW_TAG_namespace ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp]
+!22 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!23 = metadata !{null}
+!24 = metadata !{metadata !25, metadata !26, metadata !27}
+!25 = metadata !{i32 786484, i32 0, metadata !8, metadata !"static_member_variable", metadata !"static_member_variable", metadata !"_ZN1C22static_member_variableE", metadata !4, i32 7, metadata !11, i32 0, i32 1, i32* @_ZN1C22static_member_variableE, metadata !10} ; [ DW_TAG_variable ] [static_member_variable] [line 7] [def]
+!26 = metadata !{i32 786484, i32 0, null, metadata !"global_variable", metadata !"global_variable", metadata !"", metadata !4, i32 17, metadata !8, i32 0, i32 1, %struct.C* @global_variable, null} ; [ DW_TAG_variable ] [global_variable] [line 17] [def]
+!27 = metadata !{i32 786484, i32 0, metadata !21, metadata !"global_namespace_variable", metadata !"global_namespace_variable", metadata !"_ZN2ns25global_namespace_variableE", metadata !4, i32 27, metadata !11, i32 0, i32 1, i32* @_ZN2ns25global_namespace_variableE, null} ; [ DW_TAG_variable ] [global_namespace_variable] [line 27] [def]
+!28 = metadata !{i32 786689, metadata !3, metadata !"this", metadata !4, i32 16777225, metadata !29, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 9]
+!29 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from C]
+!30 = metadata !{i32 9, i32 0, metadata !3, null}
+!31 = metadata !{i32 10, i32 0, metadata !3, null}
+!32 = metadata !{i32 11, i32 0, metadata !3, null}
+!33 = metadata !{i32 14, i32 0, metadata !18, null}
+!34 = metadata !{i32 20, i32 0, metadata !19, null}
+!35 = metadata !{i32 25, i32 0, metadata !20, null}
+!36 = metadata !{i32 26, i32 0, metadata !20, null}
diff --git a/test/DebugInfo/dwarfdump-debug-frame-simple.test b/test/DebugInfo/dwarfdump-debug-frame-simple.test
new file mode 100644
index 0000000..c2427d8
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-debug-frame-simple.test
@@ -0,0 +1,28 @@
+; RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test-32bit.elf.o -debug-dump=frames | FileCheck %s -check-prefix FRAMES
+; Note: the input file was generated from Inputs/dwarfdump-test-32bit.elf.c
+
+; FRAMES: .debug_frame
+; FRAMES-NOT: .eh_frame
+
+; FRAMES: 00000000 00000010 ffffffff CIE
+; FRAMES: Version: 1
+; FRAMES:      DW_CFA_def_cfa
+; FRAMES-NEXT: DW_CFA_offset
+; FRAMES-NEXT: DW_CFA_nop
+; FRAMES-NEXT: DW_CFA_nop
+
+; FRAMES: 00000014 00000010 00000000 FDE cie=00000000 pc=00000000...00000022
+; FRAMES:      DW_CFA_advance_loc
+; FRAMES-NEXT: DW_CFA_def_cfa_offset
+; FRAMES-NEXT: DW_CFA_nop
+
+; FRAMES: 00000028 00000014 00000000 FDE cie=00000000 pc=00000030...00000080
+; FRAMES:      DW_CFA_advance_loc
+; FRAMES-NEXT: DW_CFA_def_cfa_offset
+; FRAMES-NEXT: DW_CFA_offset
+; FRAMES-NEXT: DW_CFA_advance_loc
+; FRAMES-NEXT: DW_CFA_def_cfa_register
+
+; FRAMES-NOT: CIE
+; FRAMES-NOT: FDE
+
diff --git a/test/DebugInfo/dwarfdump-dump-flags.test b/test/DebugInfo/dwarfdump-dump-flags.test
new file mode 100644
index 0000000..92b2d50
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-dump-flags.test
@@ -0,0 +1,13 @@
+; RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 -debug-dump=all | FileCheck %s -check-prefix DUMP_ALL
+; RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 -debug-dump=info | FileCheck %s -check-prefix DUMP_INFO
+; RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 -debug-dump=ranges | FileCheck %s -check-prefix DUMP_RANGES
+
+; DUMP_ALL: .debug_info
+; DUMP_ALL: .debug_ranges
+
+; DUMP_INFO: .debug_info
+; DUMP_INFO-NOT: .debug_ranges
+
+; DUMP_RANGES-NOT: .debug_info
+; DUMP_RANGES: .debug_ranges
+
diff --git a/test/DebugInfo/dwarfdump-inlining.test b/test/DebugInfo/dwarfdump-inlining.test
index d3a7e12..e926634 100644
--- a/test/DebugInfo/dwarfdump-inlining.test
+++ b/test/DebugInfo/dwarfdump-inlining.test
@@ -1,28 +1,28 @@
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x613 \
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x710 \
 RUN:   --inlining --functions | FileCheck %s -check-prefix DEEP_STACK
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x6de \
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x7d1 \
 RUN:   --inlining | FileCheck %s -check-prefix SHORTER_STACK
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x685 \
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x785 \
 RUN:   --inlining | FileCheck %s -check-prefix SHORT_STACK
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x640 \
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x737 \
 RUN:   --functions | FileCheck %s -check-prefix INL_FUNC_NAME
 
 DEEP_STACK:      inlined_h
-DEEP_STACK-NEXT: header.h:2:21
+DEEP_STACK-NEXT: dwarfdump-inl-test.h:2
 DEEP_STACK-NEXT: inlined_g
-DEEP_STACK-NEXT: header.h:7
+DEEP_STACK-NEXT: dwarfdump-inl-test.h:7
 DEEP_STACK-NEXT: inlined_f
-DEEP_STACK-NEXT: main.cc:3
+DEEP_STACK-NEXT: dwarfdump-inl-test.cc:3
 DEEP_STACK-NEXT: main
-DEEP_STACK-NEXT: main.cc:8
+DEEP_STACK-NEXT: dwarfdump-inl-test.cc:8
 
-SHORTER_STACK:      header.h:7:20
-SHORTER_STACK-NEXT: main.cc:3
-SHORTER_STACK-NEXT: main.cc:8
+SHORTER_STACK:      dwarfdump-inl-test.h:7
+SHORTER_STACK-NEXT: dwarfdump-inl-test.cc:3
+SHORTER_STACK-NEXT: dwarfdump-inl-test.cc:8
 
-SHORT_STACK:      main.cc:3:20
-SHORT_STACK-NEXT: main.cc:8
+SHORT_STACK:      dwarfdump-inl-test.cc:3
+SHORT_STACK-NEXT: dwarfdump-inl-test.cc:8
 
 INL_FUNC_NAME:      inlined_g
-INL_FUNC_NAME-NEXT: header.h:7:20
+INL_FUNC_NAME-NEXT: dwarfdump-inl-test.h:7
 
diff --git a/test/DebugInfo/dwarfdump-pubnames.test b/test/DebugInfo/dwarfdump-pubnames.test
new file mode 100644
index 0000000..e1b16c2
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-pubnames.test
@@ -0,0 +1,16 @@
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-pubnames.elf-x86-64 \
+RUN:   -debug-dump=pubnames | FileCheck %s
+
+CHECK: .debug_pubnames contents:
+CHECK: Length:                161
+CHECK: Version:               2
+CHECK: Offset in .debug_info: 0
+CHECK: Size:                  321
+
+CHECK:  Offset    Name
+CHECK:      98    global_namespace_variable
+CHECK:      a7    global_namespace_function
+CHECK:      ec    static_member_function
+CHECK:      7c    global_variable
+CHECK:     103    global_function
+CHECK:      c2    member_function
diff --git a/test/DebugInfo/dwarfdump-test.test b/test/DebugInfo/dwarfdump-test.test
index 973c344..355445e 100644
--- a/test/DebugInfo/dwarfdump-test.test
+++ b/test/DebugInfo/dwarfdump-test.test
@@ -1,56 +1,56 @@
 RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64  \
-RUN:   --address=0x400589 --functions | FileCheck %s -check-prefix MAIN
+RUN:   --address=0x400559 --functions | FileCheck %s -check-prefix MAIN
 RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64  \
-RUN:   --address=0x400558 --functions | FileCheck %s -check-prefix FUNCTION
+RUN:   --address=0x400528 --functions | FileCheck %s -check-prefix FUNCTION
 RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 \
-RUN:   --address=0x4005b6 --functions | FileCheck %s -check-prefix CTOR_WITH_SPEC
+RUN:   --address=0x400586 --functions | FileCheck %s -check-prefix CTOR_WITH_SPEC
 RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test2.elf-x86-64 \
-RUN:   --address=0x4004b8 --functions | FileCheck %s -check-prefix MANY_CU_1
+RUN:   --address=0x4004e8 --functions | FileCheck %s -check-prefix MANY_CU_1
 RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test2.elf-x86-64 \
-RUN:   --address=0x4004c4 --functions | FileCheck %s -check-prefix MANY_CU_2
+RUN:   --address=0x4004f4 --functions | FileCheck %s -check-prefix MANY_CU_2
 RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test3.elf-x86-64 \
-RUN:   --address=0x580 --functions | FileCheck %s -check-prefix ABS_ORIGIN_1
+RUN:   --address=0x640 --functions | FileCheck %s -check-prefix ABS_ORIGIN_1
 RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test3.elf-x86-64 \
-RUN:   --address=0x573 --functions | FileCheck %s -check-prefix INCLUDE_TEST_1
+RUN:   --address=0x633 --functions | FileCheck %s -check-prefix INCLUDE_TEST_1
 RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test3.elf-x86-64 \
-RUN:   --address=0x56d --functions | FileCheck %s -check-prefix INCLUDE_TEST_2
+RUN:   --address=0x62d --functions | FileCheck %s -check-prefix INCLUDE_TEST_2
 RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test4.elf-x86-64 \
-RUN:   --address=0x55c --functions \
+RUN:   --address=0x62c --functions \
 RUN:   | FileCheck %s -check-prefix MANY_SEQ_IN_LINE_TABLE
 RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test4.elf-x86-64 \
 RUN:   | FileCheck %s -check-prefix DEBUG_RANGES
 
 MAIN: main
-MAIN-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16:10
+MAIN-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16
 
 FUNCTION: _Z1fii
-FUNCTION-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:11:18
+FUNCTION-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:11
 
-CTOR_WITH_SPEC: _ZN10DummyClassC1Ei
-CTOR_WITH_SPEC-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:4:30
+CTOR_WITH_SPEC: DummyClass
+CTOR_WITH_SPEC-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:4
 
 MANY_CU_1: a
-MANY_CU_1-NEXT: /tmp/dbginfo{{[/\\]}}a.cc:2:0
+MANY_CU_1-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test2-helper.cc:2
 
 MANY_CU_2: main
-MANY_CU_2-NEXT: /tmp/dbginfo{{[/\\]}}main.cc:4:0
+MANY_CU_2-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test2-main.cc:4
 
 ABS_ORIGIN_1: C
-ABS_ORIGIN_1-NEXT: /tmp/dbginfo{{[/\\]}}def2.cc:4:0
+ABS_ORIGIN_1-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test3.cc:3
 
-INCLUDE_TEST_1: _Z3do2v
-INCLUDE_TEST_1-NEXT: /tmp/dbginfo{{[/\\]}}include{{[/\\]}}decl2.h:1:0
+INCLUDE_TEST_1: _Z3do1v
+INCLUDE_TEST_1-NEXT: /tmp/include{{[/\\]}}dwarfdump-test3-decl.h:7
 
-INCLUDE_TEST_2: _Z3do1v
-INCLUDE_TEST_2-NEXT: /tmp/include{{[/\\]}}decl.h:5:0
+INCLUDE_TEST_2: _Z3do2v
+INCLUDE_TEST_2-NEXT: /tmp/dbginfo{{[/\\]}}include{{[/\\]}}dwarfdump-test3-decl2.h:1
 
 MANY_SEQ_IN_LINE_TABLE: _Z1cv
-MANY_SEQ_IN_LINE_TABLE-NEXT: /tmp/dbginfo/sequences{{[/\\]}}c.cc:2:0
+MANY_SEQ_IN_LINE_TABLE-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test4-part1.cc:2
 
 DEBUG_RANGES:      .debug_ranges contents:
-DEBUG_RANGES-NEXT: 00000000 000000000000055c 0000000000000567
-DEBUG_RANGES-NEXT: 00000000 0000000000000567 000000000000056d
+DEBUG_RANGES-NEXT: 00000000 000000000000062c 0000000000000637
+DEBUG_RANGES-NEXT: 00000000 0000000000000637 000000000000063d
 DEBUG_RANGES-NEXT: 00000000 <End of list>
-DEBUG_RANGES-NEXT: 00000030 0000000000000570 000000000000057b
-DEBUG_RANGES-NEXT: 00000030 0000000000000567 000000000000056d
+DEBUG_RANGES-NEXT: 00000030 0000000000000640 000000000000064b
+DEBUG_RANGES-NEXT: 00000030 0000000000000637 000000000000063d
 DEBUG_RANGES-NEXT: 00000030 <End of list>
diff --git a/test/DebugInfo/inlined-vars.ll b/test/DebugInfo/inlined-vars.ll
index ed4e7da..b25f3fa 100644
--- a/test/DebugInfo/inlined-vars.ll
+++ b/test/DebugInfo/inlined-vars.ll
@@ -4,8 +4,8 @@
 
 define i32 @main() uwtable {
 entry:
-  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !18), !dbg !21
-  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !22), !dbg !23
+  tail call void @llvm.dbg.value(metadata !1, i64 0, metadata !18), !dbg !21
+  tail call void @llvm.dbg.value(metadata !1, i64 0, metadata !22), !dbg !23
   tail call void @smth(i32 0), !dbg !24
   tail call void @smth(i32 0), !dbg !25
   ret i32 0, !dbg !19
@@ -18,10 +18,8 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 4, metadata !"inline-bug.cc", metadata !"/tmp/dbginfo/pr13202", metadata !"clang version 3.2 (trunk 159419)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !10}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !10}
 !5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 10, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !1, i32 10} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !"inline-bug.cc", metadata !"/tmp/dbginfo/pr13202", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
@@ -39,19 +37,17 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 ; ARGUMENT: {{.*Abbrev.*DW_TAG_formal_parameter}}
 ; ARGUMENT-NOT: {{.*Abbrev.*DW_TAG_formal_parameter}}
 
-!16 = metadata !{i32 786688, metadata !17, metadata !"local", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!16 = metadata !{i32 786688, metadata !10, metadata !"local", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
 
 ; Two DW_TAG_variable: one abstract and one inlined.
 ; VARIABLE: {{.*Abbrev.*DW_TAG_variable}}
 ; VARIABLE: {{.*Abbrev.*DW_TAG_variable}}
 ; VARIABLE-NOT: {{.*Abbrev.*DW_TAG_variable}}
 
-!17 = metadata !{i32 786443, metadata !10, i32 3, i32 35, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
 !18 = metadata !{i32 786689, metadata !10, metadata !"argument", metadata !6, i32 16777219, metadata !9, i32 0, metadata !19} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 11, i32 10, metadata !20, null}
-!20 = metadata !{i32 786443, metadata !5, i32 10, i32 12, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 11, i32 10, metadata !5, null}
 !21 = metadata !{i32 3, i32 25, metadata !10, metadata !19}
-!22 = metadata !{i32 786688, metadata !17, metadata !"local", metadata !6, i32 4, metadata !9, i32 0, metadata !19} ; [ DW_TAG_auto_variable ]
-!23 = metadata !{i32 4, i32 16, metadata !17, metadata !19}
-!24 = metadata !{i32 5, i32 3, metadata !17, metadata !19}
-!25 = metadata !{i32 6, i32 3, metadata !17, metadata !19}
+!22 = metadata !{i32 786688, metadata !10, metadata !"local", metadata !6, i32 4, metadata !9, i32 0, metadata !19} ; [ DW_TAG_auto_variable ]
+!23 = metadata !{i32 4, i32 16, metadata !10, metadata !19}
+!24 = metadata !{i32 5, i32 3, metadata !10, metadata !19}
+!25 = metadata !{i32 6, i32 3, metadata !10, metadata !19}
diff --git a/test/DebugInfo/member-pointers.ll b/test/DebugInfo/member-pointers.ll
index 47874d9..1dbadf2 100644
--- a/test/DebugInfo/member-pointers.ll
+++ b/test/DebugInfo/member-pointers.ll
@@ -1,7 +1,12 @@
 ; RUN: llc -filetype=obj -O0 < %s > %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 ; CHECK: DW_TAG_ptr_to_member_type
+; CHECK: [[TYPE:.*]]:   DW_TAG_subroutine_type
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_type
+; CHECK-NEXT: DW_AT_artificial [DW_FORM_flag_present]
 ; CHECK: DW_TAG_ptr_to_member_type
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]       (cu + {{.*}} => {[[TYPE]]})
 ; IR generated from clang -g with the following source:
 ; struct S {
 ; };
@@ -15,16 +20,15 @@
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 4, metadata !"simple.cpp", metadata !"/home/blaikie/Development/scratch", metadata !"clang version 3.3 ", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [/home/blaikie/Development/scratch/simple.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !10}
-!5 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 2, metadata !7, i32 0, i32 1, i64* @x} ; [ DW_TAG_variable ] [x] [line 2] [def]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !10}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 4, metadata !7, i32 0, i32 1, i64* @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def]
 !6 = metadata !{i32 786473, metadata !"simple.cpp", metadata !"/home/blaikie/Development/scratch", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8, metadata !9} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from int]
 !8 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786451, null, metadata !"S", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null} ; [ DW_TAG_structure_type ] [S] [line 1, size 8, align 8, offset 0] [from ]
-!10 = metadata !{i32 786484, i32 0, null, metadata !"y", metadata !"y", metadata !"", metadata !6, i32 3, metadata !11, i32 0, i32 1, { i64, i64 }* @y} ; [ DW_TAG_variable ] [y] [line 3] [def]
+!9 = metadata !{i32 786451, null, metadata !"S", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !1, i32 0, null, null} ; [ DW_TAG_structure_type ] [S] [line 1, size 8, align 8, offset 0] [from ]
+!10 = metadata !{i32 786484, i32 0, null, metadata !"y", metadata !"y", metadata !"", metadata !6, i32 5, metadata !11, i32 0, i32 1, { i64, i64 }* @y, null} ; [ DW_TAG_variable ] [y] [line 5] [def]
 !11 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !12, metadata !9} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from ]
 !12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!13 = metadata !{null, metadata !8}
+!13 = metadata !{null, metadata !14, metadata !8}
+!14 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from S]
diff --git a/test/DebugInfo/two-cus-from-same-file.ll b/test/DebugInfo/two-cus-from-same-file.ll
index d01aeea..fe50920 100644
--- a/test/DebugInfo/two-cus-from-same-file.ll
+++ b/test/DebugInfo/two-cus-from-same-file.ll
@@ -4,7 +4,7 @@
 ;
 
 ; RUN: llc %s -o %t -filetype=obj -O0
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; ModuleID = 'test.bc'
 
@@ -33,17 +33,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !llvm.dbg.cu = !{!0, !9}
 
 !0 = metadata !{i32 786449, i32 0, i32 12, metadata !"foo.c", metadata !"/tmp", metadata !"clang version 3.2 (trunk 156513)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @foo, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !"foo.c", metadata !"/tmp", null} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{null}
 !9 = metadata !{i32 786449, i32 0, i32 12, metadata !"foo.c", metadata !"/tmp", metadata !"clang version 3.2 (trunk 156513)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !10, metadata !1} ; [ DW_TAG_compile_unit ]
-!10 = metadata !{metadata !11}
-!11 = metadata !{metadata !12}
+!10 = metadata !{metadata !12}
 !12 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 11, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !19, i32 11} ; [ DW_TAG_subprogram ]
 !13 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !14 = metadata !{metadata !15, metadata !15, metadata !16}
diff --git a/test/ExecutionEngine/2002-12-16-ArgTest.ll b/test/ExecutionEngine/2002-12-16-ArgTest.ll
index 4c03519..e04bf03 100644
--- a/test/ExecutionEngine/2002-12-16-ArgTest.ll
+++ b/test/ExecutionEngine/2002-12-16-ArgTest.ll
@@ -1,5 +1,5 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
+; XFAIL: armv7
 
 @.LC0 = internal global [10 x i8] c"argc: %d\0A\00"		; <[10 x i8]*> [#uses=1]
 
diff --git a/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll b/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
index 28cc54a..babd8f6 100644
--- a/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
+++ b/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 @.LC0 = internal global [10 x i8] c"argc: %d\0A\00"		; <[10 x i8]*> [#uses=1]
 
diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll b/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll
index 9f89598..bbb81b8 100644
--- a/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll
+++ b/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define i32 @foo(i32 %X, i32 %Y, double %A) {
 	%cond212 = fcmp une double %A, 1.000000e+00		; <i1> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll b/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll
index 997b2a9..7574267 100644
--- a/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll
+++ b/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define i32 @main() {
 	call i32 @mylog( i32 4 )		; <i32>:1 [#uses=0]
diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll b/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll
index ba35b5b..261939a 100644
--- a/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll
+++ b/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define i32 @main() {
 ; <label>:0
diff --git a/test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll b/test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll
index f3c88ad..f76f998 100644
--- a/test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll
+++ b/test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 ; We were accidentally inverting the signedness of right shifts.  Whoops.
 
diff --git a/test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll b/test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll
index f925e79..2b83bb9 100644
--- a/test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll
+++ b/test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define i32 @main() {
 	%X = fadd double 0.000000e+00, 1.000000e+00		; <double> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll b/test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll
index 5b426f6..d1ca2be 100644
--- a/test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll
+++ b/test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define i32 @bar(i8* %X) {
         ; pointer should be 4 byte aligned!
diff --git a/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll b/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll
index c0a7393..20ef0ff 100644
--- a/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll
+++ b/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll
@@ -1,6 +1,6 @@
 ; This testcase should return with an exit code of 1.
 ;
-; RUN: not %lli -mtriple=%mcjit_triple -use-mcjit %s
+; RUN: not %lli_mcjit %s
 
 @test = global i64 0		; <i64*> [#uses=1]
 
diff --git a/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll b/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll
index d3e6204..c7bcc54 100644
--- a/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll
+++ b/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s test
+; RUN: %lli_mcjit %s test
 
 declare i32 @puts(i8*)
 
diff --git a/test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll b/test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll
index 55a1697..0512575 100644
--- a/test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll
+++ b/test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 target datalayout = "e-p:32:32"
 
diff --git a/test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll b/test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll
index 79c6e7f..c292a81 100644
--- a/test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll
+++ b/test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 ; Testcase distilled from 256.bzip2.
 
diff --git a/test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll b/test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll
index ffd6df6..c0a83f5 100644
--- a/test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll
+++ b/test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 ; Testcase distilled from 256.bzip2.
 
diff --git a/test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll b/test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll
index 90839e9..55ce689 100644
--- a/test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll
+++ b/test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 ; This testcase failed to work because two variable sized allocas confused the
 ; local register allocator.
diff --git a/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll b/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll
index 29ef2c5..2e99996 100644
--- a/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll
+++ b/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 ;
 ; Regression Test: EnvironmentTest.ll
diff --git a/test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll b/test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll
index 2adb608..659901b 100644
--- a/test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll
+++ b/test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 ; This testcase exposes a bug in the local register allocator where it runs out
 ; of registers (due to too many overlapping live ranges), but then attempts to
diff --git a/test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll b/test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
index 91bde46..68e31a7 100644
--- a/test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
+++ b/test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 @A = global i32 0		; <i32*> [#uses=1]
 
diff --git a/test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll b/test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll
index a7462d9..69f4ec8 100644
--- a/test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll
+++ b/test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll
@@ -1,6 +1,6 @@
 ; PR672
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s
-; XFAIL: mcjit-ia32
+; RUN: %lli_mcjit %s
+; XFAIL: mcjit-ia32, armv5
 
 define i32 @main() {
 	%f = bitcast i32 (i32, i32*, i32)* @check_tail to i32*		; <i32*> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll b/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll
index 2406596..43188f2 100644
--- a/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll
+++ b/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -force-interpreter %s
+; RUN: %lli_mcjit -force-interpreter %s
 ; PR1836
 
 define i32 @main() {
diff --git a/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll b/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
index d429d51..0912897 100644
--- a/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
+++ b/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -force-interpreter=true %s | grep 1
+; RUN: %lli_mcjit -force-interpreter=true %s | grep 1
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
diff --git a/test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll b/test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll
index a6d18e7..7ed0e38 100644
--- a/test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll
+++ b/test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -force-interpreter=true %s > /dev/null
+; RUN: %lli_mcjit -force-interpreter=true %s > /dev/null
 
 define i32 @main() {
        %a = add i32 0, undef
diff --git a/test/ExecutionEngine/MCJIT/fpbitcast.ll b/test/ExecutionEngine/MCJIT/fpbitcast.ll
index bb4957e..fb5ab6f 100644
--- a/test/ExecutionEngine/MCJIT/fpbitcast.ll
+++ b/test/ExecutionEngine/MCJIT/fpbitcast.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -force-interpreter=true %s | grep 40091eb8
+; RUN: %lli_mcjit -force-interpreter=true %s | grep 40091eb8
 ;
 define i32 @test(double %x) {
 entry:
diff --git a/test/ExecutionEngine/MCJIT/hello.ll b/test/ExecutionEngine/MCJIT/hello.ll
index ceb9c12..b744707 100644
--- a/test/ExecutionEngine/MCJIT/hello.ll
+++ b/test/ExecutionEngine/MCJIT/hello.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 @.LC0 = internal global [12 x i8] c"Hello World\00"		; <[12 x i8]*> [#uses=1]
 
diff --git a/test/ExecutionEngine/MCJIT/hello2.ll b/test/ExecutionEngine/MCJIT/hello2.ll
index 756fcad..cd033d5 100644
--- a/test/ExecutionEngine/MCJIT/hello2.ll
+++ b/test/ExecutionEngine/MCJIT/hello2.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 @X = global i32 7		; <i32*> [#uses=0]
 @msg = internal global [13 x i8] c"Hello World\0A\00"		; <[13 x i8]*> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/pr13727.ll b/test/ExecutionEngine/MCJIT/pr13727.ll
index c33bf32..1c719c5 100644
--- a/test/ExecutionEngine/MCJIT/pr13727.ll
+++ b/test/ExecutionEngine/MCJIT/pr13727.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -O0 -disable-lazy-compilation=false %s
+; RUN: %lli_mcjit -O0 -disable-lazy-compilation=false %s
 
 ; The intention of this test is to verify that symbols mapped to COMMON in ELF
 ; work as expected.
diff --git a/test/ExecutionEngine/MCJIT/simplesttest.ll b/test/ExecutionEngine/MCJIT/simplesttest.ll
index 02ad006..318baf4 100644
--- a/test/ExecutionEngine/MCJIT/simplesttest.ll
+++ b/test/ExecutionEngine/MCJIT/simplesttest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define i32 @main() {
 	ret i32 0
diff --git a/test/ExecutionEngine/MCJIT/simpletest-remote.ll b/test/ExecutionEngine/MCJIT/simpletest-remote.ll
index 272204c..9ceaf54 100644
--- a/test/ExecutionEngine/MCJIT/simpletest-remote.ll
+++ b/test/ExecutionEngine/MCJIT/simpletest-remote.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -remote-mcjit %s > /dev/null
+; RUN: %lli_mcjit -remote-mcjit %s > /dev/null
 ; XFAIL: arm, mips
 
 define i32 @bar() {
diff --git a/test/ExecutionEngine/MCJIT/simpletest.ll b/test/ExecutionEngine/MCJIT/simpletest.ll
index 958b783..5b0f2dd 100644
--- a/test/ExecutionEngine/MCJIT/simpletest.ll
+++ b/test/ExecutionEngine/MCJIT/simpletest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define i32 @bar() {
 	ret i32 0
diff --git a/test/ExecutionEngine/MCJIT/stubs-remote.ll b/test/ExecutionEngine/MCJIT/stubs-remote.ll
index 4c7684f..15cb5d0 100644
--- a/test/ExecutionEngine/MCJIT/stubs-remote.ll
+++ b/test/ExecutionEngine/MCJIT/stubs-remote.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -remote-mcjit -disable-lazy-compilation=false %s
+; RUN: %lli_mcjit -remote-mcjit -disable-lazy-compilation=false %s
 ; XFAIL: arm, mips
 
 define i32 @main() nounwind {
diff --git a/test/ExecutionEngine/MCJIT/stubs.ll b/test/ExecutionEngine/MCJIT/stubs.ll
index 9e5d5b2..f4aac33 100644
--- a/test/ExecutionEngine/MCJIT/stubs.ll
+++ b/test/ExecutionEngine/MCJIT/stubs.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -disable-lazy-compilation=false %s
+; RUN: %lli_mcjit -disable-lazy-compilation=false %s
 
 define i32 @main() nounwind {
 entry:
diff --git a/test/ExecutionEngine/MCJIT/test-arith.ll b/test/ExecutionEngine/MCJIT/test-arith.ll
index b73227f..e1cc23b 100644
--- a/test/ExecutionEngine/MCJIT/test-arith.ll
+++ b/test/ExecutionEngine/MCJIT/test-arith.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define i32 @main() {
 	%A = add i8 0, 12		; <i8> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/test-branch.ll b/test/ExecutionEngine/MCJIT/test-branch.ll
index 8f3c727..cdf1035 100644
--- a/test/ExecutionEngine/MCJIT/test-branch.ll
+++ b/test/ExecutionEngine/MCJIT/test-branch.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 ; test unconditional branch
 define i32 @main() {
diff --git a/test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll b/test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll
index 20150b2..8a36cf2 100644
--- a/test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll
+++ b/test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define i32 @_Z14func_exit_codev() nounwind uwtable {
 entry:
diff --git a/test/ExecutionEngine/MCJIT/test-call.ll b/test/ExecutionEngine/MCJIT/test-call.ll
index 51d19fe..ed593e3 100644
--- a/test/ExecutionEngine/MCJIT/test-call.ll
+++ b/test/ExecutionEngine/MCJIT/test-call.ll
@@ -1,4 +1,5 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
+; XFAIL: armv5
 
 declare void @exit(i32)
 
diff --git a/test/ExecutionEngine/MCJIT/test-cast.ll b/test/ExecutionEngine/MCJIT/test-cast.ll
index dcc97f4..335ec50 100644
--- a/test/ExecutionEngine/MCJIT/test-cast.ll
+++ b/test/ExecutionEngine/MCJIT/test-cast.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define i32 @foo() {
 	ret i32 0
diff --git a/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll b/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll
index d666a2a..989a473 100644
--- a/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll
+++ b/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -O0 %s
+; RUN: %lli_mcjit -O0 %s
 
 ; This test checks that common symbols have been allocated addresses honouring
 ; the alignment requirement.
diff --git a/test/ExecutionEngine/MCJIT/test-common-symbols-remote.ll b/test/ExecutionEngine/MCJIT/test-common-symbols-remote.ll
index 285ce5c..3b8ee9d 100644
--- a/test/ExecutionEngine/MCJIT/test-common-symbols-remote.ll
+++ b/test/ExecutionEngine/MCJIT/test-common-symbols-remote.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -remote-mcjit -O0 -disable-lazy-compilation=false %s
+; RUN: %lli_mcjit -remote-mcjit -O0 -disable-lazy-compilation=false %s
 ; XFAIL: arm, mips
 
 ; The intention of this test is to verify that symbols mapped to COMMON in ELF
diff --git a/test/ExecutionEngine/MCJIT/test-common-symbols.ll b/test/ExecutionEngine/MCJIT/test-common-symbols.ll
index 8c81902..13ee06a 100644
--- a/test/ExecutionEngine/MCJIT/test-common-symbols.ll
+++ b/test/ExecutionEngine/MCJIT/test-common-symbols.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -O0 -disable-lazy-compilation=false %s
+; RUN: %lli_mcjit -O0 -disable-lazy-compilation=false %s
 
 ; The intention of this test is to verify that symbols mapped to COMMON in ELF
 ; work as expected.
diff --git a/test/ExecutionEngine/MCJIT/test-constantexpr.ll b/test/ExecutionEngine/MCJIT/test-constantexpr.ll
index 56c1290..8f15cbd 100644
--- a/test/ExecutionEngine/MCJIT/test-constantexpr.ll
+++ b/test/ExecutionEngine/MCJIT/test-constantexpr.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 ; This tests to make sure that we can evaluate weird constant expressions
 
diff --git a/test/ExecutionEngine/MCJIT/test-data-align-remote.ll b/test/ExecutionEngine/MCJIT/test-data-align-remote.ll
index a1591d0..9daf168 100644
--- a/test/ExecutionEngine/MCJIT/test-data-align-remote.ll
+++ b/test/ExecutionEngine/MCJIT/test-data-align-remote.ll
@@ -1,5 +1,5 @@
-; RUN:  %lli -mtriple=%mcjit_triple -use-mcjit -remote-mcjit -O0 %s
-; XFAIL: arm, mips
+; RUN:  %lli_mcjit -remote-mcjit -O0 %s
+; XFAIL: armv7, mips
 
 ; Check that a variable is always aligned as specified.
 
diff --git a/test/ExecutionEngine/MCJIT/test-data-align.ll b/test/ExecutionEngine/MCJIT/test-data-align.ll
index 0493cba..2472d95 100644
--- a/test/ExecutionEngine/MCJIT/test-data-align.ll
+++ b/test/ExecutionEngine/MCJIT/test-data-align.ll
@@ -1,4 +1,4 @@
-; RUN:  %lli -mtriple=%mcjit_triple -use-mcjit -O0 %s
+; RUN:  %lli_mcjit -O0 %s
 
 ; Check that a variable is always aligned as specified.
 
diff --git a/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs-remote.ll b/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs-remote.ll
index 69c73b9..847d225 100644
--- a/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs-remote.ll
+++ b/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs-remote.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -remote-mcjit %s > /dev/null
+; RUN: %lli_mcjit -remote-mcjit %s > /dev/null
 ; XFAIL: arm, mips
 
 define double @test(double* %DP, double %Arg) {
diff --git a/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll b/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll
index 7af1d8b..f094f3d 100644
--- a/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll
+++ b/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define double @test(double* %DP, double %Arg) {
 	%D = load double* %DP		; <double> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/test-fp.ll b/test/ExecutionEngine/MCJIT/test-fp.ll
index f7e6fb9..b10e9d6 100644
--- a/test/ExecutionEngine/MCJIT/test-fp.ll
+++ b/test/ExecutionEngine/MCJIT/test-fp.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define double @test(double* %DP, double %Arg) {
 	%D = load double* %DP		; <double> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/test-global-ctors.ll b/test/ExecutionEngine/MCJIT/test-global-ctors.ll
index fbe9118..4510d9b 100644
--- a/test/ExecutionEngine/MCJIT/test-global-ctors.ll
+++ b/test/ExecutionEngine/MCJIT/test-global-ctors.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 @var = global i32 1, align 4
 @llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @ctor_func }]
 @llvm.global_dtors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @dtor_func }]
diff --git a/test/ExecutionEngine/MCJIT/test-global-init-nonzero-remote.ll b/test/ExecutionEngine/MCJIT/test-global-init-nonzero-remote.ll
index 8b7c83e..b8d94b5 100644
--- a/test/ExecutionEngine/MCJIT/test-global-init-nonzero-remote.ll
+++ b/test/ExecutionEngine/MCJIT/test-global-init-nonzero-remote.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -remote-mcjit %s > /dev/null
+; RUN: %lli_mcjit -remote-mcjit %s > /dev/null
 ; XFAIL: arm, mips
 
 @count = global i32 1, align 4
diff --git a/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll b/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll
index ec6cbad..b9f74b8 100644
--- a/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll
+++ b/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 @count = global i32 1, align 4
 
diff --git a/test/ExecutionEngine/MCJIT/test-global.ll b/test/ExecutionEngine/MCJIT/test-global.ll
index e7972f9..6a8c042 100644
--- a/test/ExecutionEngine/MCJIT/test-global.ll
+++ b/test/ExecutionEngine/MCJIT/test-global.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 @count = global i32 0, align 4
 
diff --git a/test/ExecutionEngine/MCJIT/test-loadstore.ll b/test/ExecutionEngine/MCJIT/test-loadstore.ll
index f450d0a..9038194 100644
--- a/test/ExecutionEngine/MCJIT/test-loadstore.ll
+++ b/test/ExecutionEngine/MCJIT/test-loadstore.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define void @test(i8* %P, i16* %P.upgrd.1, i32* %P.upgrd.2, i64* %P.upgrd.3) {
 	%V = load i8* %P		; <i8> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/test-local.ll b/test/ExecutionEngine/MCJIT/test-local.ll
index d4e9f44..d7c1734 100644
--- a/test/ExecutionEngine/MCJIT/test-local.ll
+++ b/test/ExecutionEngine/MCJIT/test-local.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define i32 @main() nounwind uwtable {
 entry:
diff --git a/test/ExecutionEngine/MCJIT/test-logical.ll b/test/ExecutionEngine/MCJIT/test-logical.ll
index 32f45ef..a03833e 100644
--- a/test/ExecutionEngine/MCJIT/test-logical.ll
+++ b/test/ExecutionEngine/MCJIT/test-logical.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define i32 @main() {
 	%A = and i8 4, 8		; <i8> [#uses=2]
diff --git a/test/ExecutionEngine/MCJIT/test-loop.ll b/test/ExecutionEngine/MCJIT/test-loop.ll
index ebc6896..5ed8c40 100644
--- a/test/ExecutionEngine/MCJIT/test-loop.ll
+++ b/test/ExecutionEngine/MCJIT/test-loop.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define i32 @main() {
 ; <label>:0
diff --git a/test/ExecutionEngine/MCJIT/test-phi.ll b/test/ExecutionEngine/MCJIT/test-phi.ll
index 1408533..4245cca 100644
--- a/test/ExecutionEngine/MCJIT/test-phi.ll
+++ b/test/ExecutionEngine/MCJIT/test-phi.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 ; test phi node
 @Y = global i32 6		; <i32*> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/test-ptr-reloc-remote.ll b/test/ExecutionEngine/MCJIT/test-ptr-reloc-remote.ll
index 773e4a1..f2c2cd6 100644
--- a/test/ExecutionEngine/MCJIT/test-ptr-reloc-remote.ll
+++ b/test/ExecutionEngine/MCJIT/test-ptr-reloc-remote.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -remote-mcjit -O0 %s
+; RUN: %lli_mcjit -remote-mcjit -O0 %s
 ; XFAIL: arm, mips
 
 @.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
diff --git a/test/ExecutionEngine/MCJIT/test-ptr-reloc.ll b/test/ExecutionEngine/MCJIT/test-ptr-reloc.ll
index 93b6a6d..871d8bf 100644
--- a/test/ExecutionEngine/MCJIT/test-ptr-reloc.ll
+++ b/test/ExecutionEngine/MCJIT/test-ptr-reloc.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -O0 %s
+; RUN: %lli_mcjit -O0 %s
 
 @.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
 @ptr = global i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), align 4
diff --git a/test/ExecutionEngine/MCJIT/test-ret.ll b/test/ExecutionEngine/MCJIT/test-ret.ll
index af28292..6bfc480 100644
--- a/test/ExecutionEngine/MCJIT/test-ret.ll
+++ b/test/ExecutionEngine/MCJIT/test-ret.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 ; test return instructions
 define void @test1() {
diff --git a/test/ExecutionEngine/MCJIT/test-return.ll b/test/ExecutionEngine/MCJIT/test-return.ll
index 67f7107..4db1c3f 100644
--- a/test/ExecutionEngine/MCJIT/test-return.ll
+++ b/test/ExecutionEngine/MCJIT/test-return.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define i32 @main() nounwind uwtable {
 entry:
diff --git a/test/ExecutionEngine/MCJIT/test-setcond-fp.ll b/test/ExecutionEngine/MCJIT/test-setcond-fp.ll
index a8f4bd8..b4367d0 100644
--- a/test/ExecutionEngine/MCJIT/test-setcond-fp.ll
+++ b/test/ExecutionEngine/MCJIT/test-setcond-fp.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 
 define i32 @main() {
diff --git a/test/ExecutionEngine/MCJIT/test-setcond-int.ll b/test/ExecutionEngine/MCJIT/test-setcond-int.ll
index ed52b50..8c7d815 100644
--- a/test/ExecutionEngine/MCJIT/test-setcond-int.ll
+++ b/test/ExecutionEngine/MCJIT/test-setcond-int.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define i32 @main() {
 	%int1 = add i32 0, 0		; <i32> [#uses=6]
diff --git a/test/ExecutionEngine/MCJIT/test-shift.ll b/test/ExecutionEngine/MCJIT/test-shift.ll
index 5a5c10d..8d9a94e 100644
--- a/test/ExecutionEngine/MCJIT/test-shift.ll
+++ b/test/ExecutionEngine/MCJIT/test-shift.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
 
 define i32 @main() {
 	%shamt = add i8 0, 1		; <i8> [#uses=8]
diff --git a/test/ExecutionEngine/lit.local.cfg b/test/ExecutionEngine/lit.local.cfg
index f034326..dd6a5bb 100644
--- a/test/ExecutionEngine/lit.local.cfg
+++ b/test/ExecutionEngine/lit.local.cfg
@@ -7,6 +7,6 @@ def getRoot(config):
 
 root = getRoot(config)
 
-if root.host_arch in ['PowerPC']:
+if root.host_arch in ['PowerPC', 'AArch64']:
     config.unsupported = True
 
diff --git a/test/ExecutionEngine/test-call.ll b/test/ExecutionEngine/test-call.ll
index 3fd39fe..563d486 100644
--- a/test/ExecutionEngine/test-call.ll
+++ b/test/ExecutionEngine/test-call.ll
@@ -1,5 +1,5 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
+; XFAIL: armv7
 
 declare void @exit(i32)
 
diff --git a/test/ExecutionEngine/test-fp-no-external-funcs.ll b/test/ExecutionEngine/test-fp-no-external-funcs.ll
index 139b2ef..92cc0d6 100644
--- a/test/ExecutionEngine/test-fp-no-external-funcs.ll
+++ b/test/ExecutionEngine/test-fp-no-external-funcs.ll
@@ -1,5 +1,5 @@
 ; RUN: %lli  %s > /dev/null
-; XFAIL: arm
+; XFAIL: armv7
 
 define double @test(double* %DP, double %Arg) {
 	%D = load double* %DP		; <double> [#uses=1]
diff --git a/test/ExecutionEngine/test-fp.ll b/test/ExecutionEngine/test-fp.ll
index c906450..68a8182 100644
--- a/test/ExecutionEngine/test-fp.ll
+++ b/test/ExecutionEngine/test-fp.ll
@@ -1,5 +1,5 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
+; XFAIL: armv7
 
 define double @test(double* %DP, double %Arg) {
 	%D = load double* %DP		; <double> [#uses=1]
diff --git a/test/Feature/attributes.ll b/test/Feature/attributes.ll
new file mode 100644
index 0000000..7707d82
--- /dev/null
+++ b/test/Feature/attributes.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llvm-dis > %t1.ll
+; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
+; RUN: diff %t1.ll %t2.ll
+
+@.str = private unnamed_addr constant [14 x i8] c"hello world!\0A\00", align 1
+
+define void @foo() #0 {
+entry:
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str, i32 0, i32 0))
+  ret void
+}
+
+declare i32 @printf(i8*, ...)
+
+attributes #0 = { nounwind ssp uwtable }
diff --git a/test/Feature/intrinsics.ll b/test/Feature/intrinsics.ll
index 9e7dc6d..28be053 100644
--- a/test/Feature/intrinsics.ll
+++ b/test/Feature/intrinsics.ll
@@ -61,10 +61,14 @@ define void @libm() {
 ; FIXME: test ALL the intrinsics in this file.
 
 ; rdar://11542750
-; CHECK: declare void @llvm.trap() noreturn nounwind
+; CHECK: declare void @llvm.trap() #2
 declare void @llvm.trap()
 
 define void @trap() {
   call void @llvm.trap()
   ret void
 }
+
+; CHECK: attributes #0 = { nounwind readnone }
+; CHECK: attributes #1 = { nounwind readonly }
+; CHECK: attributes #2 = { noreturn nounwind }
diff --git a/test/Feature/minsize_attr.ll b/test/Feature/minsize_attr.ll
index 51b133c..1f915b3 100644
--- a/test/Feature/minsize_attr.ll
+++ b/test/Feature/minsize_attr.ll
@@ -1,7 +1,8 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
 
 define void @test1() minsize {
-; CHECK: define void @test1() minsize
+; CHECK: define void @test1() #0
         ret void
 }
 
+; CHECK: attributes #0 = { minsize }
diff --git a/test/FileCheck/dos-style-eol.txt b/test/FileCheck/dos-style-eol.txt
new file mode 100644
index 0000000..4252aad
--- /dev/null
+++ b/test/FileCheck/dos-style-eol.txt
@@ -0,0 +1,11 @@
+// Test for using FileCheck on DOS style end-of-line
+// This test was deliberately committed with DOS style end of line.
+// Don't change line endings!
+// RUN: FileCheck -input-file %s %s
+// RUN: FileCheck  --strict-whitespace -input-file %s %s
+
+LINE 1
+; CHECK: {{^}}LINE 1{{$}}
+
+LINE 2
+; CHECK: {{^}}LINE 2{{$}}
+\ No newline at end of file
diff --git a/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll b/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
index 35c5c4a..38168fc 100644
--- a/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
+++ b/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
@@ -36,14 +36,14 @@ target triple = "i386-unknown-linux-gnu"
 @ff_mlp_firorder_7 = external global i8
 @ff_mlp_firorder_8 = external global i8
 
-define void @ff_mlp_init_x86(%struct.DSPContext* nocapture %c, %struct.AVCodecContext* nocapture %avctx) nounwind address_safety {
+define void @ff_mlp_init_x86(%struct.DSPContext* nocapture %c, %struct.AVCodecContext* nocapture %avctx) nounwind sanitize_address {
 entry:
   %mlp_filter_channel = getelementptr inbounds %struct.DSPContext* %c, i32 0, i32 131
   store void (i32*, i32*, i32, i32, i32, i32, i32, i32*)* @mlp_filter_channel_x86, void (i32*, i32*, i32, i32, i32, i32, i32, i32*)** %mlp_filter_channel, align 4, !tbaa !0
   ret void
 }
 
-define internal void @mlp_filter_channel_x86(i32* %state, i32* %coeff, i32 %firorder, i32 %iirorder, i32 %filter_shift, i32 %mask, i32 %blocksize, i32* %sample_buffer) nounwind address_safety {
+define internal void @mlp_filter_channel_x86(i32* %state, i32* %coeff, i32 %firorder, i32 %iirorder, i32 %filter_shift, i32 %mask, i32 %blocksize, i32* %sample_buffer) nounwind sanitize_address {
 entry:
   %filter_shift.addr = alloca i32, align 4
   %mask.addr = alloca i32, align 4
diff --git a/test/Instrumentation/AddressSanitizer/adaptive_global_redzones.ll b/test/Instrumentation/AddressSanitizer/adaptive_global_redzones.ll
new file mode 100644
index 0000000..6a60d1c
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/adaptive_global_redzones.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -asan -asan-module -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Here we check that the global redzone sizes grow with the object size.
+
+@G10 = global [10 x i8] zeroinitializer, align 1
+; CHECK: @G10 = global { [10 x i8], [54 x i8] }
+
+@G31 = global [31 x i8] zeroinitializer, align 1
+@G32 = global [32 x i8] zeroinitializer, align 1
+@G33 = global [33 x i8] zeroinitializer, align 1
+; CHECK: @G31 = global { [31 x i8], [33 x i8] }
+; CHECK: @G32 = global { [32 x i8], [32 x i8] }
+; CHECK: @G33 = global { [33 x i8], [63 x i8] }
+
+@G63 = global [63 x i8] zeroinitializer, align 1
+@G64 = global [64 x i8] zeroinitializer, align 1
+@G65 = global [65 x i8] zeroinitializer, align 1
+; CHECK: @G63 = global { [63 x i8], [33 x i8] }
+; CHECK: @G64 = global { [64 x i8], [32 x i8] }
+; CHECK: @G65 = global { [65 x i8], [63 x i8] }
+
+@G127 = global [127 x i8] zeroinitializer, align 1
+@G128 = global [128 x i8] zeroinitializer, align 1
+@G129 = global [129 x i8] zeroinitializer, align 1
+; CHECK: @G127 = global { [127 x i8], [33 x i8] }
+; CHECK: @G128 = global { [128 x i8], [32 x i8] }
+; CHECK: @G129 = global { [129 x i8], [63 x i8] }
+
+@G255 = global [255 x i8] zeroinitializer, align 1
+@G256 = global [256 x i8] zeroinitializer, align 1
+@G257 = global [257 x i8] zeroinitializer, align 1
+; CHECK: @G255 = global { [255 x i8], [33 x i8] }
+; CHECK: @G256 = global { [256 x i8], [64 x i8] }
+; CHECK: @G257 = global { [257 x i8], [95 x i8] }
+
+@G511 = global [511 x i8] zeroinitializer, align 1
+@G512 = global [512 x i8] zeroinitializer, align 1
+@G513 = global [513 x i8] zeroinitializer, align 1
+; CHECK: @G511 = global { [511 x i8], [97 x i8] }
+; CHECK: @G512 = global { [512 x i8], [128 x i8] }
+; CHECK: @G513 = global { [513 x i8], [159 x i8] }
+
+@G1023 = global [1023 x i8] zeroinitializer, align 1
+@G1024 = global [1024 x i8] zeroinitializer, align 1
+@G1025 = global [1025 x i8] zeroinitializer, align 1
+; CHECK: @G1023 = global { [1023 x i8], [225 x i8] }
+; CHECK: @G1024 = global { [1024 x i8], [256 x i8] }
+; CHECK: @G1025 = global { [1025 x i8], [287 x i8] }
+
+@G1000000 = global [1000000 x i8] zeroinitializer, align 1
+@G10000000 = global [10000000 x i8] zeroinitializer, align 1
+@G100000000 = global [100000000 x i8] zeroinitializer, align 1
+; CHECK: @G1000000 = global { [1000000 x i8], [249984 x i8] }
+; CHECK: @G10000000 = global { [10000000 x i8], [262144 x i8] }
+; CHECK: @G100000000 = global { [100000000 x i8], [262144 x i8] }
diff --git a/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll b/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
index c0fe15e..da8f541 100644
--- a/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
+++ b/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
@@ -11,9 +11,9 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 
 @f = global %struct_of_7_bytes_4_aligned zeroinitializer, align 4
 
-; Accessing bytes 4 and 6, not ok to widen to i32 if address_safety is set.
+; Accessing bytes 4 and 6, not ok to widen to i32 if sanitize_address is set.
 
-define i32 @test_widening_bad(i8* %P) nounwind ssp noredzone address_safety {
+define i32 @test_widening_bad(i8* %P) nounwind ssp noredzone sanitize_address {
 entry:
   %tmp = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 1), align 4
   %conv = zext i8 %tmp to i32
@@ -36,7 +36,7 @@ define void @end_test_widening_bad() {
 
 ;; Accessing bytes 4 and 5. Ok to widen to i16.
 
-define i32 @test_widening_ok(i8* %P) nounwind ssp noredzone address_safety {
+define i32 @test_widening_ok(i8* %P) nounwind ssp noredzone sanitize_address {
 entry:
   %tmp = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 1), align 4
   %conv = zext i8 %tmp to i32
diff --git a/test/Instrumentation/AddressSanitizer/basic.ll b/test/Instrumentation/AddressSanitizer/basic.ll
index 655f69c..c477b19 100644
--- a/test/Instrumentation/AddressSanitizer/basic.ll
+++ b/test/Instrumentation/AddressSanitizer/basic.ll
@@ -5,12 +5,12 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
 
-define i32 @test_load(i32* %a) address_safety {
+define i32 @test_load(i32* %a) sanitize_address {
 ; CHECK: @test_load
 ; CHECK-NOT: load
 ; CHECK:   %[[LOAD_ADDR:[^ ]*]] = ptrtoint i32* %a to i64
 ; CHECK:   lshr i64 %[[LOAD_ADDR]], 3
-; CHECK:   or i64
+; CHECK:   {{or|add}}
 ; CHECK:   %[[LOAD_SHADOW_PTR:[^ ]*]] = inttoptr
 ; CHECK:   %[[LOAD_SHADOW:[^ ]*]] = load i8* %[[LOAD_SHADOW_PTR]]
 ; CHECK:   icmp ne i8
@@ -38,12 +38,12 @@ entry:
   ret i32 %tmp1
 }
 
-define void @test_store(i32* %a) address_safety {
+define void @test_store(i32* %a) sanitize_address {
 ; CHECK: @test_store
 ; CHECK-NOT: store
 ; CHECK:   %[[STORE_ADDR:[^ ]*]] = ptrtoint i32* %a to i64
 ; CHECK:   lshr i64 %[[STORE_ADDR]], 3
-; CHECK:   or i64
+; CHECK:   {{or|add}}
 ; CHECK:   %[[STORE_SHADOW_PTR:[^ ]*]] = inttoptr
 ; CHECK:   %[[STORE_SHADOW:[^ ]*]] = load i8* %[[STORE_SHADOW_PTR]]
 ; CHECK:   icmp ne i8
@@ -73,7 +73,7 @@ entry:
 ; Check that asan leaves just one alloca.
 
 declare void @alloca_test_use([10 x i8]*)
-define void @alloca_test() address_safety {
+define void @alloca_test() sanitize_address {
 entry:
   %x = alloca [10 x i8], align 1
   %y = alloca [10 x i8], align 1
@@ -89,3 +89,42 @@ entry:
 ; CHECK-NOT: = alloca
 ; CHECK: ret void
 
+define void @LongDoubleTest(x86_fp80* nocapture %a) nounwind uwtable sanitize_address {
+entry:
+    store x86_fp80 0xK3FFF8000000000000000, x86_fp80* %a, align 16
+    ret void
+}
+
+; CHECK: LongDoubleTest
+; CHECK: __asan_report_store_n
+; CHECK: __asan_report_store_n
+; CHECK: ret void
+
+
+define void @i40test(i40* %a, i40* %b) nounwind uwtable sanitize_address {
+  entry:
+  %t = load i40* %a
+  store i40 %t, i40* %b, align 8
+  ret void
+}
+
+; CHECK: i40test
+; CHECK: __asan_report_load_n{{.*}}, i64 5)
+; CHECK: __asan_report_load_n{{.*}}, i64 5)
+; CHECK: __asan_report_store_n{{.*}}, i64 5)
+; CHECK: __asan_report_store_n{{.*}}, i64 5)
+; CHECK: ret void
+
+define void @i80test(i80* %a, i80* %b) nounwind uwtable sanitize_address {
+  entry:
+  %t = load i80* %a
+  store i80 %t, i80* %b, align 8
+  ret void
+}
+
+; CHECK: i80test
+; CHECK: __asan_report_load_n{{.*}}, i64 10)
+; CHECK: __asan_report_load_n{{.*}}, i64 10)
+; CHECK: __asan_report_store_n{{.*}}, i64 10)
+; CHECK: __asan_report_store_n{{.*}}, i64 10)
+; CHECK: ret void
diff --git a/test/Instrumentation/AddressSanitizer/debug_info.ll b/test/Instrumentation/AddressSanitizer/debug_info.ll
index f686ac1..7822fd0 100644
--- a/test/Instrumentation/AddressSanitizer/debug_info.ll
+++ b/test/Instrumentation/AddressSanitizer/debug_info.ll
@@ -6,7 +6,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-define i32 @_Z3zzzi(i32 %p) nounwind uwtable address_safety {
+define i32 @_Z3zzzi(i32 %p) nounwind uwtable sanitize_address {
 entry:
   %p.addr = alloca i32, align 4
   %r = alloca i32, align 4
diff --git a/test/Instrumentation/AddressSanitizer/different_scale_and_offset.ll b/test/Instrumentation/AddressSanitizer/different_scale_and_offset.ll
new file mode 100644
index 0000000..b037176
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/different_scale_and_offset.ll
@@ -0,0 +1,41 @@
+; Test non-default shadow mapping scale and offset.
+;
+; RUN: opt < %s -asan -asan-mapping-scale=2 -asan-mapping-offset-log=0 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Test that ASan tells scale and offset to runtime.
+; CHECK: @__asan_mapping_offset = linkonce_odr constant i64 0
+; CHECK: @__asan_mapping_scale = linkonce_odr constant i64 2
+
+define i32 @test_load(i32* %a) sanitize_address {
+; CHECK: @test_load
+; CHECK-NOT: load
+; CHECK:   %[[LOAD_ADDR:[^ ]*]] = ptrtoint i32* %a to i64
+; CHECK:   lshr i64 %[[LOAD_ADDR]], 2
+
+; No need to apply the shadow offset (no 'or') when the offset is zero.
+; CHECK-NOT:  or i64
+
+; CHECK:   %[[LOAD_SHADOW_PTR:[^ ]*]] = inttoptr
+; CHECK:   %[[LOAD_SHADOW:[^ ]*]] = load i8* %[[LOAD_SHADOW_PTR]]
+; CHECK:   icmp ne i8
+; CHECK:   br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
+
+; No slow path is needed for an i32 access when the mapping scale is 2.
+; CHECK-NOT:   and i64 %[[LOAD_ADDR]]
+;
+; The crash block reports the error.
+; CHECK:   call void @__asan_report_load4(i64 %[[LOAD_ADDR]])
+; CHECK:   unreachable
+;
+; The actual load.
+; CHECK:   %tmp1 = load i32* %a
+; CHECK:   ret i32 %tmp1
+
+entry:
+  %tmp1 = load i32* %a
+  ret i32 %tmp1
+}
+
diff --git a/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll b/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll
index 28d4ac0..2efd6b1 100644
--- a/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll
+++ b/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll
@@ -5,7 +5,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-define void @_Z3barv() uwtable address_safety {
+define void @_Z3barv() uwtable sanitize_address {
 entry:
   %a = alloca i32, align 4
   call void @_Z3fooPi(i32* %a)
diff --git a/test/Instrumentation/AddressSanitizer/instrument-no-return.ll b/test/Instrumentation/AddressSanitizer/instrument-no-return.ll
index e8f62b5..2d835a3 100644
--- a/test/Instrumentation/AddressSanitizer/instrument-no-return.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument-no-return.ll
@@ -1,13 +1,13 @@
 ; RUN: opt < %s -asan -S | FileCheck %s
 ; AddressSanitizer must insert __asan_handle_no_return
-; before every noreturn call.
+; before every noreturn call or invoke.
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
 
 declare void @MyNoReturnFunc(i32) noreturn
 
-define i32 @Call1(i8* nocapture %arg) uwtable address_safety {
+define i32 @Call1(i8* nocapture %arg) uwtable sanitize_address {
 entry:
   call void @MyNoReturnFunc(i32 1) noreturn  ; The call insn has noreturn attr.
 ; CHECK:        @Call1
@@ -17,7 +17,7 @@ entry:
   unreachable
 }
 
-define i32 @Call2(i8* nocapture %arg) uwtable address_safety {
+define i32 @Call2(i8* nocapture %arg) uwtable sanitize_address {
 entry:
   call void @MyNoReturnFunc(i32 1)  ; No noreturn attribure on the call.
 ; CHECK:        @Call2
@@ -26,3 +26,24 @@ entry:
 ; CHECK-NEXT: unreachable
   unreachable
 }
+
+declare i32 @__gxx_personality_v0(...)
+
+define i64 @Invoke1(i8** %esc) nounwind uwtable ssp sanitize_address {
+entry:
+  invoke void @MyNoReturnFunc(i32 1)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  ret i64 0
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          filter [0 x i8*] zeroinitializer
+  ret i64 1
+}
+; CHECK: @Invoke1
+; CHECK:        call void @__asan_handle_no_return
+; CHECK-NEXT:   invoke void @MyNoReturnFunc
+; CHECK: ret i64 0
+; CHECK: ret i64 1
diff --git a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
index 042c06b..584db37 100644
--- a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
@@ -23,7 +23,7 @@ entry:
   ret void
 }
 
-define internal void @_GLOBAL__I_a() address_safety section ".text.startup" {
+define internal void @_GLOBAL__I_a() sanitize_address section ".text.startup" {
 entry:
   call void @__cxx_global_var_init()
   ret void
@@ -40,7 +40,7 @@ entry:
 ; CHECK: ret
 
 ; Check that xxx is instrumented.
-define void @touch_xxx() address_safety {
+define void @touch_xxx() sanitize_address {
   store i32 0, i32 *@xxx, align 4
   ret void
 ; CHECK: define void @touch_xxx
@@ -49,7 +49,7 @@ define void @touch_xxx() address_safety {
 }
 
 ; Check that XXX is instrumented.
-define void @touch_XXX() address_safety {
+define void @touch_XXX() sanitize_address {
   store i32 0, i32 *@XXX, align 4
   ret void
 ; CHECK: define void @touch_XXX
@@ -59,7 +59,7 @@ define void @touch_XXX() address_safety {
 
 
 ; Check that yyy is NOT instrumented (as it does not have dynamic initializer).
-define void @touch_yyy() address_safety {
+define void @touch_yyy() sanitize_address {
   store i32 0, i32 *@yyy, align 4
   ret void
 ; CHECK: define void @touch_yyy
@@ -68,7 +68,7 @@ define void @touch_yyy() address_safety {
 }
 
 ; Check that YYY is NOT instrumented (as it does not have dynamic initializer).
-define void @touch_YYY() address_safety {
+define void @touch_YYY() sanitize_address {
   store i32 0, i32 *@YYY, align 4
   ret void
 ; CHECK: define void @touch_YYY
diff --git a/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll b/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll
index 633bf9a..23cf6d2 100644
--- a/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll
@@ -4,7 +4,7 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
-define void @IncrementMe(i32* %a) address_safety {
+define void @IncrementMe(i32* %a) sanitize_address {
 entry:
   %tmp1 = load i32* %a, align 4
   %tmp2 = add i32 %tmp1,  1
diff --git a/test/Instrumentation/AddressSanitizer/lifetime.ll b/test/Instrumentation/AddressSanitizer/lifetime.ll
index 982ad08..3348728 100644
--- a/test/Instrumentation/AddressSanitizer/lifetime.ll
+++ b/test/Instrumentation/AddressSanitizer/lifetime.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu"
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
 
-define void @lifetime_no_size() address_safety {
+define void @lifetime_no_size() sanitize_address {
 entry:
   %i = alloca i32, align 4
   %i.ptr = bitcast i32* %i to i8*
@@ -23,7 +23,7 @@ entry:
 }
 
 ; Generic case of lifetime analysis.
-define void @lifetime() address_safety {
+define void @lifetime() sanitize_address {
   ; CHECK: @lifetime
 
   ; Regular variable lifetime intrinsics.
@@ -61,7 +61,7 @@ define void @lifetime() address_safety {
 }
 
 ; Check that arguments of lifetime may come from phi nodes.
-define void @phi_args(i1 %x) address_safety {
+define void @phi_args(i1 %x) sanitize_address {
   ; CHECK: @phi_args
 
 entry:
diff --git a/test/Instrumentation/AddressSanitizer/test64.ll b/test/Instrumentation/AddressSanitizer/test64.ll
index d544d77..6aa5c28 100644
--- a/test/Instrumentation/AddressSanitizer/test64.ll
+++ b/test/Instrumentation/AddressSanitizer/test64.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -asan -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
-define i32 @read_4_bytes(i32* %a) address_safety {
+define i32 @read_4_bytes(i32* %a) sanitize_address {
 entry:
   %tmp1 = load i32* %a, align 4
   ret i32 %tmp1
@@ -9,11 +9,11 @@ entry:
 ; CHECK: @read_4_bytes
 ; CHECK-NOT: ret
 ; CHECK: lshr {{.*}} 3
-; Check for ASAN's Offset for 64-bit (2^44)
-; CHECK-NEXT: 17592186044416
+; Check for ASAN's Offset for 64-bit (2^44 or 0x7fff8000)
+; CHECK-NEXT: {{17592186044416|2147450880}}
 ; CHECK: ret
 
-define void @example_atomicrmw(i64* %ptr) nounwind uwtable address_safety {
+define void @example_atomicrmw(i64* %ptr) nounwind uwtable sanitize_address {
 entry:
   %0 = atomicrmw add i64* %ptr, i64 1 seq_cst
   ret void
@@ -24,7 +24,7 @@ entry:
 ; CHECK: atomicrmw
 ; CHECK: ret
 
-define void @example_cmpxchg(i64* %ptr, i64 %compare_to, i64 %new_value) nounwind uwtable address_safety {
+define void @example_cmpxchg(i64* %ptr, i64 %compare_to, i64 %new_value) nounwind uwtable sanitize_address {
 entry:
   %0 = cmpxchg i64* %ptr, i64 %compare_to, i64 %new_value seq_cst
   ret void
diff --git a/test/Instrumentation/MemorySanitizer/msan_basic.ll b/test/Instrumentation/MemorySanitizer/msan_basic.ll
index 20957fb..60f946f 100644
--- a/test/Instrumentation/MemorySanitizer/msan_basic.ll
+++ b/test/Instrumentation/MemorySanitizer/msan_basic.ll
@@ -362,6 +362,41 @@ define zeroext i1 @ICmpSLE(i32 %x) nounwind uwtable readnone {
 ; CHECK: ret i1
 
 
+; Check that we propagate shadow for x<0, x>=0, etc. (i.e. sign-bit tests)
+; on vector arguments.
+
+define <2 x i1> @ICmpSLT_vector(<2 x i32*> %x) nounwind uwtable readnone {
+  %1 = icmp slt <2 x i32*> %x, zeroinitializer
+  ret <2 x i1> %1
+}
+
+; CHECK: @ICmpSLT_vector
+; CHECK: icmp slt <2 x i64>
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp slt <2 x i32*>
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret <2 x i1>
+
+
+; Check that we propagate shadow for unsigned relational comparisons with
+; constants
+
+define zeroext i1 @ICmpUGTConst(i32 %x) nounwind uwtable readnone {
+entry:
+  %cmp = icmp ugt i32 %x, 7
+  ret i1 %cmp
+}
+
+; CHECK: @ICmpUGTConst
+; CHECK: icmp ugt i32
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp ugt i32
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp ugt i32
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret i1
+
+
 ; Check that loads of shadow have the same aligment as the original loads.
 ; Check that loads of origin have the aligment of max(4, original alignment).
 
@@ -534,3 +569,30 @@ define <8 x i8*> @VectorOfPointers(<8 x i8*>* %p) nounwind uwtable {
 ; CHECK: load <8 x i8*>*
 ; CHECK: store <8 x i64> {{.*}} @__msan_retval_tls
 ; CHECK: ret <8 x i8*>
+
+; Test handling of va_copy.
+
+declare void @llvm.va_copy(i8*, i8*) nounwind
+
+define void @VACopy(i8* %p1, i8* %p2) nounwind uwtable {
+  call void @llvm.va_copy(i8* %p1, i8* %p2) nounwind
+  ret void
+}
+
+; CHECK: @VACopy
+; CHECK: call void @llvm.memset.p0i8.i64({{.*}}, i8 0, i64 24, i32 8, i1 false)
+; CHECK: ret void
+
+
+; Test handling of volatile stores.
+; Check that MemorySanitizer does not add a check of the value being stored.
+
+define void @VolatileStore(i32* nocapture %p, i32 %x) nounwind uwtable {
+entry:
+  store volatile i32 %x, i32* %p, align 4
+  ret void
+}
+
+; CHECK: @VolatileStore
+; CHECK-NOT: @__msan_warning
+; CHECK: ret void
diff --git a/test/Instrumentation/ThreadSanitizer/tsan-vs-gvn.ll b/test/Instrumentation/ThreadSanitizer/tsan-vs-gvn.ll
new file mode 100644
index 0000000..a83a274
--- /dev/null
+++ b/test/Instrumentation/ThreadSanitizer/tsan-vs-gvn.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -basicaa -gvn -tsan -S | FileCheck %s
+; TSAN conflicts with load widening. Make sure the load widening is off with -tsan.
+
+; 32-bit little endian target.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+%struct_of_8_bytes_4_aligned = type { i32, i8, i8, i8, i8}
+
+@f = global %struct_of_8_bytes_4_aligned zeroinitializer, align 4
+
+; Accessing bytes 4 and 6, not ok to widen to i32 if sanitize_thread is set.
+
+define i32 @test_widening_bad(i8* %P) nounwind ssp noredzone sanitize_thread {
+entry:
+  %tmp = load i8* getelementptr inbounds (%struct_of_8_bytes_4_aligned* @f, i64 0, i32 1), align 4
+  %conv = zext i8 %tmp to i32
+  %tmp1 = load i8* getelementptr inbounds (%struct_of_8_bytes_4_aligned* @f, i64 0, i32 3), align 1
+  %conv2 = zext i8 %tmp1 to i32
+  %add = add nsw i32 %conv, %conv2
+  ret i32 %add
+; CHECK: @test_widening_bad
+; CHECK: call void @__tsan_read1
+; CHECK: call void @__tsan_read1
+; CHECK-NOT: call void @__tsan_read4
+; CHECK: ret i32
+}
diff --git a/test/JitListener/test-inline.ll b/test/JitListener/test-inline.ll
index 8bfaeaf..ca5d8d6 100644
--- a/test/JitListener/test-inline.ll
+++ b/test/JitListener/test-inline.ll
@@ -1,13 +1,29 @@
 ; RUN: llvm-jitlistener %s | FileCheck %s
 
-; CHECK: Method load [1]: _Z15test_parametersPfPA2_dR11char_structPPitm, Size = 165
-; CHECK: Method load [2]: _Z3food, Size = 39
+; CHECK: Method load [1]: _Z15test_parametersPfPA2_dR11char_structPPitm, Size = 170
+; CHECK:   Line info @ 0: test-inline.cpp, line 33
+; CHECK:   Line info @ 35: test-inline.cpp, line 34
+; CHECK:   Line info @ 165: test-inline.cpp, line 35
+; CHECK: Method load [2]: _Z3foov, Size = 3
+; CHECK:   Line info @ 0: test-inline.cpp, line 28
+; CHECK:   Line info @ 2: test-inline.cpp, line 29
+; CHECK:   Line info @ 3: test-inline.cpp, line 29
 ; CHECK: Method load [3]: main, Size = 146
+; CHECK:   Line info @ 0: test-inline.cpp, line 39
+; CHECK:   Line info @ 21: test-inline.cpp, line 41
+; CHECK:   Line info @ 39: test-inline.cpp, line 42
+; CHECK:   Line info @ 60: test-inline.cpp, line 44
+; CHECK:   Line info @ 80: test-inline.cpp, line 48
+; CHECK:   Line info @ 90: test-inline.cpp, line 45
+; CHECK:   Line info @ 95: test-inline.cpp, line 46
+; CHECK:   Line info @ 114: test-inline.cpp, line 48
+; CHECK:   Line info @ 141: test-inline.cpp, line 49
+; CHECK:   Line info @ 146: test-inline.cpp, line 49
 ; CHECK: Method unload [1]
 ; CHECK: Method unload [2]
 ; CHECK: Method unload [3]
 
-; ModuleID = 'test-inline.bc'
+; ModuleID = 'test-inline.cpp'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
@@ -25,64 +41,54 @@ entry:
   %us.addr = alloca i16, align 2
   %l.addr = alloca i64, align 8
   %result = alloca double, align 8
-  %result2 = alloca i32, align 4
   store float* %pf, float** %pf.addr, align 8
-  call void @llvm.dbg.declare(metadata !{float** %pf.addr}, metadata !32), !dbg !35
+  call void @llvm.dbg.declare(metadata !{float** %pf.addr}, metadata !46), !dbg !47
   store [2 x double]* %ppd, [2 x double]** %ppd.addr, align 8
-  call void @llvm.dbg.declare(metadata !{[2 x double]** %ppd.addr}, metadata !36), !dbg !39
+  call void @llvm.dbg.declare(metadata !{[2 x double]** %ppd.addr}, metadata !48), !dbg !47
   store %struct.char_struct* %s, %struct.char_struct** %s.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.char_struct** %s.addr}, metadata !40), !dbg !42
+  call void @llvm.dbg.declare(metadata !{%struct.char_struct** %s.addr}, metadata !49), !dbg !47
   store i32** %ppn, i32*** %ppn.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i32*** %ppn.addr}, metadata !43), !dbg !46
+  call void @llvm.dbg.declare(metadata !{i32*** %ppn.addr}, metadata !50), !dbg !47
   store i16 %us, i16* %us.addr, align 2
-  call void @llvm.dbg.declare(metadata !{i16* %us.addr}, metadata !47), !dbg !49
+  call void @llvm.dbg.declare(metadata !{i16* %us.addr}, metadata !51), !dbg !47
   store i64 %l, i64* %l.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i64* %l.addr}, metadata !50), !dbg !53
-  call void @llvm.dbg.declare(metadata !{double* %result}, metadata !54), !dbg !56
-  %0 = load float** %pf.addr, align 8, !dbg !57
-  %arrayidx = getelementptr inbounds float* %0, i64 0, !dbg !57
-  %1 = load float* %arrayidx, !dbg !57
-  %conv = fpext float %1 to double, !dbg !57
-  %2 = load [2 x double]** %ppd.addr, align 8, !dbg !57
-  %arrayidx1 = getelementptr inbounds [2 x double]* %2, i64 1, !dbg !57
-  %arrayidx2 = getelementptr inbounds [2 x double]* %arrayidx1, i32 0, i64 1, !dbg !57
-  %3 = load double* %arrayidx2, !dbg !57
-  %mul = fmul double %conv, %3, !dbg !57
-  %4 = load %struct.char_struct** %s.addr, !dbg !57
-  %c = getelementptr inbounds %struct.char_struct* %4, i32 0, i32 0, !dbg !57
-  %5 = load i8* %c, align 1, !dbg !57
-  %conv3 = sext i8 %5 to i32, !dbg !57
-  %conv4 = sitofp i32 %conv3 to double, !dbg !57
-  %mul5 = fmul double %mul, %conv4, !dbg !57
-  %6 = load i16* %us.addr, align 2, !dbg !57
-  %conv6 = zext i16 %6 to i32, !dbg !57
-  %conv7 = sitofp i32 %conv6 to double, !dbg !57
-  %mul8 = fmul double %mul5, %conv7, !dbg !57
-  %7 = load i64* %l.addr, align 8, !dbg !57
-  %conv9 = uitofp i64 %7 to double, !dbg !57
-  %mul10 = fmul double %mul8, %conv9, !dbg !57
-  store double %mul10, double* %result, align 8, !dbg !57
-  call void @llvm.dbg.declare(metadata !{i32* %result2}, metadata !58), !dbg !59
-  %8 = load double* %result, align 8, !dbg !60
-  %call = call i32 @_Z3food(double %8), !dbg !60
-  store i32 %call, i32* %result2, align 4, !dbg !60
-  %9 = load i32* %result2, align 4, !dbg !61
-  %conv11 = sitofp i32 %9 to double, !dbg !61
-  ret double %conv11, !dbg !61
+  call void @llvm.dbg.declare(metadata !{i64* %l.addr}, metadata !52), !dbg !47
+  call void @llvm.dbg.declare(metadata !{double* %result}, metadata !53), !dbg !55
+  %0 = load float** %pf.addr, align 8, !dbg !55
+  %arrayidx = getelementptr inbounds float* %0, i64 0, !dbg !55
+  %1 = load float* %arrayidx, align 4, !dbg !55
+  %conv = fpext float %1 to double, !dbg !55
+  %2 = load [2 x double]** %ppd.addr, align 8, !dbg !55
+  %arrayidx1 = getelementptr inbounds [2 x double]* %2, i64 1, !dbg !55
+  %arrayidx2 = getelementptr inbounds [2 x double]* %arrayidx1, i32 0, i64 1, !dbg !55
+  %3 = load double* %arrayidx2, align 8, !dbg !55
+  %mul = fmul double %conv, %3, !dbg !55
+  %4 = load %struct.char_struct** %s.addr, align 8, !dbg !55
+  %c = getelementptr inbounds %struct.char_struct* %4, i32 0, i32 0, !dbg !55
+  %5 = load i8* %c, align 1, !dbg !55
+  %conv3 = sext i8 %5 to i32, !dbg !55
+  %conv4 = sitofp i32 %conv3 to double, !dbg !55
+  %mul5 = fmul double %mul, %conv4, !dbg !55
+  %6 = load i16* %us.addr, align 2, !dbg !55
+  %conv6 = zext i16 %6 to i32, !dbg !55
+  %conv7 = sitofp i32 %conv6 to double, !dbg !55
+  %mul8 = fmul double %mul5, %conv7, !dbg !55
+  %7 = load i64* %l.addr, align 8, !dbg !55
+  %conv9 = uitofp i64 %7 to double, !dbg !55
+  %mul10 = fmul double %mul8, %conv9, !dbg !55
+  %call = call i32 @_Z3foov(), !dbg !55
+  %conv11 = sitofp i32 %call to double, !dbg !55
+  %add = fadd double %mul10, %conv11, !dbg !55
+  store double %add, double* %result, align 8, !dbg !55
+  %8 = load double* %result, align 8, !dbg !56
+  ret double %8, !dbg !56
 }
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
-define linkonce_odr i32 @_Z3food(double %input) nounwind uwtable inlinehint {
+define linkonce_odr i32 @_Z3foov() nounwind uwtable inlinehint {
 entry:
-  %input.addr = alloca double, align 8
-  store double %input, double* %input.addr, align 8
-  call void @llvm.dbg.declare(metadata !{double* %input.addr}, metadata !62), !dbg !63
-  %0 = load double* %input.addr, align 8, !dbg !64
-  %div = fdiv double %0, 3.000000e+00, !dbg !64
-  %add = fadd double %div, 1.000000e+00, !dbg !64
-  %conv = fptosi double %add to i32, !dbg !64
-  ret i32 %conv, !dbg !64
+  ret i32 0, !dbg !57
 }
 
 define i32 @main(i32 %argc, i8** %argv) uwtable {
@@ -96,124 +102,111 @@ entry:
   %result = alloca double, align 8
   store i32 0, i32* %retval
   store i32 %argc, i32* %argc.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !66), !dbg !67
+  call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !59), !dbg !60
   store i8** %argv, i8*** %argv.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !68), !dbg !71
-  call void @llvm.dbg.declare(metadata !{%struct.char_struct* %s}, metadata !72), !dbg !74
-  call void @llvm.dbg.declare(metadata !{float* %f}, metadata !75), !dbg !76
-  store float 0.000000e+00, float* %f, align 4, !dbg !77
-  call void @llvm.dbg.declare(metadata !{[2 x [2 x double]]* %d}, metadata !78), !dbg !81
-  %0 = bitcast [2 x [2 x double]]* %d to i8*, !dbg !82
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([2 x [2 x double]]* @_ZZ4mainE1d to i8*), i64 32, i32 16, i1 false), !dbg !82
-  %c = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 0, !dbg !83
-  store i8 97, i8* %c, align 1, !dbg !83
-  %c2 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !84
-  %arrayidx = getelementptr inbounds [2 x i8]* %c2, i32 0, i64 0, !dbg !84
-  store i8 48, i8* %arrayidx, align 1, !dbg !84
-  %c21 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !85
-  %arrayidx2 = getelementptr inbounds [2 x i8]* %c21, i32 0, i64 1, !dbg !85
-  store i8 49, i8* %arrayidx2, align 1, !dbg !85
-  call void @llvm.dbg.declare(metadata !{double* %result}, metadata !86), !dbg !87
-  %arraydecay = getelementptr inbounds [2 x [2 x double]]* %d, i32 0, i32 0, !dbg !88
-  %call = call double @_Z15test_parametersPfPA2_dR11char_structPPitm(float* %f, [2 x double]* %arraydecay, %struct.char_struct* %s, i32** null, i16 zeroext 10, i64 42), !dbg !88
-  store double %call, double* %result, align 8, !dbg !88
-  %1 = load double* %result, align 8, !dbg !89
-  %cmp = fcmp oeq double %1, 0.000000e+00, !dbg !89
-  %cond = select i1 %cmp, i32 0, i32 -1, !dbg !89
-  ret i32 %cond, !dbg !89
+  call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !61), !dbg !60
+  call void @llvm.dbg.declare(metadata !{%struct.char_struct* %s}, metadata !62), !dbg !64
+  call void @llvm.dbg.declare(metadata !{float* %f}, metadata !65), !dbg !66
+  store float 0.000000e+00, float* %f, align 4, !dbg !66
+  call void @llvm.dbg.declare(metadata !{[2 x [2 x double]]* %d}, metadata !67), !dbg !70
+  %0 = bitcast [2 x [2 x double]]* %d to i8*, !dbg !70
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([2 x [2 x double]]* @_ZZ4mainE1d to i8*), i64 32, i32 16, i1 false), !dbg !70
+  %c = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 0, !dbg !71
+  store i8 97, i8* %c, align 1, !dbg !71
+  %c2 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !72
+  %arrayidx = getelementptr inbounds [2 x i8]* %c2, i32 0, i64 0, !dbg !72
+  store i8 48, i8* %arrayidx, align 1, !dbg !72
+  %c21 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !73
+  %arrayidx2 = getelementptr inbounds [2 x i8]* %c21, i32 0, i64 1, !dbg !73
+  store i8 49, i8* %arrayidx2, align 1, !dbg !73
+  call void @llvm.dbg.declare(metadata !{double* %result}, metadata !74), !dbg !75
+  %arraydecay = getelementptr inbounds [2 x [2 x double]]* %d, i32 0, i32 0, !dbg !75
+  %call = call double @_Z15test_parametersPfPA2_dR11char_structPPitm(float* %f, [2 x double]* %arraydecay, %struct.char_struct* %s, i32** null, i16 zeroext 10, i64 42), !dbg !75
+  store double %call, double* %result, align 8, !dbg !75
+  %1 = load double* %result, align 8, !dbg !76
+  %cmp = fcmp oeq double %1, 0.000000e+00, !dbg !76
+  %cond = select i1 %cmp, i32 0, i32 -1, !dbg !76
+  ret i32 %cond, !dbg !76
 }
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"test-inline.cpp", metadata !"/home/athirumurthi/dev/opencl-mc/build/RH64/Debug/backend/llvm", metadata !"clang version 3.0 (branches/release_30 36797)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !17} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test-inline.cpp", metadata !"/home/akaylor/dev", metadata !"clang version 3.3 (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-clang2 gitosis@miro.kw.intel.com:clang.git 39450d0469e0d5589ad39fd0b20b5742750619a0) (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-llvm gitosis@miro.kw.intel.com:llvm.git 376642ed620ecae05b68c7bc81f79aeb2065abe0)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !43} ; [ DW_TAG_compile_unit ] [/home/akaylor/dev/test-inline.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{metadata !2}
 !2 = metadata !{i32 0}
 !3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !12, metadata !16}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"test_parameters", metadata !"test_parameters", metadata !"_Z15test_parametersPfPA2_dR11char_structPPitm", metadata !6, i32 33, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, double (float*, [2 x double]*, %struct.char_struct*, i32**, i16, i64)* @_Z15test_parametersPfPA2_dR11char_structPPitm, null, null, metadata !10} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !"test-inline.cpp", metadata !"/home/athirumurthi/dev/opencl-mc/build/RH64/Debug/backend/llvm", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 720932, null, metadata !"double", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!12 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 40, metadata !13, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !10} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5, metadata !35, metadata !40}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"test_parameters", metadata !"test_parameters", metadata !"_Z15test_parametersPfPA2_dR11char_structPPitm", metadata !6, i32 32, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, double (float*, [2 x double]*, %struct.char_struct*, i32**, i16, i64)* @_Z15test_parametersPfPA2_dR11char_structPPitm, null, null, metadata !1, i32 33} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 33] [test_parameters]
+!6 = metadata !{i32 786473, metadata !"test-inline.cpp", metadata !"/home/akaylor/dev", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !10, metadata !12, metadata !16, metadata !29, metadata !32, metadata !33}
+!9 = metadata !{i32 786468, null, metadata !"double", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from float]
+!11 = metadata !{i32 786468, null, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
+!12 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!13 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 128, i64 64, i32 0, i32 0, metadata !9, metadata !14, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 128, align 64, offset 0] [from double]
 !14 = metadata !{metadata !15}
-!15 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!16 = metadata !{i32 720942, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3food", metadata !6, i32 28, metadata !13, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (double)* @_Z3food, null, null, metadata !10} ; [ DW_TAG_subprogram ]
-!17 = metadata !{metadata !18}
-!18 = metadata !{metadata !19}
-!19 = metadata !{i32 720948, i32 0, null, metadata !"compound_char", metadata !"compound_char", metadata !"", metadata !6, i32 25, metadata !20, i32 0, i32 1, %struct.char_struct* @compound_char} ; [ DW_TAG_variable ]
-!20 = metadata !{i32 720898, null, metadata !"char_struct", metadata !6, i32 22, i64 24, i64 8, i32 0, i32 0, null, metadata !21, i32 0, null, null} ; [ DW_TAG_class_type ]
-!21 = metadata !{metadata !22, metadata !24, metadata !28}
-!22 = metadata !{i32 720909, metadata !20, metadata !"c", metadata !6, i32 23, i64 8, i64 8, i64 0, i32 0, metadata !23} ; [ DW_TAG_member ]
-!23 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!24 = metadata !{i32 720909, metadata !20, metadata !"c2", metadata !6, i32 24, i64 16, i64 8, i64 8, i32 0, metadata !25} ; [ DW_TAG_member ]
-!25 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !23, metadata !26, i32 0, i32 0} ; [ DW_TAG_array_type ]
-!26 = metadata !{metadata !27}
-!27 = metadata !{i32 720929, i64 0, i64 2}        ; [ DW_TAG_subrange_type ]
-!28 = metadata !{i32 720942, i32 0, metadata !20, metadata !"char_struct", metadata !"char_struct", metadata !"", metadata !6, i32 22, metadata !29, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !10} ; [ DW_TAG_subprogram ]
-!29 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!30 = metadata !{null, metadata !31}
-!31 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_pointer_type ]
-!32 = metadata !{i32 721153, metadata !5, metadata !"pf", metadata !6, i32 16777248, metadata !33, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!33 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !34} ; [ DW_TAG_pointer_type ]
-!34 = metadata !{i32 720932, null, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!35 = metadata !{i32 32, i32 31, metadata !5, null}
-!36 = metadata !{i32 721153, metadata !5, metadata !"ppd", metadata !6, i32 33554464, metadata !37, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!37 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !38} ; [ DW_TAG_pointer_type ]
-!38 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 128, i64 64, i32 0, i32 0, metadata !9, metadata !26, i32 0, i32 0} ; [ DW_TAG_array_type ]
-!39 = metadata !{i32 32, i32 42, metadata !5, null}
-!40 = metadata !{i32 721153, metadata !5, metadata !"s", metadata !6, i32 50331680, metadata !41, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!41 = metadata !{i32 720912, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_reference_type ]
-!42 = metadata !{i32 32, i32 72, metadata !5, null}
-!43 = metadata !{i32 721153, metadata !5, metadata !"ppn", metadata !6, i32 67108896, metadata !44, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!44 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !45} ; [ DW_TAG_pointer_type ]
-!45 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ]
-!46 = metadata !{i32 32, i32 81, metadata !5, null}
-!47 = metadata !{i32 721153, metadata !5, metadata !"us", metadata !6, i32 83886112, metadata !48, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!48 = metadata !{i32 720932, null, metadata !"unsigned short", null, i32 0, i64 16, i64 16, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!49 = metadata !{i32 32, i32 105, metadata !5, null}
-!50 = metadata !{i32 721153, metadata !5, metadata !"l", metadata !6, i32 100663328, metadata !51, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!51 = metadata !{i32 720934, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !52} ; [ DW_TAG_const_type ]
-!52 = metadata !{i32 720932, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!53 = metadata !{i32 32, i32 135, metadata !5, null}
-!54 = metadata !{i32 721152, metadata !55, metadata !"result", metadata !6, i32 34, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!55 = metadata !{i32 720907, metadata !5, i32 33, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
-!56 = metadata !{i32 34, i32 10, metadata !55, null}
-!57 = metadata !{i32 34, i32 51, metadata !55, null}
-!58 = metadata !{i32 721152, metadata !55, metadata !"result2", metadata !6, i32 35, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!59 = metadata !{i32 35, i32 7, metadata !55, null}
-!60 = metadata !{i32 35, i32 17, metadata !55, null}
-!61 = metadata !{i32 36, i32 3, metadata !55, null}
-!62 = metadata !{i32 721153, metadata !16, metadata !"input", metadata !6, i32 16777243, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!63 = metadata !{i32 27, i32 23, metadata !16, null}
-!64 = metadata !{i32 29, i32 3, metadata !65, null}
-!65 = metadata !{i32 720907, metadata !16, i32 28, i32 1, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
-!66 = metadata !{i32 721153, metadata !12, metadata !"argc", metadata !6, i32 16777255, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!67 = metadata !{i32 39, i32 14, metadata !12, null}
-!68 = metadata !{i32 721153, metadata !12, metadata !"argv", metadata !6, i32 33554471, metadata !69, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!69 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !70} ; [ DW_TAG_pointer_type ]
-!70 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
-!71 = metadata !{i32 39, i32 26, metadata !12, null}
-!72 = metadata !{i32 721152, metadata !73, metadata !"s", metadata !6, i32 41, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!73 = metadata !{i32 720907, metadata !12, i32 40, i32 1, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
-!74 = metadata !{i32 41, i32 22, metadata !73, null}
-!75 = metadata !{i32 721152, metadata !73, metadata !"f", metadata !6, i32 42, metadata !34, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!76 = metadata !{i32 42, i32 9, metadata !73, null}
-!77 = metadata !{i32 42, i32 16, metadata !73, null}
-!78 = metadata !{i32 721152, metadata !73, metadata !"d", metadata !6, i32 43, metadata !79, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!79 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 256, i64 64, i32 0, i32 0, metadata !9, metadata !80, i32 0, i32 0} ; [ DW_TAG_array_type ]
-!80 = metadata !{metadata !27, metadata !27}
-!81 = metadata !{i32 43, i32 10, metadata !73, null}
-!82 = metadata !{i32 43, i32 38, metadata !73, null}
-!83 = metadata !{i32 45, i32 3, metadata !73, null}
-!84 = metadata !{i32 46, i32 3, metadata !73, null}
-!85 = metadata !{i32 47, i32 3, metadata !73, null}
-!86 = metadata !{i32 721152, metadata !73, metadata !"result", metadata !6, i32 49, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!87 = metadata !{i32 49, i32 10, metadata !73, null}
-!88 = metadata !{i32 49, i32 19, metadata !73, null}
-!89 = metadata !{i32 50, i32 3, metadata !73, null}
+!15 = metadata !{i32 786465, i64 0, i64 2}        ; [ DW_TAG_subrange_type ] [0, 1]
+!16 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from char_struct]
+!17 = metadata !{i32 786451, null, metadata !"char_struct", metadata !6, i32 22, i64 24, i64 8, i32 0, i32 0, null, metadata !18, i32 0, null, null} ; [ DW_TAG_structure_type ] [char_struct] [line 22, size 24, align 8, offset 0] [from ]
+!18 = metadata !{metadata !19, metadata !21, metadata !23}
+!19 = metadata !{i32 786445, metadata !17, metadata !"c", metadata !6, i32 23, i64 8, i64 8, i64 0, i32 0, metadata !20} ; [ DW_TAG_member ] [c] [line 23, size 8, align 8, offset 0] [from char]
+!20 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!21 = metadata !{i32 786445, metadata !17, metadata !"c2", metadata !6, i32 24, i64 16, i64 8, i64 8, i32 0, metadata !22} ; [ DW_TAG_member ] [c2] [line 24, size 16, align 8, offset 8] [from ]
+!22 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !20, metadata !14, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 16, align 8, offset 0] [from char]
+!23 = metadata !{i32 786478, i32 0, metadata !17, metadata !"char_struct", metadata !"char_struct", metadata !"", metadata !6, i32 22, metadata !24, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !27, i32 22} ; [ DW_TAG_subprogram ] [line 22] [char_struct]
+!24 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!25 = metadata !{null, metadata !26}
+!26 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !17} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char_struct]
+!27 = metadata !{metadata !28}
+!28 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!29 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !30} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!30 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !31} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!31 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!32 = metadata !{i32 786468, null, metadata !"unsigned short", null, i32 0, i64 16, i64 16, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned short] [line 0, size 16, align 16, offset 0, enc DW_ATE_unsigned]
+!33 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !34} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from long unsigned int]
+!34 = metadata !{i32 786468, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!35 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 38, metadata !36, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !1, i32 39} ; [ DW_TAG_subprogram ] [line 38] [def] [scope 39] [main]
+!36 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!37 = metadata !{metadata !31, metadata !31, metadata !38}
+!38 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!39 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!40 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !6, i32 27, metadata !41, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z3foov, null, null, metadata !1, i32 28} ; [ DW_TAG_subprogram ] [line 27] [def] [scope 28] [foo]
+!41 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !42, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!42 = metadata !{metadata !31}
+!43 = metadata !{metadata !44}
+!44 = metadata !{metadata !45}
+!45 = metadata !{i32 786484, i32 0, null, metadata !"compound_char", metadata !"compound_char", metadata !"", metadata !6, i32 25, metadata !17, i32 0, i32 1, %struct.char_struct* @compound_char} ; [ DW_TAG_variable ] [compound_char] [line 25] [def]
+!46 = metadata !{i32 786689, metadata !5, metadata !"pf", metadata !6, i32 16777248, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [pf] [line 32]
+!47 = metadata !{i32 32, i32 0, metadata !5, null}
+!48 = metadata !{i32 786689, metadata !5, metadata !"ppd", metadata !6, i32 33554464, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ppd] [line 32]
+!49 = metadata !{i32 786689, metadata !5, metadata !"s", metadata !6, i32 50331680, metadata !16, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [s] [line 32]
+!50 = metadata !{i32 786689, metadata !5, metadata !"ppn", metadata !6, i32 67108896, metadata !29, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ppn] [line 32]
+!51 = metadata !{i32 786689, metadata !5, metadata !"us", metadata !6, i32 83886112, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [us] [line 32]
+!52 = metadata !{i32 786689, metadata !5, metadata !"l", metadata !6, i32 100663328, metadata !33, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [l] [line 32]
+!53 = metadata !{i32 786688, metadata !54, metadata !"result", metadata !6, i32 34, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 34]
+!54 = metadata !{i32 786443, metadata !5, i32 33, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-inline.cpp]
+!55 = metadata !{i32 34, i32 0, metadata !54, null}
+!56 = metadata !{i32 35, i32 0, metadata !54, null}
+!57 = metadata !{i32 29, i32 0, metadata !58, null}
+!58 = metadata !{i32 786443, metadata !40, i32 28, i32 0, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-inline.cpp]
+!59 = metadata !{i32 786689, metadata !35, metadata !"argc", metadata !6, i32 16777254, metadata !31, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 38]
+!60 = metadata !{i32 38, i32 0, metadata !35, null}
+!61 = metadata !{i32 786689, metadata !35, metadata !"argv", metadata !6, i32 33554470, metadata !38, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 38]
+!62 = metadata !{i32 786688, metadata !63, metadata !"s", metadata !6, i32 40, metadata !17, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [s] [line 40]
+!63 = metadata !{i32 786443, metadata !35, i32 39, i32 0, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-inline.cpp]
+!64 = metadata !{i32 40, i32 0, metadata !63, null}
+!65 = metadata !{i32 786688, metadata !63, metadata !"f", metadata !6, i32 41, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [f] [line 41]
+!66 = metadata !{i32 41, i32 0, metadata !63, null}
+!67 = metadata !{i32 786688, metadata !63, metadata !"d", metadata !6, i32 42, metadata !68, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 42]
+!68 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 256, i64 64, i32 0, i32 0, metadata !9, metadata !69, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 256, align 64, offset 0] [from double]
+!69 = metadata !{metadata !15, metadata !15}
+!70 = metadata !{i32 42, i32 0, metadata !63, null}
+!71 = metadata !{i32 44, i32 0, metadata !63, null}
+!72 = metadata !{i32 45, i32 0, metadata !63, null}
+!73 = metadata !{i32 46, i32 0, metadata !63, null}
+!74 = metadata !{i32 786688, metadata !63, metadata !"result", metadata !6, i32 48, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 48]
+!75 = metadata !{i32 48, i32 0, metadata !63, null}
+!76 = metadata !{i32 49, i32 0, metadata !63, null}
diff --git a/test/JitListener/test-parameters.ll b/test/JitListener/test-parameters.ll
index 0c437a8..1e2a2b3 100644
--- a/test/JitListener/test-parameters.ll
+++ b/test/JitListener/test-parameters.ll
@@ -1,13 +1,28 @@
 ; RUN: llvm-jitlistener %s | FileCheck %s
 
 ; CHECK: Method load [1]: _Z15test_parametersPfPA2_dR11char_structPPitm, Size = 170
+; CHECK:   Line info @ 0: test-parameters.cpp, line 33
+; CHECK:   Line info @ 35: test-parameters.cpp, line 34
+; CHECK:   Line info @ 165: test-parameters.cpp, line 35
 ; CHECK: Method load [2]: _Z3foov, Size = 3
+; CHECK:   Line info @ 0: test-parameters.cpp, line 28
+; CHECK:   Line info @ 2: test-parameters.cpp, line 29
 ; CHECK: Method load [3]: main, Size = 146
+; CHECK:   Line info @ 0: test-parameters.cpp, line 39
+; CHECK:   Line info @ 21: test-parameters.cpp, line 41
+; CHECK:   Line info @ 39: test-parameters.cpp, line 42
+; CHECK:   Line info @ 60: test-parameters.cpp, line 44
+; CHECK:   Line info @ 80: test-parameters.cpp, line 48
+; CHECK:   Line info @ 90: test-parameters.cpp, line 45
+; CHECK:   Line info @ 95: test-parameters.cpp, line 46
+; CHECK:   Line info @ 114: test-parameters.cpp, line 48
+; CHECK:   Line info @ 141: test-parameters.cpp, line 49
+; CHECK:   Line info @ 146: test-parameters.cpp, line 49
 ; CHECK: Method unload [1]
 ; CHECK: Method unload [2]
 ; CHECK: Method unload [3]
 
-; ModuleID = 'test-parameters.bc'
+; ModuleID = 'test-parameters.cpp'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
@@ -18,7 +33,7 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define i32 @_Z3foov() nounwind uwtable {
 entry:
-  ret i32 0, !dbg !32
+  ret i32 0, !dbg !46
 }
 
 define double @_Z15test_parametersPfPA2_dR11char_structPPitm(float* %pf, [2 x double]* %ppd, %struct.char_struct* %s, i32** %ppn, i16 zeroext %us, i64 %l) nounwind uwtable {
@@ -31,46 +46,46 @@ entry:
   %l.addr = alloca i64, align 8
   %result = alloca double, align 8
   store float* %pf, float** %pf.addr, align 8
-  call void @llvm.dbg.declare(metadata !{float** %pf.addr}, metadata !34), !dbg !37
+  call void @llvm.dbg.declare(metadata !{float** %pf.addr}, metadata !48), !dbg !49
   store [2 x double]* %ppd, [2 x double]** %ppd.addr, align 8
-  call void @llvm.dbg.declare(metadata !{[2 x double]** %ppd.addr}, metadata !38), !dbg !41
+  call void @llvm.dbg.declare(metadata !{[2 x double]** %ppd.addr}, metadata !50), !dbg !49
   store %struct.char_struct* %s, %struct.char_struct** %s.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.char_struct** %s.addr}, metadata !42), !dbg !44
+  call void @llvm.dbg.declare(metadata !{%struct.char_struct** %s.addr}, metadata !51), !dbg !49
   store i32** %ppn, i32*** %ppn.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i32*** %ppn.addr}, metadata !45), !dbg !48
+  call void @llvm.dbg.declare(metadata !{i32*** %ppn.addr}, metadata !52), !dbg !49
   store i16 %us, i16* %us.addr, align 2
-  call void @llvm.dbg.declare(metadata !{i16* %us.addr}, metadata !49), !dbg !51
+  call void @llvm.dbg.declare(metadata !{i16* %us.addr}, metadata !53), !dbg !49
   store i64 %l, i64* %l.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i64* %l.addr}, metadata !52), !dbg !55
-  call void @llvm.dbg.declare(metadata !{double* %result}, metadata !56), !dbg !58
-  %0 = load float** %pf.addr, align 8, !dbg !59
-  %arrayidx = getelementptr inbounds float* %0, i64 0, !dbg !59
-  %1 = load float* %arrayidx, !dbg !59
-  %conv = fpext float %1 to double, !dbg !59
-  %2 = load [2 x double]** %ppd.addr, align 8, !dbg !59
-  %arrayidx1 = getelementptr inbounds [2 x double]* %2, i64 1, !dbg !59
-  %arrayidx2 = getelementptr inbounds [2 x double]* %arrayidx1, i32 0, i64 1, !dbg !59
-  %3 = load double* %arrayidx2, !dbg !59
-  %mul = fmul double %conv, %3, !dbg !59
-  %4 = load %struct.char_struct** %s.addr, !dbg !59
-  %c = getelementptr inbounds %struct.char_struct* %4, i32 0, i32 0, !dbg !59
-  %5 = load i8* %c, align 1, !dbg !59
-  %conv3 = sext i8 %5 to i32, !dbg !59
-  %conv4 = sitofp i32 %conv3 to double, !dbg !59
-  %mul5 = fmul double %mul, %conv4, !dbg !59
-  %6 = load i16* %us.addr, align 2, !dbg !59
-  %conv6 = zext i16 %6 to i32, !dbg !59
-  %conv7 = sitofp i32 %conv6 to double, !dbg !59
-  %mul8 = fmul double %mul5, %conv7, !dbg !59
-  %7 = load i64* %l.addr, align 8, !dbg !59
-  %conv9 = uitofp i64 %7 to double, !dbg !59
-  %mul10 = fmul double %mul8, %conv9, !dbg !59
-  %call = call i32 @_Z3foov(), !dbg !60
-  %conv11 = sitofp i32 %call to double, !dbg !60
-  %add = fadd double %mul10, %conv11, !dbg !60
-  store double %add, double* %result, align 8, !dbg !60
-  %8 = load double* %result, align 8, !dbg !61
-  ret double %8, !dbg !61
+  call void @llvm.dbg.declare(metadata !{i64* %l.addr}, metadata !54), !dbg !49
+  call void @llvm.dbg.declare(metadata !{double* %result}, metadata !55), !dbg !57
+  %0 = load float** %pf.addr, align 8, !dbg !57
+  %arrayidx = getelementptr inbounds float* %0, i64 0, !dbg !57
+  %1 = load float* %arrayidx, align 4, !dbg !57
+  %conv = fpext float %1 to double, !dbg !57
+  %2 = load [2 x double]** %ppd.addr, align 8, !dbg !57
+  %arrayidx1 = getelementptr inbounds [2 x double]* %2, i64 1, !dbg !57
+  %arrayidx2 = getelementptr inbounds [2 x double]* %arrayidx1, i32 0, i64 1, !dbg !57
+  %3 = load double* %arrayidx2, align 8, !dbg !57
+  %mul = fmul double %conv, %3, !dbg !57
+  %4 = load %struct.char_struct** %s.addr, align 8, !dbg !57
+  %c = getelementptr inbounds %struct.char_struct* %4, i32 0, i32 0, !dbg !57
+  %5 = load i8* %c, align 1, !dbg !57
+  %conv3 = sext i8 %5 to i32, !dbg !57
+  %conv4 = sitofp i32 %conv3 to double, !dbg !57
+  %mul5 = fmul double %mul, %conv4, !dbg !57
+  %6 = load i16* %us.addr, align 2, !dbg !57
+  %conv6 = zext i16 %6 to i32, !dbg !57
+  %conv7 = sitofp i32 %conv6 to double, !dbg !57
+  %mul8 = fmul double %mul5, %conv7, !dbg !57
+  %7 = load i64* %l.addr, align 8, !dbg !57
+  %conv9 = uitofp i64 %7 to double, !dbg !57
+  %mul10 = fmul double %mul8, %conv9, !dbg !57
+  %call = call i32 @_Z3foov(), !dbg !57
+  %conv11 = sitofp i32 %call to double, !dbg !57
+  %add = fadd double %mul10, %conv11, !dbg !57
+  store double %add, double* %result, align 8, !dbg !57
+  %8 = load double* %result, align 8, !dbg !58
+  ret double %8, !dbg !58
 }
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
@@ -86,120 +101,111 @@ entry:
   %result = alloca double, align 8
   store i32 0, i32* %retval
   store i32 %argc, i32* %argc.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !62), !dbg !63
+  call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !59), !dbg !60
   store i8** %argv, i8*** %argv.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !64), !dbg !67
-  call void @llvm.dbg.declare(metadata !{%struct.char_struct* %s}, metadata !68), !dbg !70
-  call void @llvm.dbg.declare(metadata !{float* %f}, metadata !71), !dbg !72
-  store float 0.000000e+00, float* %f, align 4, !dbg !73
-  call void @llvm.dbg.declare(metadata !{[2 x [2 x double]]* %d}, metadata !74), !dbg !77
-  %0 = bitcast [2 x [2 x double]]* %d to i8*, !dbg !78
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([2 x [2 x double]]* @_ZZ4mainE1d to i8*), i64 32, i32 16, i1 false), !dbg !78
-  %c = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 0, !dbg !79
-  store i8 97, i8* %c, align 1, !dbg !79
-  %c2 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !80
-  %arrayidx = getelementptr inbounds [2 x i8]* %c2, i32 0, i64 0, !dbg !80
-  store i8 48, i8* %arrayidx, align 1, !dbg !80
-  %c21 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !81
-  %arrayidx2 = getelementptr inbounds [2 x i8]* %c21, i32 0, i64 1, !dbg !81
-  store i8 49, i8* %arrayidx2, align 1, !dbg !81
-  call void @llvm.dbg.declare(metadata !{double* %result}, metadata !82), !dbg !83
-  %arraydecay = getelementptr inbounds [2 x [2 x double]]* %d, i32 0, i32 0, !dbg !84
-  %call = call double @_Z15test_parametersPfPA2_dR11char_structPPitm(float* %f, [2 x double]* %arraydecay, %struct.char_struct* %s, i32** null, i16 zeroext 10, i64 42), !dbg !84
-  store double %call, double* %result, align 8, !dbg !84
-  %1 = load double* %result, align 8, !dbg !85
-  %cmp = fcmp oeq double %1, 0.000000e+00, !dbg !85
-  %cond = select i1 %cmp, i32 0, i32 -1, !dbg !85
-  ret i32 %cond, !dbg !85
+  call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !61), !dbg !60
+  call void @llvm.dbg.declare(metadata !{%struct.char_struct* %s}, metadata !62), !dbg !64
+  call void @llvm.dbg.declare(metadata !{float* %f}, metadata !65), !dbg !66
+  store float 0.000000e+00, float* %f, align 4, !dbg !66
+  call void @llvm.dbg.declare(metadata !{[2 x [2 x double]]* %d}, metadata !67), !dbg !70
+  %0 = bitcast [2 x [2 x double]]* %d to i8*, !dbg !70
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([2 x [2 x double]]* @_ZZ4mainE1d to i8*), i64 32, i32 16, i1 false), !dbg !70
+  %c = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 0, !dbg !71
+  store i8 97, i8* %c, align 1, !dbg !71
+  %c2 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !72
+  %arrayidx = getelementptr inbounds [2 x i8]* %c2, i32 0, i64 0, !dbg !72
+  store i8 48, i8* %arrayidx, align 1, !dbg !72
+  %c21 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !73
+  %arrayidx2 = getelementptr inbounds [2 x i8]* %c21, i32 0, i64 1, !dbg !73
+  store i8 49, i8* %arrayidx2, align 1, !dbg !73
+  call void @llvm.dbg.declare(metadata !{double* %result}, metadata !74), !dbg !75
+  %arraydecay = getelementptr inbounds [2 x [2 x double]]* %d, i32 0, i32 0, !dbg !75
+  %call = call double @_Z15test_parametersPfPA2_dR11char_structPPitm(float* %f, [2 x double]* %arraydecay, %struct.char_struct* %s, i32** null, i16 zeroext 10, i64 42), !dbg !75
+  store double %call, double* %result, align 8, !dbg !75
+  %1 = load double* %result, align 8, !dbg !76
+  %cmp = fcmp oeq double %1, 0.000000e+00, !dbg !76
+  %cond = select i1 %cmp, i32 0, i32 -1, !dbg !76
+  ret i32 %cond, !dbg !76
 }
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"test-parameters.cpp", metadata !"/home/athirumurthi/dev/opencl-mc/build/RH64/Debug/backend/llvm", metadata !"clang version 3.0 (branches/release_30 36797)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !17} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test-parameters.cpp", metadata !"/home/akaylor/dev", metadata !"clang version 3.3 (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-clang2 gitosis@miro.kw.intel.com:clang.git 39450d0469e0d5589ad39fd0b20b5742750619a0) (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-llvm gitosis@miro.kw.intel.com:llvm.git 376642ed620ecae05b68c7bc81f79aeb2065abe0)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !43} ; [ DW_TAG_compile_unit ] [/home/akaylor/dev/test-parameters.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{metadata !2}
 !2 = metadata !{i32 0}
 !3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !12, metadata !16}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !6, i32 28, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 ()* @_Z3foov, null, null, metadata !10} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !"test-parameters.cpp", metadata !"/home/athirumurthi/dev/opencl-mc/build/RH64/Debug/backend/llvm", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5, metadata !10, metadata !38}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !6, i32 27, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z3foov, null, null, metadata !1, i32 28} ; [ DW_TAG_subprogram ] [line 27] [def] [scope 28] [foo]
+!6 = metadata !{i32 786473, metadata !"test-parameters.cpp", metadata !"/home/akaylor/dev", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!12 = metadata !{i32 720942, i32 0, metadata !6, metadata !"test_parameters", metadata !"test_parameters", metadata !"_Z15test_parametersPfPA2_dR11char_structPPitm", metadata !6, i32 33, metadata !13, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, double (float*, [2 x double]*, %struct.char_struct*, i32**, i16, i64)* @_Z15test_parametersPfPA2_dR11char_structPPitm, null, null, metadata !10} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!14 = metadata !{metadata !15}
-!15 = metadata !{i32 720932, null, metadata !"double", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!16 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 39, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !10} ; [ DW_TAG_subprogram ]
-!17 = metadata !{metadata !18}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786478, i32 0, metadata !6, metadata !"test_parameters", metadata !"test_parameters", metadata !"_Z15test_parametersPfPA2_dR11char_structPPitm", metadata !6, i32 32, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, double (float*, [2 x double]*, %struct.char_struct*, i32**, i16, i64)* @_Z15test_parametersPfPA2_dR11char_structPPitm, null, null, metadata !1, i32 33} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 33] [test_parameters]
+!11 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{metadata !13, metadata !14, metadata !16, metadata !20, metadata !33, metadata !35, metadata !36}
+!13 = metadata !{i32 786468, null, metadata !"double", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!14 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from float]
+!15 = metadata !{i32 786468, null, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
+!16 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !17} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!17 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 128, i64 64, i32 0, i32 0, metadata !13, metadata !18, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 128, align 64, offset 0] [from double]
 !18 = metadata !{metadata !19}
-!19 = metadata !{i32 720948, i32 0, null, metadata !"compound_char", metadata !"compound_char", metadata !"", metadata !6, i32 25, metadata !20, i32 0, i32 1, %struct.char_struct* @compound_char} ; [ DW_TAG_variable ]
-!20 = metadata !{i32 720898, null, metadata !"char_struct", metadata !6, i32 22, i64 24, i64 8, i32 0, i32 0, null, metadata !21, i32 0, null, null} ; [ DW_TAG_class_type ]
-!21 = metadata !{metadata !22, metadata !24, metadata !28}
-!22 = metadata !{i32 720909, metadata !20, metadata !"c", metadata !6, i32 23, i64 8, i64 8, i64 0, i32 0, metadata !23} ; [ DW_TAG_member ]
-!23 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!24 = metadata !{i32 720909, metadata !20, metadata !"c2", metadata !6, i32 24, i64 16, i64 8, i64 8, i32 0, metadata !25} ; [ DW_TAG_member ]
-!25 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !23, metadata !26, i32 0, i32 0} ; [ DW_TAG_array_type ]
-!26 = metadata !{metadata !27}
-!27 = metadata !{i32 720929, i64 0, i64 2}        ; [ DW_TAG_subrange_type ]
-!28 = metadata !{i32 720942, i32 0, metadata !20, metadata !"char_struct", metadata !"char_struct", metadata !"", metadata !6, i32 22, metadata !29, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !10} ; [ DW_TAG_subprogram ]
-!29 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!30 = metadata !{null, metadata !31}
-!31 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_pointer_type ]
-!32 = metadata !{i32 29, i32 3, metadata !33, null}
-!33 = metadata !{i32 720907, metadata !5, i32 28, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
-!34 = metadata !{i32 721153, metadata !12, metadata !"pf", metadata !6, i32 16777248, metadata !35, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!35 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !36} ; [ DW_TAG_pointer_type ]
-!36 = metadata !{i32 720932, null, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!37 = metadata !{i32 32, i32 31, metadata !12, null}
-!38 = metadata !{i32 721153, metadata !12, metadata !"ppd", metadata !6, i32 33554464, metadata !39, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!39 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !40} ; [ DW_TAG_pointer_type ]
-!40 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 128, i64 64, i32 0, i32 0, metadata !15, metadata !26, i32 0, i32 0} ; [ DW_TAG_array_type ]
-!41 = metadata !{i32 32, i32 42, metadata !12, null}
-!42 = metadata !{i32 721153, metadata !12, metadata !"s", metadata !6, i32 50331680, metadata !43, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!43 = metadata !{i32 720912, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_reference_type ]
-!44 = metadata !{i32 32, i32 72, metadata !12, null}
-!45 = metadata !{i32 721153, metadata !12, metadata !"ppn", metadata !6, i32 67108896, metadata !46, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!46 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !47} ; [ DW_TAG_pointer_type ]
-!47 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
-!48 = metadata !{i32 32, i32 81, metadata !12, null}
-!49 = metadata !{i32 721153, metadata !12, metadata !"us", metadata !6, i32 83886112, metadata !50, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!50 = metadata !{i32 720932, null, metadata !"unsigned short", null, i32 0, i64 16, i64 16, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!51 = metadata !{i32 32, i32 105, metadata !12, null}
-!52 = metadata !{i32 721153, metadata !12, metadata !"l", metadata !6, i32 100663328, metadata !53, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!53 = metadata !{i32 720934, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !54} ; [ DW_TAG_const_type ]
-!54 = metadata !{i32 720932, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!55 = metadata !{i32 32, i32 135, metadata !12, null}
-!56 = metadata !{i32 721152, metadata !57, metadata !"result", metadata !6, i32 34, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!57 = metadata !{i32 720907, metadata !12, i32 33, i32 1, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
-!58 = metadata !{i32 34, i32 10, metadata !57, null}
-!59 = metadata !{i32 34, i32 59, metadata !57, null}
-!60 = metadata !{i32 34, i32 54, metadata !57, null}
-!61 = metadata !{i32 35, i32 3, metadata !57, null}
-!62 = metadata !{i32 721153, metadata !16, metadata !"argc", metadata !6, i32 16777254, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!63 = metadata !{i32 38, i32 14, metadata !16, null}
-!64 = metadata !{i32 721153, metadata !16, metadata !"argv", metadata !6, i32 33554470, metadata !65, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!65 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !66} ; [ DW_TAG_pointer_type ]
-!66 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
-!67 = metadata !{i32 38, i32 26, metadata !16, null}
-!68 = metadata !{i32 721152, metadata !69, metadata !"s", metadata !6, i32 40, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!69 = metadata !{i32 720907, metadata !16, i32 39, i32 1, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
-!70 = metadata !{i32 40, i32 22, metadata !69, null}
-!71 = metadata !{i32 721152, metadata !69, metadata !"f", metadata !6, i32 41, metadata !36, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!72 = metadata !{i32 41, i32 9, metadata !69, null}
-!73 = metadata !{i32 41, i32 16, metadata !69, null}
-!74 = metadata !{i32 721152, metadata !69, metadata !"d", metadata !6, i32 42, metadata !75, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!75 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 256, i64 64, i32 0, i32 0, metadata !15, metadata !76, i32 0, i32 0} ; [ DW_TAG_array_type ]
-!76 = metadata !{metadata !27, metadata !27}
-!77 = metadata !{i32 42, i32 10, metadata !69, null}
-!78 = metadata !{i32 42, i32 38, metadata !69, null}
-!79 = metadata !{i32 44, i32 3, metadata !69, null}
-!80 = metadata !{i32 45, i32 3, metadata !69, null}
-!81 = metadata !{i32 46, i32 3, metadata !69, null}
-!82 = metadata !{i32 721152, metadata !69, metadata !"result", metadata !6, i32 48, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!83 = metadata !{i32 48, i32 10, metadata !69, null}
-!84 = metadata !{i32 48, i32 19, metadata !69, null}
-!85 = metadata !{i32 49, i32 3, metadata !69, null}
+!19 = metadata !{i32 786465, i64 0, i64 2}        ; [ DW_TAG_subrange_type ] [0, 1]
+!20 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from char_struct]
+!21 = metadata !{i32 786451, null, metadata !"char_struct", metadata !6, i32 22, i64 24, i64 8, i32 0, i32 0, null, metadata !22, i32 0, null, null} ; [ DW_TAG_structure_type ] [char_struct] [line 22, size 24, align 8, offset 0] [from ]
+!22 = metadata !{metadata !23, metadata !25, metadata !27}
+!23 = metadata !{i32 786445, metadata !21, metadata !"c", metadata !6, i32 23, i64 8, i64 8, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ] [c] [line 23, size 8, align 8, offset 0] [from char]
+!24 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!25 = metadata !{i32 786445, metadata !21, metadata !"c2", metadata !6, i32 24, i64 16, i64 8, i64 8, i32 0, metadata !26} ; [ DW_TAG_member ] [c2] [line 24, size 16, align 8, offset 8] [from ]
+!26 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !24, metadata !18, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 16, align 8, offset 0] [from char]
+!27 = metadata !{i32 786478, i32 0, metadata !21, metadata !"char_struct", metadata !"char_struct", metadata !"", metadata !6, i32 22, metadata !28, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !31, i32 22} ; [ DW_TAG_subprogram ] [line 22] [char_struct]
+!28 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!29 = metadata !{null, metadata !30}
+!30 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !21} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char_struct]
+!31 = metadata !{metadata !32}
+!32 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!33 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !34} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!34 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!35 = metadata !{i32 786468, null, metadata !"unsigned short", null, i32 0, i64 16, i64 16, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned short] [line 0, size 16, align 16, offset 0, enc DW_ATE_unsigned]
+!36 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !37} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from long unsigned int]
+!37 = metadata !{i32 786468, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!38 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 38, metadata !39, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !1, i32 39} ; [ DW_TAG_subprogram ] [line 38] [def] [scope 39] [main]
+!39 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !40, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!40 = metadata !{metadata !9, metadata !9, metadata !41}
+!41 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !42} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!42 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!43 = metadata !{metadata !44}
+!44 = metadata !{metadata !45}
+!45 = metadata !{i32 786484, i32 0, null, metadata !"compound_char", metadata !"compound_char", metadata !"", metadata !6, i32 25, metadata !21, i32 0, i32 1, %struct.char_struct* @compound_char} ; [ DW_TAG_variable ] [compound_char] [line 25] [def]
+!46 = metadata !{i32 29, i32 0, metadata !47, null}
+!47 = metadata !{i32 786443, metadata !5, i32 28, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-parameters.cpp]
+!48 = metadata !{i32 786689, metadata !10, metadata !"pf", metadata !6, i32 16777248, metadata !14, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [pf] [line 32]
+!49 = metadata !{i32 32, i32 0, metadata !10, null}
+!50 = metadata !{i32 786689, metadata !10, metadata !"ppd", metadata !6, i32 33554464, metadata !16, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ppd] [line 32]
+!51 = metadata !{i32 786689, metadata !10, metadata !"s", metadata !6, i32 50331680, metadata !20, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [s] [line 32]
+!52 = metadata !{i32 786689, metadata !10, metadata !"ppn", metadata !6, i32 67108896, metadata !33, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ppn] [line 32]
+!53 = metadata !{i32 786689, metadata !10, metadata !"us", metadata !6, i32 83886112, metadata !35, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [us] [line 32]
+!54 = metadata !{i32 786689, metadata !10, metadata !"l", metadata !6, i32 100663328, metadata !36, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [l] [line 32]
+!55 = metadata !{i32 786688, metadata !56, metadata !"result", metadata !6, i32 34, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 34]
+!56 = metadata !{i32 786443, metadata !10, i32 33, i32 0, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-parameters.cpp]
+!57 = metadata !{i32 34, i32 0, metadata !56, null}
+!58 = metadata !{i32 35, i32 0, metadata !56, null}
+!59 = metadata !{i32 786689, metadata !38, metadata !"argc", metadata !6, i32 16777254, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 38]
+!60 = metadata !{i32 38, i32 0, metadata !38, null}
+!61 = metadata !{i32 786689, metadata !38, metadata !"argv", metadata !6, i32 33554470, metadata !41, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 38]
+!62 = metadata !{i32 786688, metadata !63, metadata !"s", metadata !6, i32 40, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [s] [line 40]
+!63 = metadata !{i32 786443, metadata !38, i32 39, i32 0, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-parameters.cpp]
+!64 = metadata !{i32 40, i32 0, metadata !63, null}
+!65 = metadata !{i32 786688, metadata !63, metadata !"f", metadata !6, i32 41, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [f] [line 41]
+!66 = metadata !{i32 41, i32 0, metadata !63, null}
+!67 = metadata !{i32 786688, metadata !63, metadata !"d", metadata !6, i32 42, metadata !68, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 42]
+!68 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 256, i64 64, i32 0, i32 0, metadata !13, metadata !69, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 256, align 64, offset 0] [from double]
+!69 = metadata !{metadata !19, metadata !19}
+!70 = metadata !{i32 42, i32 0, metadata !63, null}
+!71 = metadata !{i32 44, i32 0, metadata !63, null}
+!72 = metadata !{i32 45, i32 0, metadata !63, null}
+!73 = metadata !{i32 46, i32 0, metadata !63, null}
+!74 = metadata !{i32 786688, metadata !63, metadata !"result", metadata !6, i32 48, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 48]
+!75 = metadata !{i32 48, i32 0, metadata !63, null}
+!76 = metadata !{i32 49, i32 0, metadata !63, null}
diff --git a/test/Linker/DbgDeclare.ll b/test/Linker/DbgDeclare.ll
new file mode 100644
index 0000000..7f64f95
--- /dev/null
+++ b/test/Linker/DbgDeclare.ll
@@ -0,0 +1,58 @@
+; RUN: llvm-link %s %p/DbgDeclare2.ll -o %t.bc
+; RUN: llvm-dis < %t.bc | FileCheck %s
+; Test if metadata in dbg.declare is mapped properly or not.
+
+; rdar://13089880
+; CHECK: define i32 @main(i32 %argc, i8** %argv)
+; CHECK: call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !{{[0-9]+}})
+; CHECK: call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !{{[0-9]+}})
+; CHECK: define void @test(i32 %argc, i8** %argv)
+; CHECK: call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !{{[0-9]+}})
+; CHECK: call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !{{[0-9]+}})
+; CHECK: call void @llvm.dbg.declare(metadata !{i32* %i}, metadata !{{[0-9]+}})
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+define i32 @main(i32 %argc, i8** %argv) uwtable ssp { ; spills both args to allocas, describes them via dbg.declare, then forwards them to @test
+entry:
+  %retval = alloca i32, align 4
+  %argc.addr = alloca i32, align 4 ; stack slot referenced by the first dbg.declare CHECK line for @main
+  %argv.addr = alloca i8**, align 8 ; stack slot referenced by the second dbg.declare CHECK line for @main
+  store i32 0, i32* %retval
+  store i32 %argc, i32* %argc.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !14), !dbg !15 ; !14 is the DW_TAG_arg_variable node for "argc"
+  store i8** %argv, i8*** %argv.addr, align 8
+  call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !16), !dbg !15 ; !16 is the DW_TAG_arg_variable node for "argv"
+  %0 = load i32* %argc.addr, align 4, !dbg !17
+  %1 = load i8*** %argv.addr, align 8, !dbg !17
+  call void @test(i32 %0, i8** %1), !dbg !17 ; @test is only declared in this file; the RUN line links in DbgDeclare2.ll, which defines it
+  ret i32 0, !dbg !19
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare void @test(i32, i8**)
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"main.cpp", metadata !"/private/tmp", metadata !"clang version 3.3 (trunk 173515)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"main.cpp", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !9, metadata !10}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_const_type ]
+!13 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786689, metadata !5, metadata !"argc", metadata !6, i32 16777219, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 3, i32 0, metadata !5, null}
+!16 = metadata !{i32 786689, metadata !5, metadata !"argv", metadata !6, i32 33554435, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 5, i32 0, metadata !18, null}
+!18 = metadata !{i32 786443, metadata !5, i32 4, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 6, i32 0, metadata !18, null}
diff --git a/test/Linker/DbgDeclare2.ll b/test/Linker/DbgDeclare2.ll
new file mode 100644
index 0000000..e2e56b2
--- /dev/null
+++ b/test/Linker/DbgDeclare2.ll
@@ -0,0 +1,76 @@
+; This file is used by DbgDeclare.ll, so it doesn't actually do anything itself
+;
+; RUN: true
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+define void @test(i32 %argc, i8** %argv) uwtable ssp { ; loops i from 0 while i < argc, calling puts(argv[i]); carries three dbg.declare calls
+entry:
+  %argc.addr = alloca i32, align 4
+  %argv.addr = alloca i8**, align 8
+  %i = alloca i32, align 4 ; loop counter slot, described by the third dbg.declare below
+  store i32 %argc, i32* %argc.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !14), !dbg !15 ; !14 is the DW_TAG_arg_variable node for "argc"
+  store i8** %argv, i8*** %argv.addr, align 8
+  call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !16), !dbg !15 ; !16 is the DW_TAG_arg_variable node for "argv"
+  call void @llvm.dbg.declare(metadata !{i32* %i}, metadata !17), !dbg !20 ; !17 is the DW_TAG_auto_variable node for "i"
+  store i32 0, i32* %i, align 4, !dbg !20
+  br label %for.cond, !dbg !20
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4, !dbg !20
+  %1 = load i32* %argc.addr, align 4, !dbg !20
+  %cmp = icmp slt i32 %0, %1, !dbg !20 ; signed compare: continue while i < argc
+  br i1 %cmp, label %for.body, label %for.end, !dbg !20
+
+for.body:                                         ; preds = %for.cond
+  %2 = load i32* %i, align 4, !dbg !21
+  %idxprom = sext i32 %2 to i64, !dbg !21 ; widen the i32 counter to the i64 index used by the GEP
+  %3 = load i8*** %argv.addr, align 8, !dbg !21
+  %arrayidx = getelementptr inbounds i8** %3, i64 %idxprom, !dbg !21 ; address of argv[i]
+  %4 = load i8** %arrayidx, align 8, !dbg !21
+  %call = call i32 @puts(i8* %4), !dbg !21 ; return value of puts is not used
+  br label %for.inc, !dbg !23
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4, !dbg !20
+  %inc = add nsw i32 %5, 1, !dbg !20
+  store i32 %inc, i32* %i, align 4, !dbg !20
+  br label %for.cond, !dbg !20
+
+for.end:                                          ; preds = %for.cond
+  ret void, !dbg !24
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i32 @puts(i8*)
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"main.cpp", metadata !"/private/tmp", metadata !"clang version 3.3 (trunk 173515)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"print_args", metadata !"print_args", metadata !"test", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32, i8**)* @test, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"test.cpp", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null, metadata !9, metadata !10}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_const_type ]
+!13 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786689, metadata !5, metadata !"argc", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 4, i32 0, metadata !5, null}
+!16 = metadata !{i32 786689, metadata !5, metadata !"argv", metadata !6, i32 33554436, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 786688, metadata !18, metadata !"i", metadata !6, i32 6, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!18 = metadata !{i32 786443, metadata !19, i32 6, i32 0, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 786443, metadata !5, i32 5, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!20 = metadata !{i32 6, i32 0, metadata !18, null}
+!21 = metadata !{i32 8, i32 0, metadata !22, null}
+!22 = metadata !{i32 786443, metadata !18, i32 7, i32 0, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 9, i32 0, metadata !22, null}
+!24 = metadata !{i32 10, i32 0, metadata !19, null}
diff --git a/test/Linker/module-flags-1-a.ll b/test/Linker/module-flags-1-a.ll
index 973aa80..32f189c 100644
--- a/test/Linker/module-flags-1-a.ll
+++ b/test/Linker/module-flags-1-a.ll
@@ -3,10 +3,10 @@
 ; Test basic functionality of module flags.
 
 ; CHECK: !0 = metadata !{i32 1, metadata !"foo", i32 37}
-; CHECK: !1 = metadata !{i32 1, metadata !"qux", i32 42}
+; CHECK: !1 = metadata !{i32 2, metadata !"bar", i32 42}
 ; CHECK: !2 = metadata !{i32 1, metadata !"mux", metadata !3}
 ; CHECK: !3 = metadata !{metadata !"hello world", i32 927}
-; CHECK: !4 = metadata !{i32 2, metadata !"bar", i32 42}
+; CHECK: !4 = metadata !{i32 1, metadata !"qux", i32 42}
 ; CHECK: !llvm.module.flags = !{!0, !1, !2, !4}
 
 !0 = metadata !{ i32 1, metadata !"foo", i32 37 }
diff --git a/test/Linker/module-flags-3-a.ll b/test/Linker/module-flags-3-a.ll
index 4233a0a..e7a720e 100644
--- a/test/Linker/module-flags-3-a.ll
+++ b/test/Linker/module-flags-3-a.ll
@@ -3,10 +3,10 @@
 ; Test 'require' behavior.
 
 ; CHECK: !0 = metadata !{i32 1, metadata !"foo", i32 37}
-; CHECK: !1 = metadata !{i32 3, metadata !"foo", metadata !2}
-; CHECK: !2 = metadata !{metadata !"bar", i32 42}
-; CHECK: !3 = metadata !{i32 1, metadata !"bar", i32 42}
-; CHECK: !llvm.module.flags = !{!0, !1, !3}
+; CHECK: !1 = metadata !{i32 1, metadata !"bar", i32 42}
+; CHECK: !2 = metadata !{i32 3, metadata !"foo", metadata !3}
+; CHECK: !3 = metadata !{metadata !"bar", i32 42}
+; CHECK: !llvm.module.flags = !{!0, !1, !2}
 
 !0 = metadata !{ i32 1, metadata !"foo", i32 37 }
 !1 = metadata !{ i32 1, metadata !"bar", i32 42 }
diff --git a/test/Linker/module-flags-7-a.ll b/test/Linker/module-flags-7-a.ll
new file mode 100644
index 0000000..976c8fe
--- /dev/null
+++ b/test/Linker/module-flags-7-a.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-link %s %p/module-flags-7-b.ll -S -o - 2>&1 | FileCheck %s
+
+; Test module flags error messages.
+
+; CHECK: linking module flags 'foo': IDs have conflicting behaviors
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-7-b.ll b/test/Linker/module-flags-7-b.ll
new file mode 100644
index 0000000..2bc7250
--- /dev/null
+++ b/test/Linker/module-flags-7-b.ll
@@ -0,0 +1,6 @@
+; This file is used with module-flags-7-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 2, metadata !"foo", i32 37 }
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-8-a.ll b/test/Linker/module-flags-8-a.ll
new file mode 100644
index 0000000..146cae7
--- /dev/null
+++ b/test/Linker/module-flags-8-a.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-link %s %p/module-flags-8-b.ll -S -o - | sort | FileCheck %s
+
+; Test append-type module flags.
+
+; CHECK: !0 = metadata !{i32 5, metadata !"flag-0", metadata !1}
+; CHECK: !1 = metadata !{i32 0, i32 0, i32 1}
+; CHECK: !2 = metadata !{i32 6, metadata !"flag-1", metadata !3}
+; CHECK: !3 = metadata !{i32 0, i32 1, i32 2}
+; CHECK: !llvm.module.flags = !{!0, !2}
+
+!0 = metadata !{ i32 5, metadata !"flag-0", metadata !{ i32 0 } }
+!1 = metadata !{ i32 6, metadata !"flag-1", metadata !{ i32 0, i32 1 } }
+
+!llvm.module.flags = !{ !0, !1 }
diff --git a/test/Linker/module-flags-8-b.ll b/test/Linker/module-flags-8-b.ll
new file mode 100644
index 0000000..08f9bc4
--- /dev/null
+++ b/test/Linker/module-flags-8-b.ll
@@ -0,0 +1,7 @@
+; This file is used with module-flags-8-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 5, metadata !"flag-0", metadata !{ i32 0, i32 1 } }
+!1 = metadata !{ i32 6, metadata !"flag-1", metadata !{ i32 1, i32 2 } }
+
+!llvm.module.flags = !{ !0, !1 }
diff --git a/test/Linker/testlink1.ll b/test/Linker/testlink1.ll
index a874637..6ba6fd5 100644
--- a/test/Linker/testlink1.ll
+++ b/test/Linker/testlink1.ll
@@ -13,6 +13,10 @@
 ; The uses of intlist in the other file should be remapped.
 ; CHECK-NOT: {{%intlist.[0-9]}}
 
+; CHECK: %VecSize = type { <5 x i32> }
+; CHECK: %VecSize.{{[0-9]}} = type { <10 x i32> }
+%VecSize = type { <5 x i32> }
+
 %Struct1 = type opaque
 @S1GV = external global %Struct1*
 
@@ -93,3 +97,5 @@ define internal void @Testintern() {
 define void @testIntern() {
   ret void
 }
+
+declare void @VecSizeCrash(%VecSize)
diff --git a/test/Linker/testlink2.ll b/test/Linker/testlink2.ll
index 1798e31..ff8e529 100644
--- a/test/Linker/testlink2.ll
+++ b/test/Linker/testlink2.ll
@@ -8,6 +8,8 @@
 %Ty1 = type { %Ty2* }
 %Ty2 = type opaque
 
+%VecSize = type { <10 x i32> }
+
 @GVTy1 = global %Ty1* null
 @GVTy2 = external global %Ty2*
 
@@ -53,3 +55,4 @@ define internal void @testIntern() {
   ret void
 }
 
+declare void @VecSizeCrash1(%VecSize)
diff --git a/test/MC/AArch64/basic-a64-diagnostics.s b/test/MC/AArch64/basic-a64-diagnostics.s
new file mode 100644
index 0000000..1e9024c
--- /dev/null
+++ b/test/MC/AArch64/basic-a64-diagnostics.s
@@ -0,0 +1,3713 @@
+// RUN: not llvm-mc -triple=aarch64 < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
+
+//------------------------------------------------------------------------------
+// Add/sub (extended register)
+//------------------------------------------------------------------------------
+
+        // Mismatched final register and extend
+        add x2, x3, x5, sxtb
+        add x2, x4, w2, uxtx
+        add w5, w7, x9, sxtx
+// CHECK-ERROR: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR:         add x2, x3, x5, sxtb
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR:         add x2, x4, w2, uxtx
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR:         add w5, w7, x9, sxtx
+// CHECK-ERROR:                     ^
+
+        // Out of range extends
+        add x9, x10, w11, uxtb #-1
+        add x3, x5, w7, uxtb #5
+        sub x9, x15, x2, uxth #5
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR:         add x9, x10, w11, uxtb #-1
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR:         add x3, x5, w7, uxtb #5
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR:         sub x9, x15, x2, uxth #5
+// CHECK-ERROR:                          ^
+
+        // Wrong registers on normal variants
+        add xzr, x3, x5, uxtx
+        sub x3, xzr, w9, sxth #1
+        add x1, x2, sp, uxtx
+// CHECK-ERROR: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR:         add xzr, x3, x5, uxtx
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sub x3, xzr, w9, sxth #1
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR:         add x1, x2, sp, uxtx
+// CHECK-ERROR:                     ^
+
+        // Wrong registers on flag-setting variants
+        adds sp, x3, w2, uxtb
+        adds x3, xzr, x9, uxtx
+        subs x2, x1, sp, uxtx
+        adds x2, x1, sp, uxtb #2
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         adds sp, x3, w2, uxtb
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR:         adds x3, xzr, x9, uxtx
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR:         subs x2, x1, sp, uxtx
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR:         adds x2, x1, sp, uxtb #2
+// CHECK-ERROR:                      ^
+
+        // Amount not optional if lsl valid and used
+        add sp, x5, x7, lsl
+// CHECK-ERROR: error: expected #imm after shift specifier
+// CHECK-ERROR:         add sp, x5, x7, lsl
+// CHECK-ERROR:                             ^
+
+//------------------------------------------------------------------------------
+// Add/sub (immediate)
+//------------------------------------------------------------------------------
+
+// Out of range immediates: < 0 or more than 12 bits
+        add w4, w5, #-1
+        add w5, w6, #0x1000
+        add w4, w5, #-1, lsl #12
+        add w5, w6, #0x1000, lsl #12
+// CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-NEXT:         add w4, w5, #-1
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-NEXT:         add w5, w6, #0x1000
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-NEXT:         add w4, w5, #-1, lsl #12
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-NEXT:         add w5, w6, #0x1000, lsl #12
+// CHECK-ERROR-NEXT:                     ^
+
+// Only lsl #0 and lsl #12 are allowed
+        add w2, w3, #0x1, lsl #1
+        add w5, w17, #0xfff, lsl #13
+        add w17, w20, #0x1000, lsl #12
+        sub xsp, x34, #0x100, lsl #-1
+// CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-NEXT:         add w2, w3, #0x1, lsl #1
+// CHECK-ERROR-NEXT:                                ^
+// CHECK-ERROR-NEXT: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-NEXT:         add w5, w17, #0xfff, lsl #13
+// CHECK-ERROR-NEXT:                                   ^
+// CHECK-ERROR-NEXT: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-NEXT:         add w17, w20, #0x1000, lsl #12
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: only 'lsl #+N' valid after immediate
+// CHECK-ERROR-NEXT:         sub xsp, x34, #0x100, lsl #-1
+// CHECK-ERROR-NEXT:                                    ^
+
+// Incorrect registers (w31 doesn't exist at all, and 31 decodes to sp for these).
+        add w31, w20, #1234
+        add wzr, w20, #0x123
+        add w20, wzr, #0x321
+        add wzr, wzr, #0xfff
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         add w31, w20, #1234
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         add wzr, w20, #0x123
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         add w20, wzr, #0x321
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         add wzr, wzr, #0xfff
+// CHECK-ERROR-NEXT:             ^
+
+// Mixed register classes
+        add xsp, w2, #123
+        sub w2, x30, #32
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         add xsp, w2, #123
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sub w2, x30, #32
+// CHECK-ERROR-NEXT:                 ^
+
+// Out of range immediate
+        adds w0, w5, #0x10000
+// CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-NEXT:         adds w0, w5, #0x10000
+// CHECK-ERROR-NEXT:                      ^
+
+// Wn|WSP should be in second place
+        adds w4, wzr, #0x123
+// ...but register 31 as the destination means wzr, not wsp
+        subs wsp, w5, #123
+        subs x5, xzr, #0x456, lsl #12
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         adds w4, wzr, #0x123
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         subs wsp, w5, #123
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         subs x5, xzr, #0x456, lsl #12
+// CHECK-ERROR-NEXT:                  ^
+
+        // MOV alias should not accept any fiddling
+        mov x2, xsp, #123
+        mov wsp, w27, #0xfff, lsl #12
+// CHECK-ERROR: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT:         mov x2, xsp, #123
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         mov wsp, w27, #0xfff, lsl #12
+// CHECK-ERROR-NEXT:                       ^
+
+        // A relocation should be provided for symbols
+        add x3, x9, #variable
+// CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-NEXT:         add x3, x9, #variable
+// CHECK-ERROR-NEXT:                      ^
+
+
+//------------------------------------------------------------------------------
+// Add-subtract (shifted register)
+//------------------------------------------------------------------------------
+
+        add wsp, w1, w2, lsr #3
+        add x4, sp, x9, asr #5
+        add x9, x10, x5, ror #3
+// CHECK-ERROR: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         add wsp, w1, w2, lsr #3
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         add x4, sp, x9, asr #5
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         add x9, x10, x5, ror #3
+// CHECK-ERROR-NEXT:                          ^
+
+        add w1, w2, w3, lsl #-1
+        add w1, w2, w3, lsl #32
+        add w1, w2, w3, lsr #-1
+        add w1, w2, w3, lsr #32
+        add w1, w2, w3, asr #-1
+        add w1, w2, w3, asr #32
+        add x1, x2, x3, lsl #-1
+        add x1, x2, x3, lsl #64
+        add x1, x2, x3, lsr #-1
+        add x1, x2, x3, lsr #64
+        add x1, x2, x3, asr #-1
+        add x1, x2, x3, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         add w1, w2, w3, lsl #-1
+// CHECK-ERROR-NEXT:                              ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         add w1, w2, w3, lsl #32
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         add w1, w2, w3, lsr #-1
+// CHECK-ERROR-NEXT:                              ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         add w1, w2, w3, lsr #32
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         add w1, w2, w3, asr #-1
+// CHECK-ERROR-NEXT:                              ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         add w1, w2, w3, asr #32
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         add x1, x2, x3, lsl #-1
+// CHECK-ERROR-NEXT:                              ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         add x1, x2, x3, lsl #64
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         add x1, x2, x3, lsr #-1
+// CHECK-ERROR-NEXT:                              ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         add x1, x2, x3, lsr #64
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         add x1, x2, x3, asr #-1
+// CHECK-ERROR-NEXT:                              ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         add x1, x2, x3, asr #64
+// CHECK-ERROR-NEXT:                         ^
+
+        adds w1, w2, w3, lsl #-1
+        adds w1, w2, w3, lsl #32
+        adds w1, w2, w3, lsr #-1
+        adds w1, w2, w3, lsr #32
+        adds w1, w2, w3, asr #-1
+        adds w1, w2, w3, asr #32
+        adds x1, x2, x3, lsl #-1
+        adds x1, x2, x3, lsl #64
+        adds x1, x2, x3, lsr #-1
+        adds x1, x2, x3, lsr #64
+        adds x1, x2, x3, asr #-1
+        adds x1, x2, x3, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         adds w1, w2, w3, lsl #-1
+// CHECK-ERROR-NEXT:                               ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         adds w1, w2, w3, lsl #32
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         adds w1, w2, w3, lsr #-1
+// CHECK-ERROR-NEXT:                               ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         adds w1, w2, w3, lsr #32
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         adds w1, w2, w3, asr #-1
+// CHECK-ERROR-NEXT:                               ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         adds w1, w2, w3, asr #32
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         adds x1, x2, x3, lsl #-1
+// CHECK-ERROR-NEXT:                               ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         adds x1, x2, x3, lsl #64
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         adds x1, x2, x3, lsr #-1
+// CHECK-ERROR-NEXT:                               ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         adds x1, x2, x3, lsr #64
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         adds x1, x2, x3, asr #-1
+// CHECK-ERROR-NEXT:                               ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         adds x1, x2, x3, asr #64
+// CHECK-ERROR-NEXT:                          ^
+
+        sub w1, w2, w3, lsl #-1
+        sub w1, w2, w3, lsl #32
+        sub w1, w2, w3, lsr #-1
+        sub w1, w2, w3, lsr #32
+        sub w1, w2, w3, asr #-1
+        sub w1, w2, w3, asr #32
+        sub x1, x2, x3, lsl #-1
+        sub x1, x2, x3, lsl #64
+        sub x1, x2, x3, lsr #-1
+        sub x1, x2, x3, lsr #64
+        sub x1, x2, x3, asr #-1
+        sub x1, x2, x3, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         sub w1, w2, w3, lsl #-1
+// CHECK-ERROR-NEXT:                              ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         sub w1, w2, w3, lsl #32
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         sub w1, w2, w3, lsr #-1
+// CHECK-ERROR-NEXT:                              ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         sub w1, w2, w3, lsr #32
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         sub w1, w2, w3, asr #-1
+// CHECK-ERROR-NEXT:                              ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         sub w1, w2, w3, asr #32
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         sub x1, x2, x3, lsl #-1
+// CHECK-ERROR-NEXT:                              ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         sub x1, x2, x3, lsl #64
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         sub x1, x2, x3, lsr #-1
+// CHECK-ERROR-NEXT:                              ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         sub x1, x2, x3, lsr #64
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         sub x1, x2, x3, asr #-1
+// CHECK-ERROR-NEXT:                              ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         sub x1, x2, x3, asr #64
+// CHECK-ERROR-NEXT:                         ^
+
+        subs w1, w2, w3, lsl #-1
+        subs w1, w2, w3, lsl #32
+        subs w1, w2, w3, lsr #-1
+        subs w1, w2, w3, lsr #32
+        subs w1, w2, w3, asr #-1
+        subs w1, w2, w3, asr #32
+        subs x1, x2, x3, lsl #-1
+        subs x1, x2, x3, lsl #64
+        subs x1, x2, x3, lsr #-1
+        subs x1, x2, x3, lsr #64
+        subs x1, x2, x3, asr #-1
+        subs x1, x2, x3, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         subs w1, w2, w3, lsl #-1
+// CHECK-ERROR-NEXT:                               ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         subs w1, w2, w3, lsl #32
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         subs w1, w2, w3, lsr #-1
+// CHECK-ERROR-NEXT:                               ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         subs w1, w2, w3, lsr #32
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         subs w1, w2, w3, asr #-1
+// CHECK-ERROR-NEXT:                               ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         subs w1, w2, w3, asr #32
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         subs x1, x2, x3, lsl #-1
+// CHECK-ERROR-NEXT:                               ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         subs x1, x2, x3, lsl #64
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         subs x1, x2, x3, lsr #-1
+// CHECK-ERROR-NEXT:                               ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         subs x1, x2, x3, lsr #64
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         subs x1, x2, x3, asr #-1
+// CHECK-ERROR-NEXT:                               ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         subs x1, x2, x3, asr #64
+// CHECK-ERROR-NEXT:                          ^
+
+        cmn w9, w10, lsl #-1
+        cmn w9, w10, lsl #32
+        cmn w11, w12, lsr #-1
+        cmn w11, w12, lsr #32
+        cmn w19, wzr, asr #-1
+        cmn wzr, wzr, asr #32
+        cmn x9, x10, lsl #-1
+        cmn x9, x10, lsl #64
+        cmn x11, x12, lsr #-1
+        cmn x11, x12, lsr #64
+        cmn x19, xzr, asr #-1
+        cmn xzr, xzr, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         cmn w9, w10, lsl #-1
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         cmn w9, w10, lsl #32
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         cmn w11, w12, lsr #-1
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         cmn w11, w12, lsr #32
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         cmn w19, wzr, asr #-1
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+// CHECK-ERROR-NEXT:         cmn wzr, wzr, asr #32
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         cmn x9, x10, lsl #-1
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         cmn x9, x10, lsl #64
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         cmn x11, x12, lsr #-1
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         cmn x11, x12, lsr #64
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         cmn x19, xzr, asr #-1
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR-NEXT:         cmn xzr, xzr, asr #64
+// CHECK-ERROR-NEXT:                       ^
+
+        cmp w9, w10, lsl #-1
+        cmp w9, w10, lsl #32
+        cmp w11, w12, lsr #-1
+        cmp w11, w12, lsr #32
+        cmp w19, wzr, asr #-1
+        cmp wzr, wzr, asr #32
+        cmp x9, x10, lsl #-1
+        cmp x9, x10, lsl #64
+        cmp x11, x12, lsr #-1
+        cmp x11, x12, lsr #64
+        cmp x19, xzr, asr #-1
+        cmp xzr, xzr, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         cmp w9, w10, lsl #-1
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         cmp w9, w10, lsl #32
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         cmp w11, w12, lsr #-1
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         cmp w11, w12, lsr #32
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         cmp w19, wzr, asr #-1
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+// CHECK-ERROR-NEXT:         cmp wzr, wzr, asr #32
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         cmp x9, x10, lsl #-1
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         cmp x9, x10, lsl #64
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         cmp x11, x12, lsr #-1
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT:         cmp x11, x12, lsr #64
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         cmp x19, xzr, asr #-1
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR-NEXT:         cmp xzr, xzr, asr #64
+// CHECK-ERROR-NEXT:                       ^
+
+        neg w9, w10, lsl #-1
+        neg w9, w10, lsl #32
+        neg w11, w12, lsr #-1
+        neg w11, w12, lsr #32
+        neg w19, wzr, asr #-1
+        neg wzr, wzr, asr #32
+        neg x9, x10, lsl #-1
+        neg x9, x10, lsl #64
+        neg x11, x12, lsr #-1
+        neg x11, x12, lsr #64
+        neg x19, xzr, asr #-1
+        neg xzr, xzr, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         neg w9, w10, lsl #-1
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+// CHECK-ERROR-NEXT:         neg w9, w10, lsl #32
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         neg w11, w12, lsr #-1
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+// CHECK-ERROR-NEXT:         neg w11, w12, lsr #32
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         neg w19, wzr, asr #-1
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+// CHECK-ERROR-NEXT:         neg wzr, wzr, asr #32
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         neg x9, x10, lsl #-1
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR-NEXT:         neg x9, x10, lsl #64
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         neg x11, x12, lsr #-1
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR-NEXT:         neg x11, x12, lsr #64
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         neg x19, xzr, asr #-1
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR-NEXT:         neg xzr, xzr, asr #64
+// CHECK-ERROR-NEXT:                       ^
+
+        negs w9, w10, lsl #-1
+        negs w9, w10, lsl #32
+        negs w11, w12, lsr #-1
+        negs w11, w12, lsr #32
+        negs w19, wzr, asr #-1
+        negs wzr, wzr, asr #32
+        negs x9, x10, lsl #-1
+        negs x9, x10, lsl #64
+        negs x11, x12, lsr #-1
+        negs x11, x12, lsr #64
+        negs x19, xzr, asr #-1
+        negs xzr, xzr, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         negs w9, w10, lsl #-1
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+// CHECK-ERROR-NEXT:         negs w9, w10, lsl #32
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         negs w11, w12, lsr #-1
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+// CHECK-ERROR-NEXT:         negs w11, w12, lsr #32
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         negs w19, wzr, asr #-1
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+// CHECK-ERROR-NEXT:         negs wzr, wzr, asr #32
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         negs x9, x10, lsl #-1
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR-NEXT:         negs x9, x10, lsl #64
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         negs x11, x12, lsr #-1
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR-NEXT:         negs x11, x12, lsr #64
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         negs x19, xzr, asr #-1
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR-NEXT:         negs xzr, xzr, asr #64
+// CHECK-ERROR-NEXT:                        ^
+
+//------------------------------------------------------------------------------
+// Add-subtract (with carry)
+//------------------------------------------------------------------------------
+
+        adc wsp, w3, w5
+        adc w1, wsp, w2
+        adc w0, w10, wsp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        adc wsp, w3, w5
+// CHECK-ERROR-NEXT:            ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         adc w1, wsp, w2
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         adc w0, w10, wsp
+// CHECK-ERROR-NEXT:                      ^
+
+        adc sp, x3, x5
+        adc x1, sp, x2
+        adc x0, x10, sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         adc sp, x3, x5
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         adc x1, sp, x2
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         adc x0, x10, sp
+// CHECK-ERROR-NEXT:                      ^
+
+        adcs wsp, w3, w5
+        adcs w1, wsp, w2
+        adcs w0, w10, wsp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         adcs wsp, w3, w5
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         adcs w1, wsp, w2
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         adcs w0, w10, wsp
+// CHECK-ERROR-NEXT:                       ^
+
+        adcs sp, x3, x5
+        adcs x1, sp, x2
+        adcs x0, x10, sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         adcs sp, x3, x5
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         adcs x1, sp, x2
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         adcs x0, x10, sp
+// CHECK-ERROR-NEXT:                       ^
+
+        sbc wsp, w3, w5
+        sbc w1, wsp, w2
+        sbc w0, w10, wsp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbc wsp, w3, w5
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbc w1, wsp, w2
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbc w0, w10, wsp
+// CHECK-ERROR-NEXT:                      ^
+
+        sbc sp, x3, x5
+        sbc x1, sp, x2
+        sbc x0, x10, sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbc sp, x3, x5
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbc x1, sp, x2
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbc x0, x10, sp
+// CHECK-ERROR-NEXT:                      ^
+
+        sbcs wsp, w3, w5
+        sbcs w1, wsp, w2
+        sbcs w0, w10, wsp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbcs wsp, w3, w5
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbcs w1, wsp, w2
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbcs w0, w10, wsp
+// CHECK-ERROR-NEXT:                       ^
+
+        sbcs sp, x3, x5
+        sbcs x1, sp, x2
+        sbcs x0, x10, sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbcs sp, x3, x5
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbcs x1, sp, x2
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbcs x0, x10, sp
+// CHECK-ERROR-NEXT:                       ^
+
+        ngc wsp, w3
+        ngc w9, wsp
+        ngc sp, x9
+        ngc x2, sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ngc wsp, w3
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ngc w9, wsp
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ngc sp, x9
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ngc x2, sp
+// CHECK-ERROR-NEXT:                 ^
+
+        ngcs wsp, w3
+        ngcs w9, wsp
+        ngcs sp, x9
+        ngcs x2, sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ngcs wsp, w3
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ngcs w9, wsp
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ngcs sp, x9
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ngcs x2, sp
+// CHECK-ERROR-NEXT:                  ^
+
+//------------------------------------------------------------------------------
+// Bitfield
+//------------------------------------------------------------------------------
+
+        sbfm x3, w13, #0, #0
+        sbfm w12, x9, #0, #0
+        sbfm sp, x3, #3, #5
+        sbfm w3, wsp, #1, #9
+        sbfm x9, x5, #-1, #0
+        sbfm x9, x5, #0, #-1
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbfm x3, w13, #0, #0
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbfm w12, x9, #0, #0
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbfm sp, x3, #3, #5
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbfm w3, wsp, #1, #9
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT:         sbfm x9, x5, #-1, #0
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT:         sbfm x9, x5, #0, #-1
+// CHECK-ERROR-NEXT:                          ^
+
+        sbfm w3, w5, #32, #1
+        sbfm w7, w11, #19, #32
+        sbfm x29, x30, #64, #0
+        sbfm x10, x20, #63, #64
+// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         sbfm w3, w5, #32, #1
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         sbfm w7, w11, #19, #32
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT:         sbfm x29, x30, #64, #0
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT:         sbfm x10, x20, #63, #64
+// CHECK-ERROR-NEXT:                             ^
+
+        ubfm w3, w5, #32, #1
+        ubfm w7, w11, #19, #32
+        ubfm x29, x30, #64, #0
+        ubfm x10, x20, #63, #64
+// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         ubfm w3, w5, #32, #1
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         ubfm w7, w11, #19, #32
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT:         ubfm x29, x30, #64, #0
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT:         ubfm x10, x20, #63, #64
+// CHECK-ERROR-NEXT:                             ^
+
+        bfm w3, w5, #32, #1
+        bfm w7, w11, #19, #32
+        bfm x29, x30, #64, #0
+        bfm x10, x20, #63, #64
+// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         bfm w3, w5, #32, #1
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         bfm w7, w11, #19, #32
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT:         bfm x29, x30, #64, #0
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT:         bfm x10, x20, #63, #64
+// CHECK-ERROR-NEXT:                             ^
+
+        sxtb x3, x2
+        sxth xzr, xzr
+        sxtw x3, x5
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sxtb x3, x2
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sxth xzr, xzr
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sxtw x3, x5
+// CHECK-ERROR-NEXT:                  ^
+
+        uxtb x3, x12
+        uxth x5, x9
+        uxtw x3, x5
+        uxtb x2, sp
+        uxtb sp, xzr
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         uxtb x3, x12
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         uxth x5, x9
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid instruction
+// CHECK-ERROR-NEXT:         uxtw x3, x5
+// CHECK-ERROR-NEXT:         ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         uxtb x2, sp
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         uxtb sp, xzr
+// CHECK-ERROR-NEXT:              ^
+
+        asr x3, w2, #1
+        asr sp, x2, #1
+        asr x25, x26, #-1
+        asr x25, x26, #64
+        asr w9, w8, #32
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         asr x3, w2, #1
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         asr sp, x2, #1
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT:         asr x25, x26, #-1
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT:         asr x25, x26, #64
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         asr w9, w8, #32
+// CHECK-ERROR-NEXT:                     ^
+
+        sbfiz w1, w2, #0, #0
+        sbfiz wsp, w9, #0, #1
+        sbfiz w9, w10, #32, #1
+        sbfiz w11, w12, #32, #0
+        sbfiz w9, w10, #10, #23
+        sbfiz x3, x5, #12, #53
+        sbfiz sp, x3, #5, #6
+        sbfiz w3, wsp, #7, #8
+// CHECK-ERROR: error: expected integer in range [<lsb>, 31]
+// CHECK-ERROR-NEXT:         sbfiz w1, w2, #0, #0
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbfiz wsp, w9, #0, #1
+// CHECK-ERROR-NEXT:               ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         sbfiz w9, w10, #32, #1
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         sbfiz w11, w12, #32, #0
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: requested insert overflows register
+// CHECK-ERROR-NEXT:         sbfiz w9, w10, #10, #23
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: requested insert overflows register
+// CHECK-ERROR-NEXT:         sbfiz x3, x5, #12, #53
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbfiz sp, x3, #5, #6
+// CHECK-ERROR-NEXT:               ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbfiz w3, wsp, #7, #8
+// CHECK-ERROR-NEXT:                   ^
+
+        sbfx w1, w2, #0, #0
+        sbfx wsp, w9, #0, #1
+        sbfx w9, w10, #32, #1
+        sbfx w11, w12, #32, #0
+        sbfx w9, w10, #10, #23
+        sbfx x3, x5, #12, #53
+        sbfx sp, x3, #5, #6
+        sbfx w3, wsp, #7, #8
+// CHECK-ERROR: error: expected integer in range [<lsb>, 31]
+// CHECK-ERROR-NEXT:         sbfx w1, w2, #0, #0
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbfx wsp, w9, #0, #1
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         sbfx w9, w10, #32, #1
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         sbfx w11, w12, #32, #0
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: requested extract overflows register
+// CHECK-ERROR-NEXT:         sbfx w9, w10, #10, #23
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: requested extract overflows register
+// CHECK-ERROR-NEXT:         sbfx x3, x5, #12, #53
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbfx sp, x3, #5, #6
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sbfx w3, wsp, #7, #8
+// CHECK-ERROR-NEXT:                  ^
+
+        bfi w1, w2, #0, #0
+        bfi wsp, w9, #0, #1
+        bfi w9, w10, #32, #1
+        bfi w11, w12, #32, #0
+        bfi w9, w10, #10, #23
+        bfi x3, x5, #12, #53
+        bfi sp, x3, #5, #6
+        bfi w3, wsp, #7, #8
+// CHECK-ERROR: error: expected integer in range [<lsb>, 31]
+// CHECK-ERROR-NEXT:         bfi w1, w2, #0, #0
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         bfi wsp, w9, #0, #1
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         bfi w9, w10, #32, #1
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         bfi w11, w12, #32, #0
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: requested insert overflows register
+// CHECK-ERROR-NEXT:         bfi w9, w10, #10, #23
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: requested insert overflows register
+// CHECK-ERROR-NEXT:         bfi x3, x5, #12, #53
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         bfi sp, x3, #5, #6
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         bfi w3, wsp, #7, #8
+// CHECK-ERROR-NEXT:                 ^
+
+        bfxil w1, w2, #0, #0
+        bfxil wsp, w9, #0, #1
+        bfxil w9, w10, #32, #1
+        bfxil w11, w12, #32, #0
+        bfxil w9, w10, #10, #23
+        bfxil x3, x5, #12, #53
+        bfxil sp, x3, #5, #6
+        bfxil w3, wsp, #7, #8
+// CHECK-ERROR: error: expected integer in range [<lsb>, 31]
+// CHECK-ERROR-NEXT:         bfxil w1, w2, #0, #0
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         bfxil wsp, w9, #0, #1
+// CHECK-ERROR-NEXT:               ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         bfxil w9, w10, #32, #1
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         bfxil w11, w12, #32, #0
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: requested extract overflows register
+// CHECK-ERROR-NEXT:         bfxil w9, w10, #10, #23
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: requested extract overflows register
+// CHECK-ERROR-NEXT:         bfxil x3, x5, #12, #53
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         bfxil sp, x3, #5, #6
+// CHECK-ERROR-NEXT:               ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         bfxil w3, wsp, #7, #8
+// CHECK-ERROR-NEXT:                   ^
+
+        ubfiz w1, w2, #0, #0
+        ubfiz wsp, w9, #0, #1
+        ubfiz w9, w10, #32, #1
+        ubfiz w11, w12, #32, #0
+        ubfiz w9, w10, #10, #23
+        ubfiz x3, x5, #12, #53
+        ubfiz sp, x3, #5, #6
+        ubfiz w3, wsp, #7, #8
+// CHECK-ERROR: error: expected integer in range [<lsb>, 31]
+// CHECK-ERROR-NEXT:         ubfiz w1, w2, #0, #0
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ubfiz wsp, w9, #0, #1
+// CHECK-ERROR-NEXT:               ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         ubfiz w9, w10, #32, #1
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         ubfiz w11, w12, #32, #0
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: requested insert overflows register
+// CHECK-ERROR-NEXT:         ubfiz w9, w10, #10, #23
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: requested insert overflows register
+// CHECK-ERROR-NEXT:         ubfiz x3, x5, #12, #53
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ubfiz sp, x3, #5, #6
+// CHECK-ERROR-NEXT:               ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ubfiz w3, wsp, #7, #8
+// CHECK-ERROR-NEXT:                   ^
+
+        ubfx w1, w2, #0, #0
+        ubfx wsp, w9, #0, #1
+        ubfx w9, w10, #32, #1
+        ubfx w11, w12, #32, #0
+        ubfx w9, w10, #10, #23
+        ubfx x3, x5, #12, #53
+        ubfx sp, x3, #5, #6
+        ubfx w3, wsp, #7, #8
+// CHECK-ERROR: error: expected integer in range [<lsb>, 31]
+// CHECK-ERROR-NEXT:         ubfx w1, w2, #0, #0
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ubfx wsp, w9, #0, #1
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         ubfx w9, w10, #32, #1
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         ubfx w11, w12, #32, #0
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: requested extract overflows register
+// CHECK-ERROR-NEXT:         ubfx w9, w10, #10, #23
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: requested extract overflows register
+// CHECK-ERROR-NEXT:         ubfx x3, x5, #12, #53
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ubfx sp, x3, #5, #6
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ubfx w3, wsp, #7, #8
+// CHECK-ERROR-NEXT:                  ^
+
+//------------------------------------------------------------------------------
+// Compare & branch (immediate)
+//------------------------------------------------------------------------------
+
+        cbnz wsp, lbl
+        cbz  sp, lbl
+        cbz  x3, x5
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:           cbnz wsp, lbl
+// CHECK-ERROR-NEXT:                ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:           cbz sp, lbl
+// CHECK-ERROR-NEXT:               ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT:           cbz x3, x5
+// CHECK-ERROR-NEXT:                   ^
+
+        cbz w20, #1048576
+        cbnz xzr, #-1048580
+        cbz x29, #1
+// CHECK-ERROR: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT:           cbz w20, #1048576
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT:           cbnz xzr, #-1048580
+// CHECK-ERROR-NEXT:                    ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT:           cbz x29, #1
+// CHECK-ERROR-NEXT:                    ^
+
+//------------------------------------------------------------------------------
+// Conditional branch (immediate)
+//------------------------------------------------------------------------------
+
+        b.zf lbl
+// CHECK-ERROR: error: invalid condition code
+// CHECK-ERROR-NEXT:           b.zf lbl
+// CHECK-ERROR-NEXT:             ^
+
+        b.eq #1048576
+        b.ge #-1048580
+        b.cc #1
+// CHECK-ERROR: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT:           b.eq #1048576
+// CHECK-ERROR-NEXT:                ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT:           b.ge #-1048580
+// CHECK-ERROR-NEXT:                ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT:           b.cc #1
+// CHECK-ERROR-NEXT:                ^
+
+//------------------------------------------------------------------------------
+// Conditional compare (immediate)
+//------------------------------------------------------------------------------
+
+        ccmp wsp, #4, #2, ne
+        ccmp w25, #-1, #15, hs
+        ccmp w3, #32, #0, ge
+        ccmp w19, #5, #-1, lt
+        ccmp w20, #7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        ccmp wsp, #4, #2, ne
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:        ccmp w25, #-1, #15, hs
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:        ccmp w3, #32, #0, ge
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        ccmp w19, #5, #-1, lt
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        ccmp w20, #7, #16, hs
+// CHECK-ERROR-NEXT:                      ^
+
+        ccmp sp, #4, #2, ne
+        ccmp x25, #-1, #15, hs
+        ccmp x3, #32, #0, ge
+        ccmp x19, #5, #-1, lt
+        ccmp x20, #7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        ccmp sp, #4, #2, ne
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:        ccmp x25, #-1, #15, hs
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:        ccmp x3, #32, #0, ge
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        ccmp x19, #5, #-1, lt
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        ccmp x20, #7, #16, hs
+// CHECK-ERROR-NEXT:                      ^
+
+        ccmn wsp, #4, #2, ne
+        ccmn w25, #-1, #15, hs
+        ccmn w3, #32, #0, ge
+        ccmn w19, #5, #-1, lt
+        ccmn w20, #7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        ccmn wsp, #4, #2, ne
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:        ccmn w25, #-1, #15, hs
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:        ccmn w3, #32, #0, ge
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        ccmn w19, #5, #-1, lt
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        ccmn w20, #7, #16, hs
+// CHECK-ERROR-NEXT:                      ^
+
+        ccmn sp, #4, #2, ne
+        ccmn x25, #-1, #15, hs
+        ccmn x3, #32, #0, ge
+        ccmn x19, #5, #-1, lt
+        ccmn x20, #7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        ccmn sp, #4, #2, ne
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:        ccmn x25, #-1, #15, hs
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:        ccmn x3, #32, #0, ge
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        ccmn x19, #5, #-1, lt
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        ccmn x20, #7, #16, hs
+// CHECK-ERROR-NEXT:                      ^
+
+//------------------------------------------------------------------------------
+// Conditional compare (register)
+//------------------------------------------------------------------------------
+
+        ccmp wsp, w4, #2, ne
+        ccmp w3, wsp, #0, ge
+        ccmp w19, w5, #-1, lt
+        ccmp w20, w7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        ccmp wsp, w4, #2, ne
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:        ccmp w3, wsp, #0, ge
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        ccmp w19, w5, #-1, lt
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        ccmp w20, w7, #16, hs
+// CHECK-ERROR-NEXT:                      ^
+
+        ccmp sp, x4, #2, ne
+        ccmp x25, sp, #15, hs
+        ccmp x19, x5, #-1, lt
+        ccmp x20, x7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        ccmp sp, x4, #2, ne
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:        ccmp x25, sp, #15, hs
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        ccmp x19, x5, #-1, lt
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        ccmp x20, x7, #16, hs
+// CHECK-ERROR-NEXT:                      ^
+
+        ccmn wsp, w4, #2, ne
+        ccmn w25, wsp, #15, hs
+        ccmn w19, w5, #-1, lt
+        ccmn w20, w7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        ccmn wsp, w4, #2, ne
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:        ccmn w25, wsp, #15, hs
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        ccmn w19, w5, #-1, lt
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        ccmn w20, w7, #16, hs
+// CHECK-ERROR-NEXT:                      ^
+
+        ccmn sp, x4, #2, ne
+        ccmn x25, sp, #15, hs
+        ccmn x19, x5, #-1, lt
+        ccmn x20, x7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        ccmn sp, x4, #2, ne
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:        ccmn x25, sp, #15, hs
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        ccmn x19, x5, #-1, lt
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        ccmn x20, x7, #16, hs
+// CHECK-ERROR-NEXT:                      ^
+
+//------------------------------------------------------------------------------
+// Conditional select
+//------------------------------------------------------------------------------
+
+        csel w4, wsp, w9, eq
+        csel wsp, w2, w3, ne
+        csel w10, w11, wsp, ge
+        csel w1, w2, w3, #3
+        csel x4, sp, x9, eq
+        csel sp, x2, x3, ne
+        csel x10, x11, sp, ge
+        csel x1, x2, x3, #3
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        csel w4, wsp, w9, eq
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        csel wsp, w2, w3, ne
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        csel w10, w11, wsp, ge
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected AArch64 condition code
+// CHECK-ERROR-NEXT:        csel w1, w2, w3, #3
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        csel x4, sp, x9, eq
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        csel sp, x2, x3, ne
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        csel x10, x11, sp, ge
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected AArch64 condition code
+// CHECK-ERROR-NEXT:        csel x1, x2, x3, #3
+// CHECK-ERROR-NEXT:                         ^
+
+        csinc w20, w21, wsp, mi
+        csinc sp, x30, x29, eq
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        csinc w20, w21, wsp, mi
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        csinc sp, x30, x29, eq
+// CHECK-ERROR-NEXT:              ^
+
+        csinv w20, wsp, wsp, mi
+        csinv sp, x30, x29, le
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        csinv w20, wsp, wsp, mi
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        csinv sp, x30, x29, le
+// CHECK-ERROR-NEXT:              ^
+
+        csneg w20, w21, wsp, mi
+        csneg x0, sp, x29, le
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        csneg w20, w21, wsp, mi
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        csneg x0, sp, x29, le
+// CHECK-ERROR-NEXT:                  ^
+
+        cset wsp, lt
+        csetm sp, ge
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        cset wsp, lt
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        csetm sp, ge
+// CHECK-ERROR-NEXT:              ^
+
+        cinc w3, wsp, ne
+        cinc sp, x9, eq
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        cinc w3, wsp, ne
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        cinc sp, x9, eq
+// CHECK-ERROR-NEXT:             ^
+
+        cinv w3, wsp, ne
+        cinv sp, x9, eq
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        cinv w3, wsp, ne
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        cinv sp, x9, eq
+// CHECK-ERROR-NEXT:             ^
+
+        cneg w3, wsp, ne
+        cneg sp, x9, eq
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        cneg w3, wsp, ne
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        cneg sp, x9, eq
+// CHECK-ERROR-NEXT:             ^
+
+//------------------------------------------------------------------------------
+// Data Processing (1 source)
+//------------------------------------------------------------------------------
+        rbit x23, w2
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT:     rbit x23, w2
+
+        cls sp, x2
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT:     cls sp, x2
+
+        clz wsp, w3
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT:     clz wsp, w3
+
+//------------------------------------------------------------------------------
+// Data Processing (2 sources)
+//------------------------------------------------------------------------------
+        udiv x23, w2, x18
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT:     udiv x23, w2, x18
+
+        lsl sp, x2, x4
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT:     lsl sp, x2, x4
+
+        asr wsp, w3, w9
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT:     asr wsp, w3, w9
+
+//------------------------------------------------------------------------------
+// Data Processing (3 sources)
+//------------------------------------------------------------------------------
+
+        madd sp, x3, x9, x10
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT:     madd sp, x3, x9, x10
+
+//------------------------------------------------------------------------------
+// Exception generation
+//------------------------------------------------------------------------------
+        svc #-1
+        hlt #65536
+        dcps4 #43
+        dcps4
+// CHECK-ERROR: error: expected integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         svc #-1
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         hlt #65536
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid instruction
+// CHECK-ERROR-NEXT:         dcps4 #43
+// CHECK-ERROR-NEXT:         ^
+// CHECK-ERROR-NEXT: error: invalid instruction
+// CHECK-ERROR-NEXT:         dcps4
+// CHECK-ERROR-NEXT:         ^
+
+//------------------------------------------------------------------------------
+// Extract (immediate)
+//------------------------------------------------------------------------------
+
+        extr w2, w20, w30, #-1
+        extr w9, w19, w20, #32
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         extr w2, w20, w30, #-1
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         extr w9, w19, w20, #32
+// CHECK-ERROR-NEXT:                            ^
+
+        extr x10, x15, x20, #-1
+        extr x20, x25, x30, #64
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT:         extr x10, x15, x20, #-1
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT:         extr x20, x25, x30, #64
+// CHECK-ERROR-NEXT:                             ^
+
+        ror w9, w10, #32
+        ror x10, x11, #64
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:         ror w9, w10, #32
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT:         ror x10, x11, #64
+// CHECK-ERROR-NEXT:                       ^
+
+//------------------------------------------------------------------------------
+// Floating-point compare
+//------------------------------------------------------------------------------
+
+        fcmp s3, d2
+// CHECK-ERROR: error: expected floating-point constant #0.0
+// CHECK-ERROR-NEXT:         fcmp s3, d2
+// CHECK-ERROR-NEXT:                  ^
+
+        fcmp s9, #-0.0
+        fcmp d3, #-0.0
+        fcmp s1, #1.0
+        fcmpe s30, #-0.0
+// CHECK-ERROR: error: expected floating-point constant #0.0
+// CHECK-ERROR-NEXT:         fcmp s9, #-0.0
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected floating-point constant #0.0
+// CHECK-ERROR-NEXT:         fcmp d3, #-0.0
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected floating-point constant #0.0
+// CHECK-ERROR-NEXT:         fcmp s1, #1.0
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected floating-point constant #0.0
+// CHECK-ERROR-NEXT:         fcmpe s30, #-0.0
+// CHECK-ERROR-NEXT:                    ^
+
+//------------------------------------------------------------------------------
+// Floating-point conditional compare
+//------------------------------------------------------------------------------
+
+        fccmp s19, s5, #-1, lt
+        fccmp s20, s7, #16, hs
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        fccmp s19, s5, #-1, lt
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        fccmp s20, s7, #16, hs
+// CHECK-ERROR-NEXT:                      ^
+
+        fccmp d19, d5, #-1, lt
+        fccmp d20, d7, #16, hs
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        fccmp d19, d5, #-1, lt
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        fccmp d20, d7, #16, hs
+// CHECK-ERROR-NEXT:                      ^
+
+        fccmpe s19, s5, #-1, lt
+        fccmpe s20, s7, #16, hs
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        fccmpe s19, s5, #-1, lt
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        fccmpe s20, s7, #16, hs
+// CHECK-ERROR-NEXT:                      ^
+
+        fccmpe d19, d5, #-1, lt
+        fccmpe d20, d7, #16, hs
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        fccmpe d19, d5, #-1, lt
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:        fccmpe d20, d7, #16, hs
+// CHECK-ERROR-NEXT:                      ^
+
+//------------------------------------------------------------------------------
+// Floating-point conditional select
+//------------------------------------------------------------------------------
+
+        fcsel q3, q20, q9, pl
+        fcsel h9, h10, h11, mi
+        fcsel b9, b10, b11, mi
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         fcsel q3, q20, q9, pl
+// CHECK-ERROR-NEXT:               ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         fcsel h9, h10, h11, mi
+// CHECK-ERROR-NEXT:               ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         fcsel b9, b10, b11, mi
+// CHECK-ERROR-NEXT:               ^
+
+//------------------------------------------------------------------------------
+// Floating-point data-processing (1 source)
+//------------------------------------------------------------------------------
+
+        fmov d0, s3
+        fcvt d0, d1
+// CHECK-ERROR: error: expected compatible register or floating-point constant
+// CHECK-ERROR-NEXT:           fmov d0, s3
+// CHECK-ERROR-NEXT:                    ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:           fcvt d0, d1
+// CHECK-ERROR-NEXT:                    ^
+
+
+//------------------------------------------------------------------------------
+// Floating-point data-processing (2 sources)
+//------------------------------------------------------------------------------
+
+        fadd s0, d3, d7
+        fmaxnm d3, s19, d12
+        fnmul d1, d9, s18
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:           fadd s0, d3, d7
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:           fmaxnm d3, s19, d12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:           fnmul d1, d9, s18
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Floating-point data-processing (3 sources)
+//------------------------------------------------------------------------------
+
+        fmadd b3, b4, b5, b6
+        fmsub h1, h2, h3, h4
+        fnmadd q3, q5, q6, q7
+        fnmsub s2, s4, d5, h9
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         fmadd b3, b4, b5, b6
+// CHECK-ERROR-NEXT:               ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         fmsub h1, h2, h3, h4
+// CHECK-ERROR-NEXT:               ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         fnmadd q3, q5, q6, q7
+// CHECK-ERROR-NEXT:                ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         fnmsub s2, s4, d5, h9
+// CHECK-ERROR-NEXT:                ^
+
+//------------------------------------------------------------------------------
+// Floating-point <-> fixed-point conversion
+//------------------------------------------------------------------------------
+
+        fcvtzs w13, s31, #0
+        fcvtzs w19, s20, #33
+        fcvtzs wsp, s19, #14
+// CHECK-ERROR-NEXT: error: expected integer in range [1, 32]
+// CHECK-ERROR-NEXT:        fcvtzs w13, s31, #0
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer in range [1, 32]
+// CHECK-ERROR-NEXT:        fcvtzs w19, s20, #33
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        fcvtzs wsp, s19, #14
+// CHECK-ERROR-NEXT:               ^
+
+        fcvtzs x13, s31, #0
+        fcvtzs x19, s20, #65
+        fcvtzs sp, s19, #14
+// CHECK-ERROR-NEXT: error: expected integer in range [1, 64]
+// CHECK-ERROR-NEXT:        fcvtzs x13, s31, #0
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer in range [1, 64]
+// CHECK-ERROR-NEXT:        fcvtzs x19, s20, #65
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        fcvtzs sp, s19, #14
+// CHECK-ERROR-NEXT:               ^
+
+        fcvtzu w13, s31, #0
+        fcvtzu w19, s20, #33
+        fcvtzu wsp, s19, #14
+// CHECK-ERROR-NEXT: error: expected integer in range [1, 32]
+// CHECK-ERROR-NEXT:        fcvtzu w13, s31, #0
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer in range [1, 32]
+// CHECK-ERROR-NEXT:        fcvtzu w19, s20, #33
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        fcvtzu wsp, s19, #14
+// CHECK-ERROR-NEXT:               ^
+
+        fcvtzu x13, s31, #0
+        fcvtzu x19, s20, #65
+        fcvtzu sp, s19, #14
+// CHECK-ERROR-NEXT: error: expected integer in range [1, 64]
+// CHECK-ERROR-NEXT:        fcvtzu x13, s31, #0
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer in range [1, 64]
+// CHECK-ERROR-NEXT:        fcvtzu x19, s20, #65
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        fcvtzu sp, s19, #14
+// CHECK-ERROR-NEXT:               ^
+
+        scvtf w13, s31, #0
+        scvtf w19, s20, #33
+        scvtf wsp, s19, #14
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        scvtf w13, s31, #0
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        scvtf w19, s20, #33
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        scvtf wsp, s19, #14
+// CHECK-ERROR-NEXT:              ^
+
+        scvtf x13, s31, #0
+        scvtf x19, s20, #65
+        scvtf sp, s19, #14
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        scvtf x13, s31, #0
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        scvtf x19, s20, #65
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        scvtf sp, s19, #14
+// CHECK-ERROR-NEXT:              ^
+
+        ucvtf w13, s31, #0
+        ucvtf w19, s20, #33
+        ucvtf wsp, s19, #14
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        ucvtf w13, s31, #0
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        ucvtf w19, s20, #33
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        ucvtf wsp, s19, #14
+// CHECK-ERROR-NEXT:              ^
+
+        ucvtf x13, s31, #0
+        ucvtf x19, s20, #65
+        ucvtf sp, s19, #14
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        ucvtf x13, s31, #0
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        ucvtf x19, s20, #65
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        ucvtf sp, s19, #14
+// CHECK-ERROR-NEXT:              ^
+
+//------------------------------------------------------------------------------
+// Floating-point immediate
+//------------------------------------------------------------------------------
+        // Exponent too large
+        fmov d3, #0.0625
+        fmov s2, #32.0
+// CHECK-ERROR: error: expected compatible register or floating-point constant
+// CHECK-ERROR-NEXT:           fmov d3, #0.0625
+// CHECK-ERROR-NEXT:                    ^
+// CHECK-ERROR-NEXT: error: expected compatible register or floating-point constant
+// CHECK-ERROR-NEXT:           fmov s2, #32.0
+// CHECK-ERROR-NEXT:                    ^
+
+        // Fraction too precise
+        fmov s9, #1.03125
+        fmov s28, #1.96875
+// CHECK-ERROR: error: expected compatible register or floating-point constant
+// CHECK-ERROR-NEXT:           fmov s9, #1.03125
+// CHECK-ERROR-NEXT:                    ^
+// CHECK-ERROR-NEXT: error: expected compatible register or floating-point constant
+// CHECK-ERROR-NEXT:           fmov s28, #1.96875
+// CHECK-ERROR-NEXT:                     ^
+
+        // No particular reason, but a striking omission
+        fmov d0, #0.0
+// CHECK-ERROR: error: expected compatible register or floating-point constant
+// CHECK-ERROR-NEXT:           fmov d0, #0.0
+// CHECK-ERROR-NEXT:                    ^
+
+//------------------------------------------------------------------------------
+// Floating-point <-> integer conversion
+//------------------------------------------------------------------------------
+
+        fmov x3, v0.d[0]
+        fmov v29.1d[1], x2
+        fmov x7, v0.d[2]
+        fcvtns sp, s5
+        scvtf s6, wsp
+// CHECK-ERROR: error: expected lane specifier '[1]'
+// CHECK-ERROR-NEXT:         fmov x3, v0.d[0]
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: lane number incompatible with layout
+// CHECK-ERROR-NEXT: fmov v29.1d[1], x2
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: lane number incompatible with layout
+// CHECK-ERROR-NEXT: fmov x7, v0.d[2]
+// CHECK-ERROR-NEXT:               ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         fcvtns sp, s5
+// CHECK-ERROR-NEXT:                ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         scvtf s6, wsp
+// CHECK-ERROR-NEXT:                   ^
+
+//------------------------------------------------------------------------------
+// Load-register (literal)
+//------------------------------------------------------------------------------
+
+        ldr sp, some_label
+        ldrsw w3, somewhere
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldr sp, some_label
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldrsw w3, somewhere
+// CHECK-ERROR-NEXT:               ^
+
+        ldrsw x2, #1048576
+        ldr q0, #-1048580
+        ldr x0, #2
+// CHECK-ERROR: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT:         ldrsw x2, #1048576
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT:         ldr q0, #-1048580
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT:         ldr x0, #2
+// CHECK-ERROR-NEXT:                 ^
+
+//------------------------------------------------------------------------------
+// Load/store exclusive
+//------------------------------------------------------------------------------
+
+       stxrb w2, x3, [x4, #20]
+       stlxrh w10, w11, [w2]
+// CHECK-ERROR: error: expected '#0'
+// CHECK-ERROR-NEXT:         stxrb w2, x3, [x4, #20]
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         stlxrh w10, w11, [w2]
+// CHECK-ERROR-NEXT:                           ^
+
+       stlxr  x20, w21, [sp]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         stlxr  x20, w21, [sp]
+// CHECK-ERROR-NEXT:                ^
+
+       ldxr   sp, [sp]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldxr   sp, [sp]
+// CHECK-ERROR-NEXT:                ^
+
+       stxp x1, x2, x3, [x4]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         stxp x1, x2, x3, [x4]
+// CHECK-ERROR-NEXT:              ^
+
+       stlxp w5, x1, w4, [x5]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         stlxp w5, x1, w4, [x5]
+// CHECK-ERROR-NEXT:                       ^
+
+       stlxp w17, w6, x7, [x22]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         stlxp w17, w6, x7, [x22]
+// CHECK-ERROR-NEXT:                        ^
+
+//------------------------------------------------------------------------------
+// Load/store (unscaled immediate)
+//------------------------------------------------------------------------------
+
+        ldurb w2, [sp, #256]
+        sturh w17, [x1, #256]
+        ldursw x20, [x1, #256]
+        ldur x12, [sp, #256]
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:        ldurb w2, [sp, #256]
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         sturh w17, [x1, #256]
+// CHECK-ERROR-NEXT:                    ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldursw x20, [x1, #256]
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldur x12, [sp, #256]
+// CHECK-ERROR-NEXT:                   ^
+
+        stur h2, [x2, #-257]
+        stur b2, [x2, #-257]
+        ldursb x9, [sp, #-257]
+        ldur w2, [x30, #-257]
+        stur q9, [x20, #-257]
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         stur h2, [x2, #-257]
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         stur b2, [x2, #-257]
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldursb x9, [sp, #-257]
+// CHECK-ERROR-NEXT:                    ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldur w2, [x30, #-257]
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         stur q9, [x20, #-257]
+// CHECK-ERROR-NEXT:                  ^
+
+        prfum pstl3strm, [xzr]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         prfum pstl3strm, [xzr]
+// CHECK-ERROR-NEXT:                           ^
+
+//------------------------------------------------------------------------------
+// Load-store register (immediate post-indexed)
+//------------------------------------------------------------------------------
+        ldr x3, [x4, #25], #0
+        ldr x4, [x9, #0], #4
+// CHECK-ERROR: error: expected symbolic reference or integer in range [0, 32760]
+// CHECK-ERROR-NEXT:         ldr x3, [x4, #25], #0
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldr x4, [x9, #0], #4
+// CHECK-ERROR-NEXT:                           ^
+
+        strb w1, [x19], #256
+        strb w9, [sp], #-257
+        strh w1, [x19], #256
+        strh w9, [sp], #-257
+        str w1, [x19], #256
+        str w9, [sp], #-257
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         strb w1, [x19], #256
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         strb w9, [sp], #-257
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         strh w1, [x19], #256
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         strh w9, [sp], #-257
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         str w1, [x19], #256
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         str w9, [sp], #-257
+// CHECK-ERROR-NEXT:                       ^
+
+        ldrb w1, [x19], #256
+        ldrb w9, [sp], #-257
+        ldrh w1, [x19], #256
+        ldrh w9, [sp], #-257
+        ldr w1, [x19], #256
+        ldr w9, [sp], #-257
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrb w1, [x19], #256
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrb w9, [sp], #-257
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrh w1, [x19], #256
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrh w9, [sp], #-257
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr w1, [x19], #256
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr w9, [sp], #-257
+// CHECK-ERROR-NEXT:                       ^
+
+        ldrsb x2, [x3], #256
+        ldrsb x22, [x13], #-257
+        ldrsh x2, [x3], #256
+        ldrsh x22, [x13], #-257
+        ldrsw x2, [x3], #256
+        ldrsw x22, [x13], #-257
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrsb x2, [x3], #256
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrsb x22, [x13], #-257
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrsh x2, [x3], #256
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrsh x22, [x13], #-257
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrsw x2, [x3], #256
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrsw x22, [x13], #-257
+// CHECK-ERROR-NEXT:                           ^
+
+        ldrsb w2, [x3], #256
+        ldrsb w22, [x13], #-257
+        ldrsh w2, [x3], #256
+        ldrsh w22, [x13], #-257
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrsb w2, [x3], #256
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrsb w22, [x13], #-257
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrsh w2, [x3], #256
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrsh w22, [x13], #-257
+// CHECK-ERROR-NEXT:                           ^
+
+        str b3, [x3], #256
+        str b3, [x13], #-257
+        str h3, [x3], #256
+        str h3, [x13], #-257
+        str s3, [x3], #256
+        str s3, [x13], #-257
+        str d3, [x3], #256
+        str d3, [x13], #-257
+        str q3, [x3], #256
+        str q3, [x13], #-257
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         str b3, [x3], #256
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         str b3, [x13], #-257
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         str h3, [x3], #256
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         str h3, [x13], #-257
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         str s3, [x3], #256
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         str s3, [x13], #-257
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         str d3, [x3], #256
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         str d3, [x13], #-257
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         str q3, [x3], #256
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         str q3, [x13], #-257
+// CHECK-ERROR-NEXT:                        ^
+
+        ldr b3, [x3], #256
+        ldr b3, [x13], #-257
+        ldr h3, [x3], #256
+        ldr h3, [x13], #-257
+        ldr s3, [x3], #256
+        ldr s3, [x13], #-257
+        ldr d3, [x3], #256
+        ldr d3, [x13], #-257
+        ldr q3, [x3], #256
+        ldr q3, [x13], #-257
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr b3, [x3], #256
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr b3, [x13], #-257
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr h3, [x3], #256
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr h3, [x13], #-257
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr s3, [x3], #256
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr s3, [x13], #-257
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr d3, [x3], #256
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr d3, [x13], #-257
+// CHECK-ERROR-NEXT:                        ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr q3, [x3], #256
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr q3, [x13], #-257
+// CHECK-ERROR-NEXT:                        ^
+
+//------------------------------------------------------------------------------
+// Load/store register (immediate pre-indexed)
+//------------------------------------------------------------------------------
+
+        ldr x3, [x4]!
+// CHECK-ERROR: error:
+// CHECK-ERROR-NEXT:         ldr x3, [x4]!
+// CHECK-ERROR-NEXT:                     ^
+
+        strb w1, [x19, #256]!
+        strb w9, [sp, #-257]!
+        strh w1, [x19, #256]!
+        strh w9, [sp, #-257]!
+        str w1, [x19, #256]!
+        str w9, [sp, #-257]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         strb w1, [x19, #256]!
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         strb w9, [sp, #-257]!
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         strh w1, [x19, #256]!
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         strh w9, [sp, #-257]!
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         str w1, [x19, #256]!
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         str w9, [sp, #-257]!
+// CHECK-ERROR-NEXT:                 ^
+
+        ldrb w1, [x19, #256]!
+        ldrb w9, [sp, #-257]!
+        ldrh w1, [x19, #256]!
+        ldrh w9, [sp, #-257]!
+        ldr w1, [x19, #256]!
+        ldr w9, [sp, #-257]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldrb w1, [x19, #256]!
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrb w9, [sp, #-257]!
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldrh w1, [x19, #256]!
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrh w9, [sp, #-257]!
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldr w1, [x19, #256]!
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr w9, [sp, #-257]!
+// CHECK-ERROR-NEXT:                 ^
+
+        ldrsb x2, [x3, #256]!
+        ldrsb x22, [x13, #-257]!
+        ldrsh x2, [x3, #256]!
+        ldrsh x22, [x13, #-257]!
+        ldrsw x2, [x3, #256]!
+        ldrsw x22, [x13, #-257]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldrsb x2, [x3, #256]!
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrsb x22, [x13, #-257]!
+// CHECK-ERROR-NEXT:                    ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldrsh x2, [x3, #256]!
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrsh x22, [x13, #-257]!
+// CHECK-ERROR-NEXT:                    ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldrsw x2, [x3, #256]!
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrsw x22, [x13, #-257]!
+// CHECK-ERROR-NEXT:                    ^
+
+        ldrsb w2, [x3, #256]!
+        ldrsb w22, [x13, #-257]!
+        ldrsh w2, [x3, #256]!
+        ldrsh w22, [x13, #-257]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldrsb w2, [x3, #256]!
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrsb w22, [x13, #-257]!
+// CHECK-ERROR-NEXT:                    ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldrsh w2, [x3, #256]!
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrsh w22, [x13, #-257]!
+// CHECK-ERROR-NEXT:                    ^
+
+        str b3, [x3, #256]!
+        str b3, [x13, #-257]!
+        str h3, [x3, #256]!
+        str h3, [x13, #-257]!
+        str s3, [x3, #256]!
+        str s3, [x13, #-257]!
+        str d3, [x3, #256]!
+        str d3, [x13, #-257]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         str b3, [x3, #256]!
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         str b3, [x13, #-257]!
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         str h3, [x3, #256]!
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         str h3, [x13, #-257]!
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         str s3, [x3, #256]!
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         str s3, [x13, #-257]!
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         str d3, [x3, #256]!
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         str d3, [x13, #-257]!
+// CHECK-ERROR-NEXT:                 ^
+
+        ldr b3, [x3, #256]!
+        ldr b3, [x13, #-257]!
+        ldr h3, [x3, #256]!
+        ldr h3, [x13, #-257]!
+        ldr s3, [x3, #256]!
+        ldr s3, [x13, #-257]!
+        ldr d3, [x3, #256]!
+        ldr d3, [x13, #-257]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldr b3, [x3, #256]!
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr b3, [x13, #-257]!
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldr h3, [x3, #256]!
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr h3, [x13, #-257]!
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldr s3, [x3, #256]!
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr s3, [x13, #-257]!
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldr d3, [x3, #256]!
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr d3, [x13, #-257]!
+// CHECK-ERROR-NEXT:                 ^
+
+//------------------------------------------------------------------------------
+// Load/store (unprivileged)
+//------------------------------------------------------------------------------
+
+        ldtrb w2, [sp, #256]
+        sttrh w17, [x1, #256]
+        ldtrsw x20, [x1, #256]
+        ldtr x12, [sp, #256]
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:        ldtrb w2, [sp, #256]
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         sttrh w17, [x1, #256]
+// CHECK-ERROR-NEXT:                    ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldtrsw x20, [x1, #256]
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldtr x12, [sp, #256]
+// CHECK-ERROR-NEXT:                   ^
+
+        sttr h2, [x2, #-257]
+        sttr b2, [x2, #-257]
+        ldtrsb x9, [sp, #-257]
+        ldtr w2, [x30, #-257]
+        sttr q9, [x20, #-257]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sttr h2, [x2, #-257]
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sttr b2, [x2, #-257]
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldtrsb x9, [sp, #-257]
+// CHECK-ERROR-NEXT:                    ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldtr w2, [x30, #-257]
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         sttr q9, [x20, #-257]
+// CHECK-ERROR-NEXT:                  ^
+
+
+//------------------------------------------------------------------------------
+// Load/store (unsigned immediate)
+//------------------------------------------------------------------------------
+
+//// Out-of-range immediates: each offset is 4096 * access size, one step past the largest scaled 12-bit offset (4095 * size)
+        ldr q0, [x11, #65536]
+        ldr x0, [sp, #32768]
+        ldr w0, [x4, #16384]
+        ldrh w2, [x21, #8192]
+        ldrb w3, [x12, #4096]
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr q0, [x11, #65536]
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr x0, [sp, #32768]
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldr w0, [x4, #16384]
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrh w2, [x21, #8192]
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         ldrb w3, [x12, #4096]
+// CHECK-ERROR-NEXT:                  ^
+
+//// Misaligned offsets: the immediate is not a multiple of the access size (4, 2 and 16 bytes respectively)
+        ldr w0, [x0, #2]
+        ldrsh w2, [x0, #123]
+        str q0, [x0, #8]
+// CHECK-ERROR: error: too few operands for instruction
+// CHECK-ERROR-NEXT:         ldr w0, [x0, #2]
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: too few operands for instruction
+// CHECK-ERROR-NEXT:         ldrsh w2, [x0, #123]
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: too few operands for instruction
+// CHECK-ERROR-NEXT:         str q0, [x0, #8]
+// CHECK-ERROR-NEXT:                 ^
+
+//// 32-bit base registers: the base address must be a 64-bit register (Xn or SP), not Wn/WSP
+        ldr w0, [w20]
+        ldrsh x3, [wsp]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldr w0, [w20]
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldrsh x3, [wsp]
+// CHECK-ERROR-NEXT:                    ^
+
+//// Invalid stores: 32-bit base register, nonexistent register w31, misaligned offset, out-of-range offset
+        strb w0, [wsp]
+        strh w31, [x23, #1]
+        str x5, [x22, #12]
+        str w7, [x12, #16384]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: strb w0, [wsp]
+// CHECK-ERROR-NEXT:           ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         strh w31, [x23, #1]
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: too few operands for instruction
+// CHECK-ERROR-NEXT:         str x5, [x22, #12]
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT:         str w7, [x12, #16384]
+// CHECK-ERROR-NEXT:                 ^
+
+//// Invalid PRFM operands: prefetch-op immediate outside 0-31, 32-bit base register, unknown prefetch-op name
+        prfm #-1, [sp]
+        prfm #32, [sp, #8]
+        prfm pldl1strm, [w3, #8]
+        prfm wibble, [sp]
+// CHECK-ERROR: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT:        prfm #-1, [sp]
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT:        prfm #32, [sp, #8]
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        prfm pldl1strm, [w3, #8]
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: operand specifier not recognised
+// CHECK-ERROR-NEXT:        prfm wibble, [sp]
+// CHECK-ERROR-NEXT:             ^
+
+//------------------------------------------------------------------------------
+// Load/store register (register offset)
+//------------------------------------------------------------------------------
+
+        ldr w3, [xzr, x3]
+        ldr w4, [x0, x4, lsl]
+        ldr w9, [x5, x5, uxtw]
+        ldr w10, [x6, x9, sxtw #2]
+        ldr w11, [x7, w2, lsl #2]
+        ldr w12, [x8, w1, sxtx]
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:        ldr w3, [xzr, x3]
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected #imm after shift specifier
+// CHECK-ERROR-NEXT:         ldr w4, [x0, x4, lsl]
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0 or #2
+// CHECK-ERROR-NEXT:         ldr w9, [x5, x5, uxtw]
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0 or #2
+// CHECK-ERROR-NEXT:         ldr w10, [x6, x9, sxtw #2]
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #2
+// CHECK-ERROR-NEXT:         ldr w11, [x7, w2, lsl #2]
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #2
+// CHECK-ERROR-NEXT:         ldr w12, [x8, w1, sxtx]
+// CHECK-ERROR-NEXT:                           ^
+
+        ldrsb w9, [x4, x2, lsl #-1]
+        strb w9, [x4, x2, lsl #1]
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         ldrsb w9, [x4, x2, lsl #-1]
+// CHECK-ERROR-NEXT:                                 ^
+// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0
+// CHECK-ERROR-NEXT:         strb w9, [x4, x2, lsl #1]
+// CHECK-ERROR-NEXT:                  ^
+
+        ldrsh w9, [x4, x2, lsl #-1]
+        ldr h13, [x4, w2, uxtw #2]
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         ldrsh w9, [x4, x2, lsl #-1]
+// CHECK-ERROR-NEXT:                                 ^
+// CHECK-ERROR-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #1
+// CHECK-ERROR-NEXT:         ldr h13, [x4, w2, uxtw #2]
+// CHECK-ERROR-NEXT:                           ^
+
+        str w9, [x5, w9, sxtw #-1]
+        str s3, [sp, w9, uxtw #1]
+        ldrsw x9, [x15, x4, sxtx #3]
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         str w9, [x5, w9, sxtw #-1]
+// CHECK-ERROR-NEXT:                                ^
+// CHECK-ERROR-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #2
+// CHECK-ERROR-NEXT:         str s3, [sp, w9, uxtw #1]
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0 or #2
+// CHECK-ERROR-NEXT:         ldrsw x9, [x15, x4, sxtx #3]
+// CHECK-ERROR-NEXT:                             ^
+
+        str xzr, [x5, x9, sxtx #-1]
+        prfm pldl3keep, [sp, x20, lsl #2]
+        ldr d3, [x20, wzr, uxtw #4]
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         str xzr, [x5, x9, sxtx #-1]
+// CHECK-ERROR-NEXT:                                 ^
+// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0 or #3
+// CHECK-ERROR-NEXT:         prfm pldl3keep, [sp, x20, lsl #2]
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #3
+// CHECK-ERROR-NEXT:         ldr d3, [x20, wzr, uxtw #4]
+// CHECK-ERROR-NEXT:                 ^
+
+        ldr q5, [sp, x2, lsl #-1]
+        ldr q10, [x20, w4, uxtw #2]
+        str q21, [x20, w4, uxtw #5]
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         ldr q5, [sp, x2, lsl #-1]
+// CHECK-ERROR-NEXT:                               ^
+// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtw' with optional shift of #0 or #4
+// CHECK-ERROR-NEXT:         ldr q10, [x20, w4, uxtw #2]
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtw' with optional shift of #0 or #4
+// CHECK-ERROR-NEXT:         str q21, [x20, w4, uxtw #5]
+// CHECK-ERROR-NEXT:                  ^
+
+//------------------------------------------------------------------------------
+// Load/store register pair (offset)
+//------------------------------------------------------------------------------
+        ldp w3, w2, [x4, #1]
+        stp w1, w2, [x3, #253]
+        stp w9, w10, [x5, #256]
+        ldp w11, w12, [x9, #-260]
+        stp wsp, w9, [sp]
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldp w3, w2, [x4, #1]
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         stp w1, w2, [x3, #253]
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         stp w9, w10, [x5, #256]
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldp w11, w12, [x9, #-260]
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         stp wsp, w9, [sp]
+// CHECK-ERROR-NEXT:             ^
+
+        ldpsw x9, x2, [sp, #2]
+        ldpsw x1, x2, [x10, #256]
+        ldpsw x3, x4, [x11, #-260]
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldpsw x9, x2, [sp, #2]
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldpsw x1, x2, [x10, #256]
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldpsw x3, x4, [x11, #-260]
+// CHECK-ERROR-NEXT:                       ^
+
+        ldp x2, x5, [sp, #4]
+        ldp x5, x6, [x9, #512]
+        stp x7, x8, [x10, #-520]
+// CHECK-ERROR: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         ldp x2, x5, [sp, #4]
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         ldp x5, x6, [x9, #512]
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         stp x7, x8, [x10, #-520]
+// CHECK-ERROR-NEXT:                     ^
+
+        ldp sp, x3, [x10]
+        stp x3, sp, [x9]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldp sp, x3, [x10]
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         stp x3, sp, [x9]
+// CHECK-ERROR-NEXT:                 ^
+
+        stp s3, s5, [sp, #-2]
+        ldp s6, s26, [x4, #-260]
+        stp s13, s19, [x5, #256]
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         stp s3, s5, [sp, #-2]
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldp s6, s26, [x4, #-260]
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         stp s13, s19, [x5, #256]
+// CHECK-ERROR-NEXT:                       ^
+
+        ldp d3, d4, [xzr]
+        ldp d5, d6, [x0, #512]
+        stp d7, d8, [x0, #-520]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldp d3, d4, [xzr]
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         ldp d5, d6, [x0, #512]
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         stp d7, d8, [x0, #-520]
+// CHECK-ERROR-NEXT:                     ^
+
+        ldp d3, q2, [sp]
+        ldp q3, q5, [sp, #8]
+        stp q20, q25, [x5, #1024]
+        ldp q30, q15, [x23, #-1040]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldp d3, q2, [sp]
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT:         ldp q3, q5, [sp, #8]
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT:         stp q20, q25, [x5, #1024]
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT:         ldp q30, q15, [x23, #-1040]
+// CHECK-ERROR-NEXT:                       ^
+
+//------------------------------------------------------------------------------
+// Load/store register pair (post-indexed)
+//------------------------------------------------------------------------------
+
+        ldp w3, w2, [x4], #1
+        stp w1, w2, [x3], #253
+        stp w9, w10, [x5], #256
+        ldp w11, w12, [x9], #-260
+        stp wsp, w9, [sp], #0
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldp w3, w2, [x4], #1
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         stp w1, w2, [x3], #253
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         stp w9, w10, [x5], #256
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldp w11, w12, [x9], #-260
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         stp wsp, w9, [sp], #0
+// CHECK-ERROR-NEXT:             ^
+
+        ldpsw x9, x2, [sp], #2
+        ldpsw x1, x2, [x10], #256
+        ldpsw x3, x4, [x11], #-260
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldpsw x9, x2, [sp], #2
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldpsw x1, x2, [x10], #256
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldpsw x3, x4, [x11], #-260
+// CHECK-ERROR-NEXT:                       ^
+
+        ldp x2, x5, [sp], #4
+        ldp x5, x6, [x9], #512
+        stp x7, x8, [x10], #-520
+// CHECK-ERROR: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         ldp x2, x5, [sp], #4
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         ldp x5, x6, [x9], #512
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         stp x7, x8, [x10], #-520
+// CHECK-ERROR-NEXT:                            ^
+
+        ldp sp, x3, [x10], #0
+        stp x3, sp, [x9], #0
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldp sp, x3, [x10], #0
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         stp x3, sp, [x9], #0
+// CHECK-ERROR-NEXT:                 ^
+
+        stp s3, s5, [sp], #-2
+        ldp s6, s26, [x4], #-260
+        stp s13, s19, [x5], #256
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         stp s3, s5, [sp], #-2
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldp s6, s26, [x4], #-260
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         stp s13, s19, [x5], #256
+// CHECK-ERROR-NEXT:                       ^
+
+        ldp d3, d4, [xzr], #0
+        ldp d5, d6, [x0], #512
+        stp d7, d8, [x0], #-520
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldp d3, d4, [xzr], #0
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         ldp d5, d6, [x0], #512
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         stp d7, d8, [x0], #-520
+// CHECK-ERROR-NEXT:                     ^
+
+        ldp d3, q2, [sp], #0
+        ldp q3, q5, [sp], #8
+        stp q20, q25, [x5], #1024
+        ldp q30, q15, [x23], #-1040
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldp d3, q2, [sp], #0
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT:         ldp q3, q5, [sp], #8
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT:         stp q20, q25, [x5], #1024
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT:         ldp q30, q15, [x23], #-1040
+// CHECK-ERROR-NEXT:                       ^
+
+//------------------------------------------------------------------------------
+// Load/store register pair (pre-indexed)
+//------------------------------------------------------------------------------
+
+        ldp w3, w2, [x4, #1]!
+        stp w1, w2, [x3, #253]!
+        stp w9, w10, [x5, #256]!
+        ldp w11, w12, [x9, #-260]!
+        stp wsp, w9, [sp, #0]!
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldp w3, w2, [x4, #1]!
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         stp w1, w2, [x3, #253]!
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         stp w9, w10, [x5, #256]!
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldp w11, w12, [x9, #-260]!
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         stp wsp, w9, [sp, #0]!
+// CHECK-ERROR-NEXT:             ^
+
+        ldpsw x9, x2, [sp, #2]!
+        ldpsw x1, x2, [x10, #256]!
+        ldpsw x3, x4, [x11, #-260]!
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldpsw x9, x2, [sp, #2]!
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldpsw x1, x2, [x10, #256]!
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldpsw x3, x4, [x11, #-260]!
+// CHECK-ERROR-NEXT:                       ^
+
+        ldp x2, x5, [sp, #4]!
+        ldp x5, x6, [x9, #512]!
+        stp x7, x8, [x10, #-520]!
+// CHECK-ERROR: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         ldp x2, x5, [sp, #4]!
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         ldp x5, x6, [x9, #512]!
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         stp x7, x8, [x10, #-520]!
+// CHECK-ERROR-NEXT:                     ^
+
+        ldp sp, x3, [x10, #0]!
+        stp x3, sp, [x9, #0]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldp sp, x3, [x10, #0]!
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         stp x3, sp, [x9, #0]!
+// CHECK-ERROR-NEXT:                 ^
+
+        stp s3, s5, [sp, #-2]!
+        ldp s6, s26, [x4, #-260]!
+        stp s13, s19, [x5, #256]!
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         stp s3, s5, [sp, #-2]!
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldp s6, s26, [x4, #-260]!
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         stp s13, s19, [x5, #256]!
+// CHECK-ERROR-NEXT:                       ^
+
+        ldp d3, d4, [xzr, #0]!
+        ldp d5, d6, [x0, #512]!
+        stp d7, d8, [x0, #-520]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldp d3, d4, [xzr, #0]!
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         ldp d5, d6, [x0, #512]!
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         stp d7, d8, [x0, #-520]!
+// CHECK-ERROR-NEXT:                     ^
+
+        ldp d3, q2, [sp, #0]!
+        ldp q3, q5, [sp, #8]!
+        stp q20, q25, [x5, #1024]!
+        ldp q30, q15, [x23, #-1040]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldp d3, q2, [sp, #0]!
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT:         ldp q3, q5, [sp, #8]!
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT:         stp q20, q25, [x5, #1024]!
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT:         ldp q30, q15, [x23, #-1040]!
+// CHECK-ERROR-NEXT:                       ^
+
+//------------------------------------------------------------------------------
+// Load/store non-temporal register pair (offset)
+//------------------------------------------------------------------------------
+        ldnp w3, w2, [x4, #1]
+        stnp w1, w2, [x3, #253]
+        stnp w9, w10, [x5, #256]
+        ldnp w11, w12, [x9, #-260]
+        stnp wsp, w9, [sp]
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldnp w3, w2, [x4, #1]
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         stnp w1, w2, [x3, #253]
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         stnp w9, w10, [x5, #256]
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldnp w11, w12, [x9, #-260]
+// CHECK-ERROR-NEXT:                             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         stnp wsp, w9, [sp]
+// CHECK-ERROR-NEXT:              ^
+
+        ldnp x2, x5, [sp, #4]
+        ldnp x5, x6, [x9, #512]
+        stnp x7, x8, [x10, #-520]
+// CHECK-ERROR: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         ldnp x2, x5, [sp, #4]
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         ldnp x5, x6, [x9, #512]
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         stnp x7, x8, [x10, #-520]
+// CHECK-ERROR-NEXT:                            ^
+
+        ldnp sp, x3, [x10]
+        stnp x3, sp, [x9]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldnp sp, x3, [x10]
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         stnp x3, sp, [x9]
+// CHECK-ERROR-NEXT:                 ^
+
+        stnp s3, s5, [sp, #-2]
+        ldnp s6, s26, [x4, #-260]
+        stnp s13, s19, [x5, #256]
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         stnp s3, s5, [sp, #-2]
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         ldnp s6, s26, [x4, #-260]
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT:         stnp s13, s19, [x5, #256]
+// CHECK-ERROR-NEXT:                       ^
+
+        ldnp d3, d4, [xzr]
+        ldnp d5, d6, [x0, #512]
+        stnp d7, d8, [x0, #-520]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldnp d3, d4, [xzr]
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         ldnp d5, d6, [x0, #512]
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT:         stnp d7, d8, [x0, #-520]
+// CHECK-ERROR-NEXT:                     ^
+
+        ldnp d3, q2, [sp]
+        ldnp q3, q5, [sp, #8]
+        stnp q20, q25, [x5, #1024]
+        ldnp q30, q15, [x23, #-1040]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ldnp d3, q2, [sp]
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT:         ldnp q3, q5, [sp, #8]
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT:         stnp q20, q25, [x5, #1024]
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT:         ldnp q30, q15, [x23, #-1040]
+// CHECK-ERROR-NEXT:                       ^
+
+//------------------------------------------------------------------------------
+// Logical (immediate)
+//------------------------------------------------------------------------------
+        orr w0, w1, #0xffffffff
+        and x3, x5, #0xffffffffffffffff
+// CHECK-ERROR: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT:         orr w0, w1, #0xffffffff
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT:         and x3, x5, #0xffffffffffffffff
+// CHECK-ERROR-NEXT:                     ^
+
+        ands w3, w9, #0x0
+        eor x2, x0, #0x0
+// CHECK-ERROR: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT:         ands w3, w9, #0x0
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT:         eor x2, x0, #0x0
+// CHECK-ERROR-NEXT:                     ^
+
+        eor w3, w5, #0x83
+        eor x9, x20, #0x1234
+// CHECK-ERROR: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT:         eor w3, w5, #0x83
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT:         eor x9, x20, #0x1234
+// CHECK-ERROR-NEXT:                      ^
+
+        and wzr, w4, 0xffff0000
+        eor xzr, x9, #0xffff0000ffff0000
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         and wzr, w4, 0xffff0000
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         eor xzr, x9, #0xffff0000ffff0000
+// CHECK-ERROR-NEXT:                      ^
+
+        orr w3, wsp, #0xf0f0f0f0
+        ands x3, sp, #0xaaaaaaaaaaaaaaaa
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         orr w3, wsp, #0xf0f0f0f0
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ands x3, sp, #0xaaaaaaaaaaaaaaaa
+// CHECK-ERROR-NEXT:                  ^
+
+        tst sp, #0xe0e0e0e0e0e0e0e0
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         tst sp, #0xe0e0e0e0e0e0e0e0
+// CHECK-ERROR-NEXT:             ^
+
+        // movi has been removed from the specification. Make sure it's really gone.
+        movi wzr, #0x44444444
+        movi w3, #0xffff
+        movi x9, #0x0000ffff00000000
+// CHECK-ERROR: error: invalid instruction
+// CHECK-ERROR-NEXT:         movi wzr, #0x44444444
+// CHECK-ERROR-NEXT:         ^
+// CHECK-ERROR: error: invalid instruction
+// CHECK-ERROR-NEXT:         movi w3, #0xffff
+// CHECK-ERROR-NEXT:         ^
+// CHECK-ERROR: error: invalid instruction
+// CHECK-ERROR-NEXT:         movi x9, #0x0000ffff00000000
+// CHECK-ERROR-NEXT:         ^
+
+//------------------------------------------------------------------------------
+// Logical (shifted register)
+//------------------------------------------------------------------------------
+
+        //// Out of range or missing shift amounts
+        and w2, w24, w6, lsl #-1
+        and w4, w6, w12, lsl #32
+        and x4, x6, x12, lsl #64
+        and x2, x5, x11, asr
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT:         and w2, w24, w6, lsl #-1
+// CHECK-ERROR-NEXT:                               ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+// CHECK-ERROR-NEXT:         and w4, w6, w12, lsl #32
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR-NEXT:         and x4, x6, x12, lsl #64
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: expected #imm after shift specifier
+// CHECK-ERROR-NEXT:         and x2, x5, x11, asr
+// CHECK-ERROR-NEXT:                             ^
+
+        //// sp not allowed
+        orn wsp, w3, w5
+        bics x20, sp, x9, lsr #0
+        orn x2, x6, sp, lsl #3
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         orn wsp, w3, w5
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         bics x20, sp, x9, lsr #0
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         orn x2, x6, sp, lsl #3
+// CHECK-ERROR-NEXT:                     ^
+
+        //// Mismatched registers
+        and x3, w2, w1
+        ands w1, x12, w2
+        and x4, x5, w6, lsl #12
+        orr w2, w5, x7, asr #0
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         and x3, w2, w1
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         ands w1, x12, w2
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT:         and x4, x5, w6, lsl #12
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT:         orr w2, w5, x7, asr #0
+// CHECK-ERROR-NEXT:                     ^
+
+        //// Shifts should not be allowed on mov
+        mov w3, w7, lsl #13
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         mov w3, w7, lsl #13
+// CHECK-ERROR-NEXT:                     ^
+
+//------------------------------------------------------------------------------
+// Move wide (immediate)
+//------------------------------------------------------------------------------
+
+        movz w3, #65536, lsl #0
+        movz w4, #65536
+        movn w1, #2, lsl #1
+        movk w3, #0, lsl #-1
+        movn w2, #-1, lsl #0
+        movz x3, #-1
+        movk w3, #1, lsl #32
+        movn x2, #12, lsl #64
+// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movz w3, #65536, lsl #0
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movz w4, #65536
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movn w1, #2, lsl #1
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: only 'lsl #+N' valid after immediate
+// CHECK-ERROR-NEXT:         movk w3, #0, lsl #-1
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movn w2, #-1, lsl #0
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movz x3, #-1
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movk w3, #1, lsl #32
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movn x2, #12, lsl #64
+// CHECK-ERROR-NEXT:                  ^
+
+        movz x12, #:abs_g0:sym, lsl #16
+        movz x12, #:abs_g0:sym, lsl #0
+        movn x2, #:abs_g0:sym
+        movk w3, #:abs_g0:sym
+        movz x3, #:abs_g0_nc:sym
+        movn x4, #:abs_g0_nc:sym
+// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movz x12, #:abs_g0:sym, lsl #16
+// CHECK-ERROR-NEXT:                                 ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movz x12, #:abs_g0:sym, lsl #0
+// CHECK-ERROR-NEXT:                                 ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movn x2, #:abs_g0:sym
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movk w3, #:abs_g0:sym
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movz x3, #:abs_g0_nc:sym
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movn x4, #:abs_g0_nc:sym
+// CHECK-ERROR-NEXT:                  ^
+
+        movn x2, #:abs_g1:sym
+        movk w3, #:abs_g1:sym
+        movz x3, #:abs_g1_nc:sym
+        movn x4, #:abs_g1_nc:sym
+// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movn x2, #:abs_g1:sym
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movk w3, #:abs_g1:sym
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movz x3, #:abs_g1_nc:sym
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movn x4, #:abs_g1_nc:sym
+// CHECK-ERROR-NEXT:                  ^
+
+        movz w12, #:abs_g2:sym
+        movn x12, #:abs_g2:sym
+        movk x13, #:abs_g2:sym
+        movk w3, #:abs_g2_nc:sym
+        movz x13, #:abs_g2_nc:sym
+        movn x24, #:abs_g2_nc:sym
+// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movz w12, #:abs_g2:sym
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movn x12, #:abs_g2:sym
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movk x13, #:abs_g2:sym
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movk w3, #:abs_g2_nc:sym
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movz x13, #:abs_g2_nc:sym
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movn x24, #:abs_g2_nc:sym
+// CHECK-ERROR-NEXT:                   ^
+
+        movn x19, #:abs_g3:sym
+        movz w20, #:abs_g3:sym
+        movk w21, #:abs_g3:sym
+// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movn x19, #:abs_g3:sym
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movz w20, #:abs_g3:sym
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movk w21, #:abs_g3:sym
+// CHECK-ERROR-NEXT:                   ^
+
+        movk x19, #:abs_g0_s:sym
+        movk w23, #:abs_g0_s:sym
+// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movk x19, #:abs_g0_s:sym
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movk w23, #:abs_g0_s:sym
+// CHECK-ERROR-NEXT:                   ^
+
+        movk x19, #:abs_g1_s:sym
+        movk w23, #:abs_g1_s:sym
+// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movk x19, #:abs_g1_s:sym
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movk w23, #:abs_g1_s:sym
+// CHECK-ERROR-NEXT:                   ^
+
+        movz w2, #:abs_g2_s:sym
+        movn w29, #:abs_g2_s:sym
+        movk x19, #:abs_g2_s:sym
+        movk w23, #:abs_g2_s:sym
+// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movz w2, #:abs_g2_s:sym
+// CHECK-ERROR-NEXT:                    ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movn w29, #:abs_g2_s:sym
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movk x19, #:abs_g2_s:sym
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT:         movk w23, #:abs_g2_s:sym
+// CHECK-ERROR-NEXT:                   ^
+
+//------------------------------------------------------------------------------
+// PC-relative addressing
+//------------------------------------------------------------------------------
+
+        adr sp, loc             // expects xzr
+        adrp x3, #20            // Immediate unaligned
+        adrp w2, loc            // 64-bit register needed
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         adr sp, loc
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT:         adrp x3, #20
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         adrp w2, loc
+// CHECK-ERROR-NEXT:              ^
+
+        adr x9, #1048576
+        adr x2, #-1048577
+        adrp x9, #4294967296
+        adrp x20, #-4294971392
+// CHECK-ERROR: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT:         adr x9, #1048576
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT:         adr x2, #-1048577
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT:         adrp x9, #4294967296
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT:         adrp x20, #-4294971392
+// CHECK-ERROR-NEXT:                   ^
+
+//------------------------------------------------------------------------------
+// System
+//------------------------------------------------------------------------------
+
+        hint #-1
+        hint #128
+// CHECK-ERROR: error: expected integer in range [0, 127]
+// CHECK-ERROR-NEXT:         hint #-1
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 127]
+// CHECK-ERROR-NEXT:         hint #128
+// CHECK-ERROR-NEXT:              ^
+
+        clrex #-1
+        clrex #16
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:         clrex #-1
+// CHECK-ERROR-NEXT:               ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:         clrex #16
+// CHECK-ERROR-NEXT:               ^
+
+        dsb #-1
+        dsb #16
+        dmb #-1
+        dmb #16
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT:         dsb #-1
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT:         dsb #16
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT:         dmb #-1
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT:         dmb #16
+// CHECK-ERROR-NEXT:             ^
+
+        isb #-1
+        isb #16
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT:         isb #-1
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT:         isb #16
+// CHECK-ERROR-NEXT:             ^
+
+        msr daifset, x4
+        msr spsel, #-1
+        msr spsel #-1
+        msr daifclr, #16
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:         msr daifset, x4
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:         msr spsel, #-1
+// CHECK-ERROR-NEXT:                    ^
+// CHECK-ERROR-NEXT: error: expected comma before next operand
+// CHECK-ERROR-NEXT:         msr spsel #-1
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT:         msr daifclr, #16
+// CHECK-ERROR-NEXT:                      ^
+
+        sys #8, c1, c2, #7, x9
+        sys #3, c16, c2, #3, x10
+        sys #2, c11, c16, #5
+        sys #4, c9, c8, #8, xzr
+        sysl x11, #8, c1, c2, #7
+        sysl x13, #3, c16, c2, #3
+        sysl x9, #2, c11, c16, #5
+        sysl x4, #4, c9, c8, #8
+// CHECK-ERROR-NEXT: error:  expected integer in range [0, 7]
+// CHECK-ERROR-NEXT:         sys #8, c1, c2, #7, x9
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: Expected cN operand where 0 <= N <= 15
+// CHECK-ERROR-NEXT:         sys #3, c16, c2, #3, x10
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: Expected cN operand where 0 <= N <= 15
+// CHECK-ERROR-NEXT:         sys #2, c11, c16, #5
+// CHECK-ERROR-NEXT:                      ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 7]
+// CHECK-ERROR-NEXT:         sys #4, c9, c8, #8, xzr
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 7]
+// CHECK-ERROR-NEXT:         sysl x11, #8, c1, c2, #7
+// CHECK-ERROR-NEXT:                   ^
+// CHECK-ERROR-NEXT: error: Expected cN operand where 0 <= N <= 15
+// CHECK-ERROR-NEXT:         sysl x13, #3, c16, c2, #3
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: Expected cN operand where 0 <= N <= 15
+// CHECK-ERROR-NEXT:         sysl x9, #2, c11, c16, #5
+// CHECK-ERROR-NEXT:                           ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 7]
+// CHECK-ERROR-NEXT:         sysl x4, #4, c9, c8, #8
+// CHECK-ERROR-NEXT:                              ^
+
+        ic ialluis, x2
+        ic allu, x7
+        ic ivau
+// CHECK-ERROR-NEXT: error: specified IC op does not use a register
+// CHECK-ERROR-NEXT:         ic ialluis, x2
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: operand specifier not recognised
+// CHECK-ERROR-NEXT:         ic allu, x7
+// CHECK-ERROR-NEXT:            ^
+// CHECK-ERROR-NEXT: error: specified IC op requires a register
+// CHECK-ERROR-NEXT:         ic ivau
+// CHECK-ERROR-NEXT:            ^
+
+        tlbi IPAS2E1IS
+        tlbi IPAS2LE1IS
+        tlbi VMALLE1IS, x12
+        tlbi ALLE2IS, x11
+        tlbi ALLE3IS, x20
+        tlbi VAE1IS
+        tlbi VAE2IS
+        tlbi VAE3IS
+        tlbi ASIDE1IS
+        tlbi VAAE1IS
+        tlbi ALLE1IS, x0
+        tlbi VALE1IS
+        tlbi VALE2IS
+        tlbi VALE3IS
+        tlbi VMALLS12E1IS, xzr
+        tlbi VAALE1IS
+        tlbi IPAS2E1
+        tlbi IPAS2LE1
+        tlbi VMALLE1, x9
+        tlbi ALLE2, x10
+        tlbi ALLE3, x11
+        tlbi VAE1
+        tlbi VAE2
+        tlbi VAE3
+        tlbi ASIDE1
+        tlbi VAAE1
+        tlbi ALLE1, x25
+        tlbi VALE1
+        tlbi VALE2
+        tlbi VALE3
+        tlbi VMALLS12E1, x15
+        tlbi VAALE1
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi IPAS2E1IS
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi IPAS2LE1IS
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT:         tlbi VMALLE1IS, x12
+// CHECK-ERROR-NEXT:                         ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT:         tlbi ALLE2IS, x11
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT:         tlbi ALLE3IS, x20
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi VAE1IS
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi VAE2IS
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi VAE3IS
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi ASIDE1IS
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi VAAE1IS
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT:         tlbi ALLE1IS, x0
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi VALE1IS
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi VALE2IS
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi VALE3IS
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT:         tlbi VMALLS12E1IS, xzr
+// CHECK-ERROR-NEXT:                            ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi VAALE1IS
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi IPAS2E1
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi IPAS2LE1
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT:         tlbi VMALLE1, x9
+// CHECK-ERROR-NEXT:                       ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT:         tlbi ALLE2, x10
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT:         tlbi ALLE3, x11
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi VAE1
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi VAE2
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi VAE3
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi ASIDE1
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi VAAE1
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT:         tlbi ALLE1, x25
+// CHECK-ERROR-NEXT:                     ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi VALE1
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi VALE2
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi VALE3
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT:         tlbi VMALLS12E1, x15
+// CHECK-ERROR-NEXT:                          ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT:         tlbi VAALE1
+// CHECK-ERROR-NEXT:              ^
+
+// For the MSR/MRS instructions, first make sure read-only and
+// write-only registers actually are.
+        msr MDCCSR_EL0, x12
+        msr DBGDTRRX_EL0, x12
+        msr MDRAR_EL1, x12
+        msr OSLSR_EL1, x12
+        msr DBGAUTHSTATUS_EL1, x12
+        msr MIDR_EL1, x12
+        msr CCSIDR_EL1, x12
+        msr CLIDR_EL1, x12
+        msr CTR_EL0, x12
+        msr MPIDR_EL1, x12
+        msr REVIDR_EL1, x12
+        msr AIDR_EL1, x12
+        msr DCZID_EL0, x12
+        msr ID_PFR0_EL1, x12
+        msr ID_PFR1_EL1, x12
+        msr ID_DFR0_EL1, x12
+        msr ID_AFR0_EL1, x12
+        msr ID_MMFR0_EL1, x12
+        msr ID_MMFR1_EL1, x12
+        msr ID_MMFR2_EL1, x12
+        msr ID_MMFR3_EL1, x12
+        msr ID_ISAR0_EL1, x12
+        msr ID_ISAR1_EL1, x12
+        msr ID_ISAR2_EL1, x12
+        msr ID_ISAR3_EL1, x12
+        msr ID_ISAR4_EL1, x12
+        msr ID_ISAR5_EL1, x12
+        msr MVFR0_EL1, x12
+        msr MVFR1_EL1, x12
+        msr MVFR2_EL1, x12
+        msr ID_AA64PFR0_EL1, x12
+        msr ID_AA64PFR1_EL1, x12
+        msr ID_AA64DFR0_EL1, x12
+        msr ID_AA64DFR1_EL1, x12
+        msr ID_AA64AFR0_EL1, x12
+        msr ID_AA64AFR1_EL1, x12
+        msr ID_AA64ISAR0_EL1, x12
+        msr ID_AA64ISAR1_EL1, x12
+        msr ID_AA64MMFR0_EL1, x12
+        msr ID_AA64MMFR1_EL1, x12
+        msr PMCEID0_EL0, x12
+        msr PMCEID1_EL0, x12
+        msr RVBAR_EL1, x12
+        msr RVBAR_EL2, x12
+        msr RVBAR_EL3, x12
+        msr ISR_EL1, x12
+        msr CNTPCT_EL0, x12
+        msr CNTVCT_EL0, x12
+        msr PMEVCNTR31_EL0, x12
+        msr PMEVTYPER31_EL0, x12
+// CHECK-ERROR: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr MDCCSR_EL0, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr DBGDTRRX_EL0, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr MDRAR_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr OSLSR_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr DBGAUTHSTATUS_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr MIDR_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr CCSIDR_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr CLIDR_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr CTR_EL0, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr MPIDR_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr REVIDR_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr AIDR_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr DCZID_EL0, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_PFR0_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_PFR1_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_DFR0_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_AFR0_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_MMFR0_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_MMFR1_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_MMFR2_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_MMFR3_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_ISAR0_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_ISAR1_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_ISAR2_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_ISAR3_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_ISAR4_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_ISAR5_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr MVFR0_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr MVFR1_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr MVFR2_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_AA64PFR0_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_AA64PFR1_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_AA64DFR0_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_AA64DFR1_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_AA64AFR0_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_AA64AFR1_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_AA64ISAR0_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_AA64ISAR1_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_AA64MMFR0_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ID_AA64MMFR1_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr PMCEID0_EL0, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr PMCEID1_EL0, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr RVBAR_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr RVBAR_EL2, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr RVBAR_EL3, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr ISR_EL1, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr CNTPCT_EL0, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr CNTVCT_EL0, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr PMEVCNTR31_EL0, x12
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT:         msr PMEVTYPER31_EL0, x12
+// CHECK-ERROR-NEXT:             ^
+
+        mrs x9, DBGDTRTX_EL0
+        mrs x9, OSLAR_EL1
+        mrs x9, PMSWINC_EL0
+        mrs x9, PMEVCNTR31_EL0
+        mrs x9, PMEVTYPER31_EL0
+// CHECK-ERROR: error: expected readable system register
+// CHECK-ERROR-NEXT:         mrs x9, DBGDTRTX_EL0
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT:         mrs x9, OSLAR_EL1
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT:         mrs x9, PMSWINC_EL0
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT:         mrs x9, PMEVCNTR31_EL0
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT:         mrs x9, PMEVTYPER31_EL0
+// CHECK-ERROR-NEXT:                 ^
+
+// Now check some invalid generic names
+        mrs xzr, s2_5_c11_c13_2
+        mrs x12, s3_8_c11_c13_2
+        mrs x13, s3_3_c12_c13_2
+        mrs x19, s3_2_c15_c16_2
+        mrs x30, s3_2_c15_c1_8
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT:         mrs xzr, s2_5_c11_c13_2
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT:         mrs x12, s3_8_c11_c13_2
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT:         mrs x13, s3_3_c12_c13_2
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT:         mrs x19, s3_2_c15_c16_2
+// CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT:         mrs x30, s3_2_c15_c1_8
+// CHECK-ERROR-NEXT:                  ^
+
+//------------------------------------------------------------------------------
+// Test and branch (immediate)
+//------------------------------------------------------------------------------
+
+        tbz w3, #-1, addr
+        tbz w3, #32, nowhere
+        tbz x9, #-1, there
+        tbz x20, #64, dont
+// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:     tbz w3, #-1, addr
+// CHECK-ERROR-NEXT:             ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:        tbz w3, #32, nowhere
+// CHECK-ERROR-NEXT:                ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT:        tbz x9, #-1, there
+// CHECK-ERROR-NEXT:                ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT:        tbz x20, #64, dont
+// CHECK-ERROR-NEXT:                 ^
+
+        tbnz w3, #-1, addr
+        tbnz w3, #32, nowhere
+        tbnz x9, #-1, there
+        tbnz x20, #64, dont
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:        tbnz w3, #-1, addr
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT:        tbnz w3, #32, nowhere
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT:        tbnz x9, #-1, there
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT:        tbnz x20, #64, dont
+// CHECK-ERROR-NEXT:                  ^
+//------------------------------------------------------------------------------
+// Unconditional branch (immediate)
+//------------------------------------------------------------------------------
+
+        b #134217728
+        b #-134217732
+        b #1
+// CHECK-ERROR: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT:         b #134217728
+// CHECK-ERROR-NEXT:           ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT:         b #-134217732
+// CHECK-ERROR-NEXT:           ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT:         b #1
+// CHECK-ERROR-NEXT:           ^
+
+//------------------------------------------------------------------------------
+// Unconditional branch (register)
+//------------------------------------------------------------------------------
+
+        br w2
+        br sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         br w2
+// CHECK-ERROR-NEXT:            ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         br sp
+// CHECK-ERROR-NEXT:            ^
+
+        // eret and drps take no operands, so any register must be rejected
+        eret x2
+        drps x2
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         eret x2
+// CHECK-ERROR-NEXT:              ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT:         drps x2
+// CHECK-ERROR-NEXT:              ^
+
diff --git a/test/MC/AArch64/basic-a64-instructions.s b/test/MC/AArch64/basic-a64-instructions.s
new file mode 100644
index 0000000..ad3064e
--- /dev/null
+++ b/test/MC/AArch64/basic-a64-instructions.s
@@ -0,0 +1,4819 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding < %s | FileCheck %s
+  .globl _func
+
+// Check that the assembler can handle the documented syntax from the ARM ARM.
+// For complex constructs like shifter operands, check more thoroughly for them
+// once then spot check that following instructions accept the form generally.
+// This gives us good coverage while keeping the overall size of the test
+// more reasonable.
+
+
+_func:
+// CHECK: _func
+
+//------------------------------------------------------------------------------
+// Add/sub (extended register)
+//------------------------------------------------------------------------------
+        // Basic extends 64-bit ops
+        add x2, x4, w5, uxtb
+        add x20, sp, w19, uxth
+        add x12, x1, w20, uxtw
+        add x20, x3, x13, uxtx
+        add x17, x25, w20, sxtb
+        add x18, x13, w19, sxth
+        add sp, x2, w3, sxtw
+        add x3, x5, x9, sxtx
+// CHECK: add      x2, x4, w5, uxtb           // encoding: [0x82,0x00,0x25,0x8b]
+// CHECK: add      x20, sp, w19, uxth         // encoding: [0xf4,0x23,0x33,0x8b]
+// CHECK: add      x12, x1, w20, uxtw         // encoding: [0x2c,0x40,0x34,0x8b]
+// CHECK: add      x20, x3, x13, uxtx         // encoding: [0x74,0x60,0x2d,0x8b]
+// CHECK: add      x17, x25, w20, sxtb        // encoding: [0x31,0x83,0x34,0x8b]
+// CHECK: add      x18, x13, w19, sxth        // encoding: [0xb2,0xa1,0x33,0x8b]
+// CHECK: add      sp, x2, w3, sxtw           // encoding: [0x5f,0xc0,0x23,0x8b]
+// CHECK: add      x3, x5, x9, sxtx           // encoding: [0xa3,0xe0,0x29,0x8b]
+
+        // Basic extends, 32-bit ops
+        add w2, w5, w7, uxtb
+        add w21, w15, w17, uxth
+        add w30, w29, wzr, uxtw
+        add w19, w17, w1, uxtx  // Goodness knows what this means
+        add w2, w5, w1, sxtb
+        add w26, w17, w19, sxth
+        add w0, w2, w3, sxtw
+        add w2, w3, w5, sxtx
+// CHECK: add      w2, w5, w7, uxtb           // encoding: [0xa2,0x00,0x27,0x0b]
+// CHECK: add      w21, w15, w17, uxth        // encoding: [0xf5,0x21,0x31,0x0b]
+// CHECK: add      w30, w29, wzr, uxtw        // encoding: [0xbe,0x43,0x3f,0x0b]
+// CHECK: add      w19, w17, w1, uxtx         // encoding: [0x33,0x62,0x21,0x0b]
+// CHECK: add      w2, w5, w1, sxtb           // encoding: [0xa2,0x80,0x21,0x0b]
+// CHECK: add      w26, w17, w19, sxth        // encoding: [0x3a,0xa2,0x33,0x0b]
+// CHECK: add      w0, w2, w3, sxtw           // encoding: [0x40,0xc0,0x23,0x0b]
+// CHECK: add      w2, w3, w5, sxtx           // encoding: [0x62,0xe0,0x25,0x0b]
+
+        // Explicit shift amounts (an explicit #0 is accepted but not printed)
+        add x2, x3, w5, sxtb #0
+        add x7, x11, w13, uxth #4
+        add w17, w19, w23, uxtw #2
+        add w29, w23, w17, uxtx #1
+// CHECK: add      x2, x3, w5, sxtb           // encoding: [0x62,0x80,0x25,0x8b]
+// CHECK: add      x7, x11, w13, uxth #4      // encoding: [0x67,0x31,0x2d,0x8b]
+// CHECK: add      w17, w19, w23, uxtw #2     // encoding: [0x71,0x4a,0x37,0x0b]
+// CHECK: add      w29, w23, w17, uxtx #1     // encoding: [0xfd,0x66,0x31,0x0b]
+
+        // Sub
+        sub x2, x4, w5, uxtb #2
+        sub x20, sp, w19, uxth #4
+        sub x12, x1, w20, uxtw
+        sub x20, x3, x13, uxtx #0
+        sub x17, x25, w20, sxtb
+        sub x18, x13, w19, sxth
+        sub sp, x2, w3, sxtw
+        sub x3, x5, x9, sxtx
+// CHECK: sub      x2, x4, w5, uxtb #2        // encoding: [0x82,0x08,0x25,0xcb]
+// CHECK: sub      x20, sp, w19, uxth #4      // encoding: [0xf4,0x33,0x33,0xcb]
+// CHECK: sub      x12, x1, w20, uxtw         // encoding: [0x2c,0x40,0x34,0xcb]
+// CHECK: sub      x20, x3, x13, uxtx         // encoding: [0x74,0x60,0x2d,0xcb]
+// CHECK: sub      x17, x25, w20, sxtb        // encoding: [0x31,0x83,0x34,0xcb]
+// CHECK: sub      x18, x13, w19, sxth        // encoding: [0xb2,0xa1,0x33,0xcb]
+// CHECK: sub      sp, x2, w3, sxtw           // encoding: [0x5f,0xc0,0x23,0xcb]
+// CHECK: sub      x3, x5, x9, sxtx           // encoding: [0xa3,0xe0,0x29,0xcb]
+
+        sub w2, w5, w7, uxtb
+        sub w21, w15, w17, uxth
+        sub w30, w29, wzr, uxtw
+        sub w19, w17, w1, uxtx  // Goodness knows what this means
+        sub w2, w5, w1, sxtb
+        sub w26, wsp, w19, sxth
+        sub wsp, w2, w3, sxtw
+        sub w2, w3, w5, sxtx
+// CHECK: sub      w2, w5, w7, uxtb           // encoding: [0xa2,0x00,0x27,0x4b]
+// CHECK: sub      w21, w15, w17, uxth        // encoding: [0xf5,0x21,0x31,0x4b]
+// CHECK: sub      w30, w29, wzr, uxtw        // encoding: [0xbe,0x43,0x3f,0x4b]
+// CHECK: sub      w19, w17, w1, uxtx         // encoding: [0x33,0x62,0x21,0x4b]
+// CHECK: sub      w2, w5, w1, sxtb           // encoding: [0xa2,0x80,0x21,0x4b]
+// CHECK: sub      w26, wsp, w19, sxth        // encoding: [0xfa,0xa3,0x33,0x4b]
+// CHECK: sub      wsp, w2, w3, sxtw          // encoding: [0x5f,0xc0,0x23,0x4b]
+// CHECK: sub      w2, w3, w5, sxtx           // encoding: [0x62,0xe0,0x25,0x4b]
+
+        // Adds
+        adds x2, x4, w5, uxtb #2
+        adds x20, sp, w19, uxth #4
+        adds x12, x1, w20, uxtw
+        adds x20, x3, x13, uxtx #0
+        adds xzr, x25, w20, sxtb #3
+        adds x18, sp, w19, sxth
+        adds xzr, x2, w3, sxtw
+        adds x3, x5, x9, sxtx #2
+// CHECK: adds     x2, x4, w5, uxtb #2        // encoding: [0x82,0x08,0x25,0xab]
+// CHECK: adds     x20, sp, w19, uxth #4      // encoding: [0xf4,0x33,0x33,0xab]
+// CHECK: adds     x12, x1, w20, uxtw         // encoding: [0x2c,0x40,0x34,0xab]
+// CHECK: adds     x20, x3, x13, uxtx         // encoding: [0x74,0x60,0x2d,0xab]
+// CHECK: adds     xzr, x25, w20, sxtb #3     // encoding: [0x3f,0x8f,0x34,0xab]
+// CHECK: adds     x18, sp, w19, sxth         // encoding: [0xf2,0xa3,0x33,0xab]
+// CHECK: adds     xzr, x2, w3, sxtw          // encoding: [0x5f,0xc0,0x23,0xab]
+// CHECK: adds     x3, x5, x9, sxtx #2        // encoding: [0xa3,0xe8,0x29,0xab]
+
+        adds w2, w5, w7, uxtb
+        adds w21, w15, w17, uxth
+        adds w30, w29, wzr, uxtw
+        adds w19, w17, w1, uxtx  // Goodness knows what this means
+        adds w2, w5, w1, sxtb #1
+        adds w26, wsp, w19, sxth
+        adds wzr, w2, w3, sxtw
+        adds w2, w3, w5, sxtx
+// CHECK: adds     w2, w5, w7, uxtb           // encoding: [0xa2,0x00,0x27,0x2b]
+// CHECK: adds     w21, w15, w17, uxth        // encoding: [0xf5,0x21,0x31,0x2b]
+// CHECK: adds     w30, w29, wzr, uxtw        // encoding: [0xbe,0x43,0x3f,0x2b]
+// CHECK: adds     w19, w17, w1, uxtx         // encoding: [0x33,0x62,0x21,0x2b]
+// CHECK: adds     w2, w5, w1, sxtb #1        // encoding: [0xa2,0x84,0x21,0x2b]
+// CHECK: adds     w26, wsp, w19, sxth        // encoding: [0xfa,0xa3,0x33,0x2b]
+// CHECK: adds     wzr, w2, w3, sxtw          // encoding: [0x5f,0xc0,0x23,0x2b]
+// CHECK: adds     w2, w3, w5, sxtx           // encoding: [0x62,0xe0,0x25,0x2b]
+
+        // subs
+        subs x2, x4, w5, uxtb #2
+        subs x20, sp, w19, uxth #4
+        subs x12, x1, w20, uxtw
+        subs x20, x3, x13, uxtx #0
+        subs xzr, x25, w20, sxtb #3
+        subs x18, sp, w19, sxth
+        subs xzr, x2, w3, sxtw
+        subs x3, x5, x9, sxtx #2
+// CHECK: subs     x2, x4, w5, uxtb #2        // encoding: [0x82,0x08,0x25,0xeb]
+// CHECK: subs     x20, sp, w19, uxth #4      // encoding: [0xf4,0x33,0x33,0xeb]
+// CHECK: subs     x12, x1, w20, uxtw         // encoding: [0x2c,0x40,0x34,0xeb]
+// CHECK: subs     x20, x3, x13, uxtx         // encoding: [0x74,0x60,0x2d,0xeb]
+// CHECK: subs     xzr, x25, w20, sxtb #3     // encoding: [0x3f,0x8f,0x34,0xeb]
+// CHECK: subs     x18, sp, w19, sxth         // encoding: [0xf2,0xa3,0x33,0xeb]
+// CHECK: subs     xzr, x2, w3, sxtw          // encoding: [0x5f,0xc0,0x23,0xeb]
+// CHECK: subs     x3, x5, x9, sxtx #2        // encoding: [0xa3,0xe8,0x29,0xeb]
+
+        subs w2, w5, w7, uxtb
+        subs w21, w15, w17, uxth
+        subs w30, w29, wzr, uxtw
+        subs w19, w17, w1, uxtx  // Goodness knows what this means
+        subs w2, w5, w1, sxtb #1
+        subs w26, wsp, w19, sxth
+        subs wzr, w2, w3, sxtw
+        subs w2, w3, w5, sxtx
+// CHECK: subs     w2, w5, w7, uxtb           // encoding: [0xa2,0x00,0x27,0x6b]
+// CHECK: subs     w21, w15, w17, uxth        // encoding: [0xf5,0x21,0x31,0x6b]
+// CHECK: subs     w30, w29, wzr, uxtw        // encoding: [0xbe,0x43,0x3f,0x6b]
+// CHECK: subs     w19, w17, w1, uxtx         // encoding: [0x33,0x62,0x21,0x6b]
+// CHECK: subs     w2, w5, w1, sxtb #1        // encoding: [0xa2,0x84,0x21,0x6b]
+// CHECK: subs     w26, wsp, w19, sxth        // encoding: [0xfa,0xa3,0x33,0x6b]
+// CHECK: subs     wzr, w2, w3, sxtw          // encoding: [0x5f,0xc0,0x23,0x6b]
+// CHECK: subs     w2, w3, w5, sxtx           // encoding: [0x62,0xe0,0x25,0x6b]
+
+        // cmp
+        cmp x4, w5, uxtb #2
+        cmp sp, w19, uxth #4
+        cmp x1, w20, uxtw
+        cmp x3, x13, uxtx #0
+        cmp x25, w20, sxtb #3
+        cmp sp, w19, sxth
+        cmp x2, w3, sxtw
+        cmp x5, x9, sxtx #2
+// CHECK: cmp      x4, w5, uxtb #2            // encoding: [0x9f,0x08,0x25,0xeb]
+// CHECK: cmp      sp, w19, uxth #4           // encoding: [0xff,0x33,0x33,0xeb]
+// CHECK: cmp      x1, w20, uxtw              // encoding: [0x3f,0x40,0x34,0xeb]
+// CHECK: cmp      x3, x13, uxtx              // encoding: [0x7f,0x60,0x2d,0xeb]
+// CHECK: cmp      x25, w20, sxtb #3          // encoding: [0x3f,0x8f,0x34,0xeb]
+// CHECK: cmp      sp, w19, sxth              // encoding: [0xff,0xa3,0x33,0xeb]
+// CHECK: cmp      x2, w3, sxtw               // encoding: [0x5f,0xc0,0x23,0xeb]
+// CHECK: cmp      x5, x9, sxtx #2            // encoding: [0xbf,0xe8,0x29,0xeb]
+
+        cmp w5, w7, uxtb
+        cmp w15, w17, uxth
+        cmp w29, wzr, uxtw
+        cmp w17, w1, uxtx  // Goodness knows what this means
+        cmp w5, w1, sxtb #1
+        cmp wsp, w19, sxth
+        cmp w2, w3, sxtw
+        cmp w3, w5, sxtx
+// CHECK: cmp      w5, w7, uxtb               // encoding: [0xbf,0x00,0x27,0x6b]
+// CHECK: cmp      w15, w17, uxth             // encoding: [0xff,0x21,0x31,0x6b]
+// CHECK: cmp      w29, wzr, uxtw             // encoding: [0xbf,0x43,0x3f,0x6b]
+// CHECK: cmp      w17, w1, uxtx              // encoding: [0x3f,0x62,0x21,0x6b]
+// CHECK: cmp      w5, w1, sxtb #1            // encoding: [0xbf,0x84,0x21,0x6b]
+// CHECK: cmp      wsp, w19, sxth             // encoding: [0xff,0xa3,0x33,0x6b]
+// CHECK: cmp      w2, w3, sxtw               // encoding: [0x5f,0xc0,0x23,0x6b]
+// CHECK: cmp      w3, w5, sxtx               // encoding: [0x7f,0xe0,0x25,0x6b]
+
+
+        // cmn
+        cmn x4, w5, uxtb #2
+        cmn sp, w19, uxth #4
+        cmn x1, w20, uxtw
+        cmn x3, x13, uxtx #0
+        cmn x25, w20, sxtb #3
+        cmn sp, w19, sxth
+        cmn x2, w3, sxtw
+        cmn x5, x9, sxtx #2
+// CHECK: cmn      x4, w5, uxtb #2            // encoding: [0x9f,0x08,0x25,0xab]
+// CHECK: cmn      sp, w19, uxth #4           // encoding: [0xff,0x33,0x33,0xab]
+// CHECK: cmn      x1, w20, uxtw              // encoding: [0x3f,0x40,0x34,0xab]
+// CHECK: cmn      x3, x13, uxtx              // encoding: [0x7f,0x60,0x2d,0xab]
+// CHECK: cmn      x25, w20, sxtb #3          // encoding: [0x3f,0x8f,0x34,0xab]
+// CHECK: cmn      sp, w19, sxth              // encoding: [0xff,0xa3,0x33,0xab]
+// CHECK: cmn      x2, w3, sxtw               // encoding: [0x5f,0xc0,0x23,0xab]
+// CHECK: cmn      x5, x9, sxtx #2            // encoding: [0xbf,0xe8,0x29,0xab]
+
+        cmn w5, w7, uxtb
+        cmn w15, w17, uxth
+        cmn w29, wzr, uxtw
+        cmn w17, w1, uxtx  // Goodness knows what this means
+        cmn w5, w1, sxtb #1
+        cmn wsp, w19, sxth
+        cmn w2, w3, sxtw
+        cmn w3, w5, sxtx
+// CHECK: cmn      w5, w7, uxtb               // encoding: [0xbf,0x00,0x27,0x2b]
+// CHECK: cmn      w15, w17, uxth             // encoding: [0xff,0x21,0x31,0x2b]
+// CHECK: cmn      w29, wzr, uxtw             // encoding: [0xbf,0x43,0x3f,0x2b]
+// CHECK: cmn      w17, w1, uxtx              // encoding: [0x3f,0x62,0x21,0x2b]
+// CHECK: cmn      w5, w1, sxtb #1            // encoding: [0xbf,0x84,0x21,0x2b]
+// CHECK: cmn      wsp, w19, sxth             // encoding: [0xff,0xa3,0x33,0x2b]
+// CHECK: cmn      w2, w3, sxtw               // encoding: [0x5f,0xc0,0x23,0x2b]
+// CHECK: cmn      w3, w5, sxtx               // encoding: [0x7f,0xe0,0x25,0x2b]
+
+        // A few more operand combinations for cmp/cmn
+        cmp x20, w29, uxtb #3
+        cmp x12, x13, uxtx #4
+        cmp wsp, w1, uxtb
+        cmn wsp, wzr, sxtw
+// CHECK: cmp      x20, w29, uxtb #3          // encoding: [0x9f,0x0e,0x3d,0xeb]
+// CHECK: cmp      x12, x13, uxtx #4          // encoding: [0x9f,0x71,0x2d,0xeb]
+// CHECK: cmp      wsp, w1, uxtb              // encoding: [0xff,0x03,0x21,0x6b]
+// CHECK: cmn      wsp, wzr, sxtw             // encoding: [0xff,0xc3,0x3f,0x2b]
+
+        // LSL variant if sp involved
+        sub sp, x3, x7, lsl #4
+        add w2, wsp, w3, lsl #1
+        cmp wsp, w9, lsl #0
+        adds wzr, wsp, w3, lsl #4
+        subs x3, sp, x9, lsl #2
+// CHECK: sub      sp, x3, x7, lsl #4         // encoding: [0x7f,0x70,0x27,0xcb]
+// CHECK: add      w2, wsp, w3, lsl #1        // encoding: [0xe2,0x47,0x23,0x0b]
+// CHECK: cmp      wsp, w9                    // encoding: [0xff,0x43,0x29,0x6b]
+// CHECK: adds     wzr, wsp, w3, lsl #4       // encoding: [0xff,0x53,0x23,0x2b]
+// CHECK: subs     x3, sp, x9, lsl #2         // encoding: [0xe3,0x6b,0x29,0xeb]
+
+//------------------------------------------------------------------------------
+// Add/sub (immediate)
+//------------------------------------------------------------------------------
+
+// Check basic immediate values: an unsigned 12-bit immediate, optionally
+// shifted left by 12 bits.
+        add w4, w5, #0x0
+        add w2, w3, #4095
+        add w30, w29, #1, lsl #12
+        add w13, w5, #4095, lsl #12
+        add x5, x7, #1638
+// CHECK: add      w4, w5, #0                 // encoding: [0xa4,0x00,0x00,0x11]
+// CHECK: add      w2, w3, #4095              // encoding: [0x62,0xfc,0x3f,0x11]
+// CHECK: add      w30, w29, #1, lsl #12      // encoding: [0xbe,0x07,0x40,0x11]
+// CHECK: add      w13, w5, #4095, lsl #12    // encoding: [0xad,0xfc,0x7f,0x11]
+// CHECK: add      x5, x7, #1638              // encoding: [0xe5,0x98,0x19,0x91]
+
+// In the non-S variants, register number 31 encodes sp/wsp rather than xzr/wzr
+        add w20, wsp, #801, lsl #0
+        add wsp, wsp, #1104
+        add wsp, w30, #4084
+// CHECK: add      w20, wsp, #801             // encoding: [0xf4,0x87,0x0c,0x11]
+// CHECK: add      wsp, wsp, #1104            // encoding: [0xff,0x43,0x11,0x11]
+// CHECK: add      wsp, w30, #4084            // encoding: [0xdf,0xd3,0x3f,0x11]
+
+// A few checks on the sanity of 64-bit versions
+        add x0, x24, #291
+        add x3, x24, #4095, lsl #12
+        add x8, sp, #1074
+        add sp, x29, #3816
+// CHECK: add      x0, x24, #291              // encoding: [0x00,0x8f,0x04,0x91]
+// CHECK: add      x3, x24, #4095, lsl #12    // encoding: [0x03,0xff,0x7f,0x91]
+// CHECK: add      x8, sp, #1074              // encoding: [0xe8,0xcb,0x10,0x91]
+// CHECK: add      sp, x29, #3816             // encoding: [0xbf,0xa3,0x3b,0x91]
+
+// And on sub
+        sub w0, wsp, #4077
+        sub w4, w20, #546, lsl #12
+        sub sp, sp, #288
+        sub wsp, w19, #16
+// CHECK: sub      w0, wsp, #4077             // encoding: [0xe0,0xb7,0x3f,0x51]
+// CHECK: sub      w4, w20, #546, lsl #12     // encoding: [0x84,0x8a,0x48,0x51]
+// CHECK: sub      sp, sp, #288               // encoding: [0xff,0x83,0x04,0xd1]
+// CHECK: sub      wsp, w19, #16              // encoding: [0x7f,0x42,0x00,0x51]
+
+// ADDS/SUBS accept zr in the Rd position but sp in the Rn position
+        adds w13, w23, #291, lsl #12
+        adds wzr, w2, #4095                  // FIXME: canonically should be cmn
+        adds w20, wsp, #0x0
+        adds xzr, x3, #0x1, lsl #12          // FIXME: canonically should be cmn
+// CHECK: adds     w13, w23, #291, lsl #12    // encoding: [0xed,0x8e,0x44,0x31]
+// CHECK: adds     wzr, w2, #4095             // encoding: [0x5f,0xfc,0x3f,0x31]
+// CHECK: adds     w20, wsp, #0               // encoding: [0xf4,0x03,0x00,0x31]
+// CHECK: adds     xzr, x3, #1, lsl #12       // encoding: [0x7f,0x04,0x40,0xb1]
+
+// Checks for subs
+        subs xzr, sp, #20, lsl #12           // FIXME: canonically should be cmp
+        subs xzr, x30, #4095, lsl #0         // FIXME: canonically should be cmp
+        subs x4, sp, #3822
+// CHECK: subs     xzr, sp, #20, lsl #12      // encoding: [0xff,0x53,0x40,0xf1]
+// CHECK: subs     xzr, x30, #4095            // encoding: [0xdf,0xff,0x3f,0xf1]
+// CHECK: subs     x4, sp, #3822              // encoding: [0xe4,0xbb,0x3b,0xf1]
+
+// cmn is an alias for adds zr, ...
+        cmn w3, #291, lsl #12
+        cmn wsp, #1365, lsl #0
+        cmn sp, #1092, lsl #12
+// CHECK: cmn      w3, #291, lsl #12          // encoding: [0x7f,0x8c,0x44,0x31]
+// CHECK: cmn      wsp, #1365                 // encoding: [0xff,0x57,0x15,0x31]
+// CHECK: cmn      sp, #1092, lsl #12         // encoding: [0xff,0x13,0x51,0xb1]
+
+// cmp is an alias for subs zr, ... (FIXME: should always disassemble as such too).
+        cmp x4, #300, lsl #12
+        cmp wsp, #500
+        cmp sp, #200, lsl #0
+// CHECK: cmp      x4, #300, lsl #12          // encoding: [0x9f,0xb0,0x44,0xf1]
+// CHECK: cmp      wsp, #500                  // encoding: [0xff,0xd3,0x07,0x71]
+// CHECK: cmp      sp, #200                   // encoding: [0xff,0x23,0x03,0xf1]
+
+// A "MOV" involving sp is encoded in this manner: add Reg, Reg, #0
+        mov sp, x30
+        mov wsp, w20
+        mov x11, sp
+        mov w24, wsp
+// CHECK: mov      sp, x30                    // encoding: [0xdf,0x03,0x00,0x91]
+// CHECK: mov      wsp, w20                   // encoding: [0x9f,0x02,0x00,0x11]
+// CHECK: mov      x11, sp                    // encoding: [0xeb,0x03,0x00,0x91]
+// CHECK: mov      w24, wsp                   // encoding: [0xf8,0x03,0x00,0x11]
+
+// A relocation check (default to lo12, which is the only sane relocation anyway really)
+        add x0, x4, #:lo12:var
+// CHECK: add     x0, x4, #:lo12:var         // encoding: [0x80'A',A,A,0x91'A']
+// CHECK:                                    //   fixup A - offset: 0, value: :lo12:var, kind: fixup_a64_add_lo12
+
+//------------------------------------------------------------------------------
+// Add-sub (shifted register)
+//------------------------------------------------------------------------------
+
+// As usual, we don't print the canonical forms of many instructions.
+
+        add w3, w5, w7
+        add wzr, w3, w5
+        add w20, wzr, w4
+        add w4, w6, wzr
+// CHECK: add      w3, w5, w7                 // encoding: [0xa3,0x00,0x07,0x0b]
+// CHECK: add      wzr, w3, w5                // encoding: [0x7f,0x00,0x05,0x0b]
+// CHECK: add      w20, wzr, w4               // encoding: [0xf4,0x03,0x04,0x0b]
+// CHECK: add      w4, w6, wzr                // encoding: [0xc4,0x00,0x1f,0x0b]
+
+        add w11, w13, w15, lsl #0
+        add w9, w3, wzr, lsl #10
+        add w17, w29, w20, lsl #31
+// CHECK: add      w11, w13, w15              // encoding: [0xab,0x01,0x0f,0x0b]
+// CHECK: add      w9, w3, wzr, lsl #10       // encoding: [0x69,0x28,0x1f,0x0b]
+// CHECK: add      w17, w29, w20, lsl #31     // encoding: [0xb1,0x7f,0x14,0x0b]
+
+        add w21, w22, w23, lsr #0
+        add w24, w25, w26, lsr #18
+        add w27, w28, w29, lsr #31
+// CHECK: add      w21, w22, w23, lsr #0      // encoding: [0xd5,0x02,0x57,0x0b]
+// CHECK: add      w24, w25, w26, lsr #18     // encoding: [0x38,0x4b,0x5a,0x0b]
+// CHECK: add      w27, w28, w29, lsr #31     // encoding: [0x9b,0x7f,0x5d,0x0b]
+
+        add w2, w3, w4, asr #0
+        add w5, w6, w7, asr #21
+        add w8, w9, w10, asr #31
+// CHECK: add      w2, w3, w4, asr #0         // encoding: [0x62,0x00,0x84,0x0b]
+// CHECK: add      w5, w6, w7, asr #21        // encoding: [0xc5,0x54,0x87,0x0b]
+// CHECK: add      w8, w9, w10, asr #31       // encoding: [0x28,0x7d,0x8a,0x0b]
+
+        add x3, x5, x7
+        add xzr, x3, x5
+        add x20, xzr, x4
+        add x4, x6, xzr
+// CHECK: add      x3, x5, x7                 // encoding: [0xa3,0x00,0x07,0x8b]
+// CHECK: add      xzr, x3, x5                // encoding: [0x7f,0x00,0x05,0x8b]
+// CHECK: add      x20, xzr, x4               // encoding: [0xf4,0x03,0x04,0x8b]
+// CHECK: add      x4, x6, xzr                // encoding: [0xc4,0x00,0x1f,0x8b]
+
+        add x11, x13, x15, lsl #0
+        add x9, x3, xzr, lsl #10
+        add x17, x29, x20, lsl #63
+// CHECK: add      x11, x13, x15              // encoding: [0xab,0x01,0x0f,0x8b]
+// CHECK: add      x9, x3, xzr, lsl #10       // encoding: [0x69,0x28,0x1f,0x8b]
+// CHECK: add      x17, x29, x20, lsl #63     // encoding: [0xb1,0xff,0x14,0x8b]
+
+        add x21, x22, x23, lsr #0
+        add x24, x25, x26, lsr #18
+        add x27, x28, x29, lsr #63
+// CHECK: add      x21, x22, x23, lsr #0      // encoding: [0xd5,0x02,0x57,0x8b]
+// CHECK: add      x24, x25, x26, lsr #18     // encoding: [0x38,0x4b,0x5a,0x8b]
+// CHECK: add      x27, x28, x29, lsr #63     // encoding: [0x9b,0xff,0x5d,0x8b]
+
+        add x2, x3, x4, asr #0
+        add x5, x6, x7, asr #21
+        add x8, x9, x10, asr #63
+// CHECK: add      x2, x3, x4, asr #0         // encoding: [0x62,0x00,0x84,0x8b]
+// CHECK: add      x5, x6, x7, asr #21        // encoding: [0xc5,0x54,0x87,0x8b]
+// CHECK: add      x8, x9, x10, asr #63       // encoding: [0x28,0xfd,0x8a,0x8b]
+
+        adds w3, w5, w7
+        adds wzr, w3, w5
+        adds w20, wzr, w4
+        adds w4, w6, wzr
+// CHECK: adds     w3, w5, w7                 // encoding: [0xa3,0x00,0x07,0x2b]
+// CHECK: adds     wzr, w3, w5                // encoding: [0x7f,0x00,0x05,0x2b]
+// CHECK: adds     w20, wzr, w4               // encoding: [0xf4,0x03,0x04,0x2b]
+// CHECK: adds     w4, w6, wzr                // encoding: [0xc4,0x00,0x1f,0x2b]
+
+        adds w11, w13, w15, lsl #0
+        adds w9, w3, wzr, lsl #10
+        adds w17, w29, w20, lsl #31
+// CHECK: adds     w11, w13, w15              // encoding: [0xab,0x01,0x0f,0x2b]
+// CHECK: adds     w9, w3, wzr, lsl #10       // encoding: [0x69,0x28,0x1f,0x2b]
+// CHECK: adds     w17, w29, w20, lsl #31     // encoding: [0xb1,0x7f,0x14,0x2b]
+
+        adds w21, w22, w23, lsr #0
+        adds w24, w25, w26, lsr #18
+        adds w27, w28, w29, lsr #31
+// CHECK: adds     w21, w22, w23, lsr #0      // encoding: [0xd5,0x02,0x57,0x2b]
+// CHECK: adds     w24, w25, w26, lsr #18     // encoding: [0x38,0x4b,0x5a,0x2b]
+// CHECK: adds     w27, w28, w29, lsr #31     // encoding: [0x9b,0x7f,0x5d,0x2b]
+
+        adds w2, w3, w4, asr #0
+        adds w5, w6, w7, asr #21
+        adds w8, w9, w10, asr #31
+// CHECK: adds     w2, w3, w4, asr #0         // encoding: [0x62,0x00,0x84,0x2b]
+// CHECK: adds     w5, w6, w7, asr #21        // encoding: [0xc5,0x54,0x87,0x2b]
+// CHECK: adds     w8, w9, w10, asr #31       // encoding: [0x28,0x7d,0x8a,0x2b]
+
+        adds x3, x5, x7
+        adds xzr, x3, x5
+        adds x20, xzr, x4
+        adds x4, x6, xzr
+// CHECK: adds     x3, x5, x7                 // encoding: [0xa3,0x00,0x07,0xab]
+// CHECK: adds     xzr, x3, x5                // encoding: [0x7f,0x00,0x05,0xab]
+// CHECK: adds     x20, xzr, x4               // encoding: [0xf4,0x03,0x04,0xab]
+// CHECK: adds     x4, x6, xzr                // encoding: [0xc4,0x00,0x1f,0xab]
+
+        adds x11, x13, x15, lsl #0
+        adds x9, x3, xzr, lsl #10
+        adds x17, x29, x20, lsl #63
+// CHECK: adds     x11, x13, x15              // encoding: [0xab,0x01,0x0f,0xab]
+// CHECK: adds     x9, x3, xzr, lsl #10       // encoding: [0x69,0x28,0x1f,0xab]
+// CHECK: adds     x17, x29, x20, lsl #63     // encoding: [0xb1,0xff,0x14,0xab]
+
+        adds x21, x22, x23, lsr #0
+        adds x24, x25, x26, lsr #18
+        adds x27, x28, x29, lsr #63
+// CHECK: adds     x21, x22, x23, lsr #0      // encoding: [0xd5,0x02,0x57,0xab]
+// CHECK: adds     x24, x25, x26, lsr #18     // encoding: [0x38,0x4b,0x5a,0xab]
+// CHECK: adds     x27, x28, x29, lsr #63     // encoding: [0x9b,0xff,0x5d,0xab]
+
+        adds x2, x3, x4, asr #0
+        adds x5, x6, x7, asr #21
+        adds x8, x9, x10, asr #63
+// CHECK: adds     x2, x3, x4, asr #0         // encoding: [0x62,0x00,0x84,0xab]
+// CHECK: adds     x5, x6, x7, asr #21        // encoding: [0xc5,0x54,0x87,0xab]
+// CHECK: adds     x8, x9, x10, asr #63       // encoding: [0x28,0xfd,0x8a,0xab]
+
+        sub w3, w5, w7
+        sub wzr, w3, w5
+        sub w20, wzr, w4
+        sub w4, w6, wzr
+// CHECK: sub      w3, w5, w7                 // encoding: [0xa3,0x00,0x07,0x4b]
+// CHECK: sub      wzr, w3, w5                // encoding: [0x7f,0x00,0x05,0x4b]
+// CHECK: sub      w20, wzr, w4               // encoding: [0xf4,0x03,0x04,0x4b]
+// CHECK: sub      w4, w6, wzr                // encoding: [0xc4,0x00,0x1f,0x4b]
+
+        sub w11, w13, w15, lsl #0
+        sub w9, w3, wzr, lsl #10
+        sub w17, w29, w20, lsl #31
+// CHECK: sub      w11, w13, w15              // encoding: [0xab,0x01,0x0f,0x4b]
+// CHECK: sub      w9, w3, wzr, lsl #10       // encoding: [0x69,0x28,0x1f,0x4b]
+// CHECK: sub      w17, w29, w20, lsl #31     // encoding: [0xb1,0x7f,0x14,0x4b]
+
+        sub w21, w22, w23, lsr #0
+        sub w24, w25, w26, lsr #18
+        sub w27, w28, w29, lsr #31
+// CHECK: sub      w21, w22, w23, lsr #0      // encoding: [0xd5,0x02,0x57,0x4b]
+// CHECK: sub      w24, w25, w26, lsr #18     // encoding: [0x38,0x4b,0x5a,0x4b]
+// CHECK: sub      w27, w28, w29, lsr #31     // encoding: [0x9b,0x7f,0x5d,0x4b]
+
+        sub w2, w3, w4, asr #0
+        sub w5, w6, w7, asr #21
+        sub w8, w9, w10, asr #31
+// CHECK: sub      w2, w3, w4, asr #0         // encoding: [0x62,0x00,0x84,0x4b]
+// CHECK: sub      w5, w6, w7, asr #21        // encoding: [0xc5,0x54,0x87,0x4b]
+// CHECK: sub      w8, w9, w10, asr #31       // encoding: [0x28,0x7d,0x8a,0x4b]
+
+        sub x3, x5, x7
+        sub xzr, x3, x5
+        sub x20, xzr, x4
+        sub x4, x6, xzr
+// CHECK: sub      x3, x5, x7                 // encoding: [0xa3,0x00,0x07,0xcb]
+// CHECK: sub      xzr, x3, x5                // encoding: [0x7f,0x00,0x05,0xcb]
+// CHECK: sub      x20, xzr, x4               // encoding: [0xf4,0x03,0x04,0xcb]
+// CHECK: sub      x4, x6, xzr                // encoding: [0xc4,0x00,0x1f,0xcb]
+
+        sub x11, x13, x15, lsl #0
+        sub x9, x3, xzr, lsl #10
+        sub x17, x29, x20, lsl #63
+// CHECK: sub      x11, x13, x15              // encoding: [0xab,0x01,0x0f,0xcb]
+// CHECK: sub      x9, x3, xzr, lsl #10       // encoding: [0x69,0x28,0x1f,0xcb]
+// CHECK: sub      x17, x29, x20, lsl #63     // encoding: [0xb1,0xff,0x14,0xcb]
+
+        sub x21, x22, x23, lsr #0
+        sub x24, x25, x26, lsr #18
+        sub x27, x28, x29, lsr #63
+// CHECK: sub      x21, x22, x23, lsr #0      // encoding: [0xd5,0x02,0x57,0xcb]
+// CHECK: sub      x24, x25, x26, lsr #18     // encoding: [0x38,0x4b,0x5a,0xcb]
+// CHECK: sub      x27, x28, x29, lsr #63     // encoding: [0x9b,0xff,0x5d,0xcb]
+
+        sub x2, x3, x4, asr #0
+        sub x5, x6, x7, asr #21
+        sub x8, x9, x10, asr #63
+// CHECK: sub      x2, x3, x4, asr #0         // encoding: [0x62,0x00,0x84,0xcb]
+// CHECK: sub      x5, x6, x7, asr #21        // encoding: [0xc5,0x54,0x87,0xcb]
+// CHECK: sub      x8, x9, x10, asr #63       // encoding: [0x28,0xfd,0x8a,0xcb]
+
+        subs w3, w5, w7
+        subs wzr, w3, w5
+        subs w20, wzr, w4
+        subs w4, w6, wzr
+// CHECK: subs     w3, w5, w7                 // encoding: [0xa3,0x00,0x07,0x6b]
+// CHECK: subs     wzr, w3, w5                // encoding: [0x7f,0x00,0x05,0x6b]
+// CHECK: subs     w20, wzr, w4               // encoding: [0xf4,0x03,0x04,0x6b]
+// CHECK: subs     w4, w6, wzr                // encoding: [0xc4,0x00,0x1f,0x6b]
+
+        subs w11, w13, w15, lsl #0
+        subs w9, w3, wzr, lsl #10
+        subs w17, w29, w20, lsl #31
+// CHECK: subs     w11, w13, w15              // encoding: [0xab,0x01,0x0f,0x6b]
+// CHECK: subs     w9, w3, wzr, lsl #10       // encoding: [0x69,0x28,0x1f,0x6b]
+// CHECK: subs     w17, w29, w20, lsl #31     // encoding: [0xb1,0x7f,0x14,0x6b]
+
+        subs w21, w22, w23, lsr #0
+        subs w24, w25, w26, lsr #18
+        subs w27, w28, w29, lsr #31
+// CHECK: subs     w21, w22, w23, lsr #0      // encoding: [0xd5,0x02,0x57,0x6b]
+// CHECK: subs     w24, w25, w26, lsr #18     // encoding: [0x38,0x4b,0x5a,0x6b]
+// CHECK: subs     w27, w28, w29, lsr #31     // encoding: [0x9b,0x7f,0x5d,0x6b]
+
+        subs w2, w3, w4, asr #0
+        subs w5, w6, w7, asr #21
+        subs w8, w9, w10, asr #31
+// CHECK: subs     w2, w3, w4, asr #0         // encoding: [0x62,0x00,0x84,0x6b]
+// CHECK: subs     w5, w6, w7, asr #21        // encoding: [0xc5,0x54,0x87,0x6b]
+// CHECK: subs     w8, w9, w10, asr #31       // encoding: [0x28,0x7d,0x8a,0x6b]
+
+        subs x3, x5, x7
+        subs xzr, x3, x5
+        subs x20, xzr, x4
+        subs x4, x6, xzr
+// CHECK: subs     x3, x5, x7                 // encoding: [0xa3,0x00,0x07,0xeb]
+// CHECK: subs     xzr, x3, x5                // encoding: [0x7f,0x00,0x05,0xeb]
+// CHECK: subs     x20, xzr, x4               // encoding: [0xf4,0x03,0x04,0xeb]
+// CHECK: subs     x4, x6, xzr                // encoding: [0xc4,0x00,0x1f,0xeb]
+
+        subs x11, x13, x15, lsl #0
+        subs x9, x3, xzr, lsl #10
+        subs x17, x29, x20, lsl #63
+// CHECK: subs     x11, x13, x15              // encoding: [0xab,0x01,0x0f,0xeb]
+// CHECK: subs     x9, x3, xzr, lsl #10       // encoding: [0x69,0x28,0x1f,0xeb]
+// CHECK: subs     x17, x29, x20, lsl #63     // encoding: [0xb1,0xff,0x14,0xeb]
+
+        subs x21, x22, x23, lsr #0
+        subs x24, x25, x26, lsr #18
+        subs x27, x28, x29, lsr #63
+// CHECK: subs     x21, x22, x23, lsr #0      // encoding: [0xd5,0x02,0x57,0xeb]
+// CHECK: subs     x24, x25, x26, lsr #18     // encoding: [0x38,0x4b,0x5a,0xeb]
+// CHECK: subs     x27, x28, x29, lsr #63     // encoding: [0x9b,0xff,0x5d,0xeb]
+
+        subs x2, x3, x4, asr #0
+        subs x5, x6, x7, asr #21
+        subs x8, x9, x10, asr #63
+// CHECK: subs     x2, x3, x4, asr #0         // encoding: [0x62,0x00,0x84,0xeb]
+// CHECK: subs     x5, x6, x7, asr #21        // encoding: [0xc5,0x54,0x87,0xeb]
+// CHECK: subs     x8, x9, x10, asr #63       // encoding: [0x28,0xfd,0x8a,0xeb]
+
+        cmn w0, w3
+        cmn wzr, w4
+        cmn w5, wzr
+// CHECK: cmn      w0, w3                     // encoding: [0x1f,0x00,0x03,0x2b]
+// CHECK: cmn      wzr, w4                    // encoding: [0xff,0x03,0x04,0x2b]
+// CHECK: cmn      w5, wzr                    // encoding: [0xbf,0x00,0x1f,0x2b]
+
+        cmn w6, w7, lsl #0
+        cmn w8, w9, lsl #15
+        cmn w10, w11, lsl #31
+// CHECK: cmn      w6, w7                     // encoding: [0xdf,0x00,0x07,0x2b]
+// CHECK: cmn      w8, w9, lsl #15            // encoding: [0x1f,0x3d,0x09,0x2b]
+// CHECK: cmn      w10, w11, lsl #31          // encoding: [0x5f,0x7d,0x0b,0x2b]
+
+        cmn w12, w13, lsr #0
+        cmn w14, w15, lsr #21
+        cmn w16, w17, lsr #31
+// CHECK: cmn      w12, w13, lsr #0           // encoding: [0x9f,0x01,0x4d,0x2b]
+// CHECK: cmn      w14, w15, lsr #21          // encoding: [0xdf,0x55,0x4f,0x2b]
+// CHECK: cmn      w16, w17, lsr #31          // encoding: [0x1f,0x7e,0x51,0x2b]
+
+        cmn w18, w19, asr #0
+        cmn w20, w21, asr #22
+        cmn w22, w23, asr #31
+// CHECK: cmn      w18, w19, asr #0           // encoding: [0x5f,0x02,0x93,0x2b]
+// CHECK: cmn      w20, w21, asr #22          // encoding: [0x9f,0x5a,0x95,0x2b]
+// CHECK: cmn      w22, w23, asr #31          // encoding: [0xdf,0x7e,0x97,0x2b]
+
+        cmn x0, x3
+        cmn xzr, x4
+        cmn x5, xzr
+// CHECK: cmn      x0, x3                     // encoding: [0x1f,0x00,0x03,0xab]
+// CHECK: cmn      xzr, x4                    // encoding: [0xff,0x03,0x04,0xab]
+// CHECK: cmn      x5, xzr                    // encoding: [0xbf,0x00,0x1f,0xab]
+
+        cmn x6, x7, lsl #0
+        cmn x8, x9, lsl #15
+        cmn x10, x11, lsl #63
+// CHECK: cmn      x6, x7                     // encoding: [0xdf,0x00,0x07,0xab]
+// CHECK: cmn      x8, x9, lsl #15            // encoding: [0x1f,0x3d,0x09,0xab]
+// CHECK: cmn      x10, x11, lsl #63          // encoding: [0x5f,0xfd,0x0b,0xab]
+
+        cmn x12, x13, lsr #0
+        cmn x14, x15, lsr #41
+        cmn x16, x17, lsr #63
+// CHECK: cmn      x12, x13, lsr #0           // encoding: [0x9f,0x01,0x4d,0xab]
+// CHECK: cmn      x14, x15, lsr #41          // encoding: [0xdf,0xa5,0x4f,0xab]
+// CHECK: cmn      x16, x17, lsr #63          // encoding: [0x1f,0xfe,0x51,0xab]
+
+        cmn x18, x19, asr #0
+        cmn x20, x21, asr #55
+        cmn x22, x23, asr #63
+// CHECK: cmn      x18, x19, asr #0           // encoding: [0x5f,0x02,0x93,0xab]
+// CHECK: cmn      x20, x21, asr #55          // encoding: [0x9f,0xde,0x95,0xab]
+// CHECK: cmn      x22, x23, asr #63          // encoding: [0xdf,0xfe,0x97,0xab]
+
+        cmp w0, w3
+        cmp wzr, w4
+        cmp w5, wzr
+// CHECK: cmp      w0, w3                     // encoding: [0x1f,0x00,0x03,0x6b]
+// CHECK: cmp      wzr, w4                    // encoding: [0xff,0x03,0x04,0x6b]
+// CHECK: cmp      w5, wzr                    // encoding: [0xbf,0x00,0x1f,0x6b]
+
+        cmp w6, w7, lsl #0
+        cmp w8, w9, lsl #15
+        cmp w10, w11, lsl #31
+// CHECK: cmp      w6, w7                     // encoding: [0xdf,0x00,0x07,0x6b]
+// CHECK: cmp      w8, w9, lsl #15            // encoding: [0x1f,0x3d,0x09,0x6b]
+// CHECK: cmp      w10, w11, lsl #31          // encoding: [0x5f,0x7d,0x0b,0x6b]
+
+        cmp w12, w13, lsr #0
+        cmp w14, w15, lsr #21
+        cmp w16, w17, lsr #31
+// CHECK: cmp      w12, w13, lsr #0           // encoding: [0x9f,0x01,0x4d,0x6b]
+// CHECK: cmp      w14, w15, lsr #21          // encoding: [0xdf,0x55,0x4f,0x6b]
+// CHECK: cmp      w16, w17, lsr #31          // encoding: [0x1f,0x7e,0x51,0x6b]
+
+        cmp w18, w19, asr #0
+        cmp w20, w21, asr #22
+        cmp w22, w23, asr #31
+// CHECK: cmp      w18, w19, asr #0           // encoding: [0x5f,0x02,0x93,0x6b]
+// CHECK: cmp      w20, w21, asr #22          // encoding: [0x9f,0x5a,0x95,0x6b]
+// CHECK: cmp      w22, w23, asr #31          // encoding: [0xdf,0x7e,0x97,0x6b]
+
+        cmp x0, x3
+        cmp xzr, x4
+        cmp x5, xzr
+// CHECK: cmp      x0, x3                     // encoding: [0x1f,0x00,0x03,0xeb]
+// CHECK: cmp      xzr, x4                    // encoding: [0xff,0x03,0x04,0xeb]
+// CHECK: cmp      x5, xzr                    // encoding: [0xbf,0x00,0x1f,0xeb]
+
+        cmp x6, x7, lsl #0
+        cmp x8, x9, lsl #15
+        cmp x10, x11, lsl #63
+// CHECK: cmp      x6, x7                     // encoding: [0xdf,0x00,0x07,0xeb]
+// CHECK: cmp      x8, x9, lsl #15            // encoding: [0x1f,0x3d,0x09,0xeb]
+// CHECK: cmp      x10, x11, lsl #63          // encoding: [0x5f,0xfd,0x0b,0xeb]
+
+        cmp x12, x13, lsr #0
+        cmp x14, x15, lsr #41
+        cmp x16, x17, lsr #63
+// CHECK: cmp      x12, x13, lsr #0           // encoding: [0x9f,0x01,0x4d,0xeb]
+// CHECK: cmp      x14, x15, lsr #41          // encoding: [0xdf,0xa5,0x4f,0xeb]
+// CHECK: cmp      x16, x17, lsr #63          // encoding: [0x1f,0xfe,0x51,0xeb]
+
+        cmp x18, x19, asr #0
+        cmp x20, x21, asr #55
+        cmp x22, x23, asr #63
+// CHECK: cmp      x18, x19, asr #0           // encoding: [0x5f,0x02,0x93,0xeb]
+// CHECK: cmp      x20, x21, asr #55          // encoding: [0x9f,0xde,0x95,0xeb]
+// CHECK: cmp      x22, x23, asr #63          // encoding: [0xdf,0xfe,0x97,0xeb]
+
+        neg w29, w30
+        neg w30, wzr
+        neg wzr, w0
+// CHECK: sub      w29, wzr, w30              // encoding: [0xfd,0x03,0x1e,0x4b]
+// CHECK: sub      w30, wzr, wzr              // encoding: [0xfe,0x03,0x1f,0x4b]
+// CHECK: sub      wzr, wzr, w0               // encoding: [0xff,0x03,0x00,0x4b]
+
+        neg w28, w27, lsl #0
+        neg w26, w25, lsl #29
+        neg w24, w23, lsl #31
+// CHECK: sub      w28, wzr, w27              // encoding: [0xfc,0x03,0x1b,0x4b]
+// CHECK: sub      w26, wzr, w25, lsl #29     // encoding: [0xfa,0x77,0x19,0x4b]
+// CHECK: sub      w24, wzr, w23, lsl #31     // encoding: [0xf8,0x7f,0x17,0x4b]
+
+        neg w22, w21, lsr #0
+        neg w20, w19, lsr #1
+        neg w18, w17, lsr #31
+// CHECK: sub      w22, wzr, w21, lsr #0      // encoding: [0xf6,0x03,0x55,0x4b]
+// CHECK: sub      w20, wzr, w19, lsr #1      // encoding: [0xf4,0x07,0x53,0x4b]
+// CHECK: sub      w18, wzr, w17, lsr #31     // encoding: [0xf2,0x7f,0x51,0x4b]
+
+        neg w16, w15, asr #0
+        neg w14, w13, asr #12
+        neg w12, w11, asr #31
+// CHECK: sub      w16, wzr, w15, asr #0      // encoding: [0xf0,0x03,0x8f,0x4b]
+// CHECK: sub      w14, wzr, w13, asr #12     // encoding: [0xee,0x33,0x8d,0x4b]
+// CHECK: sub      w12, wzr, w11, asr #31     // encoding: [0xec,0x7f,0x8b,0x4b]
+
+        neg x29, x30
+        neg x30, xzr
+        neg xzr, x0
+// CHECK: sub      x29, xzr, x30              // encoding: [0xfd,0x03,0x1e,0xcb]
+// CHECK: sub      x30, xzr, xzr              // encoding: [0xfe,0x03,0x1f,0xcb]
+// CHECK: sub      xzr, xzr, x0               // encoding: [0xff,0x03,0x00,0xcb]
+
+        neg x28, x27, lsl #0
+        neg x26, x25, lsl #29
+        neg x24, x23, lsl #31
+// CHECK: sub      x28, xzr, x27              // encoding: [0xfc,0x03,0x1b,0xcb]
+// CHECK: sub      x26, xzr, x25, lsl #29     // encoding: [0xfa,0x77,0x19,0xcb]
+// CHECK: sub      x24, xzr, x23, lsl #31     // encoding: [0xf8,0x7f,0x17,0xcb]
+
+        neg x22, x21, lsr #0
+        neg x20, x19, lsr #1
+        neg x18, x17, lsr #31
+// CHECK: sub      x22, xzr, x21, lsr #0      // encoding: [0xf6,0x03,0x55,0xcb]
+// CHECK: sub      x20, xzr, x19, lsr #1      // encoding: [0xf4,0x07,0x53,0xcb]
+// CHECK: sub      x18, xzr, x17, lsr #31     // encoding: [0xf2,0x7f,0x51,0xcb]
+
+        neg x16, x15, asr #0
+        neg x14, x13, asr #12
+        neg x12, x11, asr #31
+// CHECK: sub      x16, xzr, x15, asr #0      // encoding: [0xf0,0x03,0x8f,0xcb]
+// CHECK: sub      x14, xzr, x13, asr #12     // encoding: [0xee,0x33,0x8d,0xcb]
+// CHECK: sub      x12, xzr, x11, asr #31     // encoding: [0xec,0x7f,0x8b,0xcb]
+
+        negs w29, w30
+        negs w30, wzr
+        negs wzr, w0
+// CHECK: subs     w29, wzr, w30              // encoding: [0xfd,0x03,0x1e,0x6b]
+// CHECK: subs     w30, wzr, wzr              // encoding: [0xfe,0x03,0x1f,0x6b]
+// CHECK: subs     wzr, wzr, w0               // encoding: [0xff,0x03,0x00,0x6b]
+
+        negs w28, w27, lsl #0
+        negs w26, w25, lsl #29
+        negs w24, w23, lsl #31
+// CHECK: subs     w28, wzr, w27              // encoding: [0xfc,0x03,0x1b,0x6b]
+// CHECK: subs     w26, wzr, w25, lsl #29     // encoding: [0xfa,0x77,0x19,0x6b]
+// CHECK: subs     w24, wzr, w23, lsl #31     // encoding: [0xf8,0x7f,0x17,0x6b]
+
+        negs w22, w21, lsr #0
+        negs w20, w19, lsr #1
+        negs w18, w17, lsr #31
+// CHECK: subs     w22, wzr, w21, lsr #0      // encoding: [0xf6,0x03,0x55,0x6b]
+// CHECK: subs     w20, wzr, w19, lsr #1      // encoding: [0xf4,0x07,0x53,0x6b]
+// CHECK: subs     w18, wzr, w17, lsr #31     // encoding: [0xf2,0x7f,0x51,0x6b]
+
+        negs w16, w15, asr #0
+        negs w14, w13, asr #12
+        negs w12, w11, asr #31
+// CHECK: subs     w16, wzr, w15, asr #0      // encoding: [0xf0,0x03,0x8f,0x6b]
+// CHECK: subs     w14, wzr, w13, asr #12     // encoding: [0xee,0x33,0x8d,0x6b]
+// CHECK: subs     w12, wzr, w11, asr #31     // encoding: [0xec,0x7f,0x8b,0x6b]
+
+        negs x29, x30
+        negs x30, xzr
+        negs xzr, x0
+// CHECK: subs     x29, xzr, x30              // encoding: [0xfd,0x03,0x1e,0xeb]
+// CHECK: subs     x30, xzr, xzr              // encoding: [0xfe,0x03,0x1f,0xeb]
+// CHECK: subs     xzr, xzr, x0               // encoding: [0xff,0x03,0x00,0xeb]
+
+        negs x28, x27, lsl #0
+        negs x26, x25, lsl #29
+        negs x24, x23, lsl #31
+// CHECK: subs     x28, xzr, x27              // encoding: [0xfc,0x03,0x1b,0xeb]
+// CHECK: subs     x26, xzr, x25, lsl #29     // encoding: [0xfa,0x77,0x19,0xeb]
+// CHECK: subs     x24, xzr, x23, lsl #31     // encoding: [0xf8,0x7f,0x17,0xeb]
+
+        negs x22, x21, lsr #0
+        negs x20, x19, lsr #1
+        negs x18, x17, lsr #31
+// CHECK: subs     x22, xzr, x21, lsr #0      // encoding: [0xf6,0x03,0x55,0xeb]
+// CHECK: subs     x20, xzr, x19, lsr #1      // encoding: [0xf4,0x07,0x53,0xeb]
+// CHECK: subs     x18, xzr, x17, lsr #31     // encoding: [0xf2,0x7f,0x51,0xeb]
+
+        negs x16, x15, asr #0
+        negs x14, x13, asr #12
+        negs x12, x11, asr #31
+// CHECK: subs     x16, xzr, x15, asr #0      // encoding: [0xf0,0x03,0x8f,0xeb]
+// CHECK: subs     x14, xzr, x13, asr #12     // encoding: [0xee,0x33,0x8d,0xeb]
+// CHECK: subs     x12, xzr, x11, asr #31     // encoding: [0xec,0x7f,0x8b,0xeb]
+
+//------------------------------------------------------------------------------
+// Add-subtract (with carry)
+//------------------------------------------------------------------------------
+        adc w29, w27, w25
+        adc wzr, w3, w4
+        adc w9, wzr, w10
+        adc w20, w0, wzr
+// CHECK: adc      w29, w27, w25              // encoding: [0x7d,0x03,0x19,0x1a]
+// CHECK: adc      wzr, w3, w4                // encoding: [0x7f,0x00,0x04,0x1a]
+// CHECK: adc      w9, wzr, w10               // encoding: [0xe9,0x03,0x0a,0x1a]
+// CHECK: adc      w20, w0, wzr               // encoding: [0x14,0x00,0x1f,0x1a]
+
+        adc x29, x27, x25
+        adc xzr, x3, x4
+        adc x9, xzr, x10
+        adc x20, x0, xzr
+// CHECK: adc      x29, x27, x25              // encoding: [0x7d,0x03,0x19,0x9a]
+// CHECK: adc      xzr, x3, x4                // encoding: [0x7f,0x00,0x04,0x9a]
+// CHECK: adc      x9, xzr, x10               // encoding: [0xe9,0x03,0x0a,0x9a]
+// CHECK: adc      x20, x0, xzr               // encoding: [0x14,0x00,0x1f,0x9a]
+
+        adcs w29, w27, w25
+        adcs wzr, w3, w4
+        adcs w9, wzr, w10
+        adcs w20, w0, wzr
+// CHECK: adcs     w29, w27, w25              // encoding: [0x7d,0x03,0x19,0x3a]
+// CHECK: adcs     wzr, w3, w4                // encoding: [0x7f,0x00,0x04,0x3a]
+// CHECK: adcs     w9, wzr, w10               // encoding: [0xe9,0x03,0x0a,0x3a]
+// CHECK: adcs     w20, w0, wzr               // encoding: [0x14,0x00,0x1f,0x3a]
+
+        adcs x29, x27, x25
+        adcs xzr, x3, x4
+        adcs x9, xzr, x10
+        adcs x20, x0, xzr
+// CHECK: adcs     x29, x27, x25              // encoding: [0x7d,0x03,0x19,0xba]
+// CHECK: adcs     xzr, x3, x4                // encoding: [0x7f,0x00,0x04,0xba]
+// CHECK: adcs     x9, xzr, x10               // encoding: [0xe9,0x03,0x0a,0xba]
+// CHECK: adcs     x20, x0, xzr               // encoding: [0x14,0x00,0x1f,0xba]
+
+        sbc w29, w27, w25
+        sbc wzr, w3, w4
+        sbc w9, wzr, w10
+        sbc w20, w0, wzr
+// CHECK: sbc      w29, w27, w25              // encoding: [0x7d,0x03,0x19,0x5a]
+// CHECK: sbc      wzr, w3, w4                // encoding: [0x7f,0x00,0x04,0x5a]
+// CHECK: ngc      w9, w10                    // encoding: [0xe9,0x03,0x0a,0x5a]
+// CHECK: sbc      w20, w0, wzr               // encoding: [0x14,0x00,0x1f,0x5a]
+
+        sbc x29, x27, x25
+        sbc xzr, x3, x4
+        sbc x9, xzr, x10
+        sbc x20, x0, xzr
+// CHECK: sbc      x29, x27, x25              // encoding: [0x7d,0x03,0x19,0xda]
+// CHECK: sbc      xzr, x3, x4                // encoding: [0x7f,0x00,0x04,0xda]
+// CHECK: ngc      x9, x10                    // encoding: [0xe9,0x03,0x0a,0xda]
+// CHECK: sbc      x20, x0, xzr               // encoding: [0x14,0x00,0x1f,0xda]
+
+        sbcs w29, w27, w25
+        sbcs wzr, w3, w4
+        sbcs w9, wzr, w10
+        sbcs w20, w0, wzr
+// CHECK: sbcs     w29, w27, w25              // encoding: [0x7d,0x03,0x19,0x7a]
+// CHECK: sbcs     wzr, w3, w4                // encoding: [0x7f,0x00,0x04,0x7a]
+// CHECK: ngcs     w9, w10                    // encoding: [0xe9,0x03,0x0a,0x7a]
+// CHECK: sbcs     w20, w0, wzr               // encoding: [0x14,0x00,0x1f,0x7a]
+
+        sbcs x29, x27, x25
+        sbcs xzr, x3, x4
+        sbcs x9, xzr, x10
+        sbcs x20, x0, xzr
+// CHECK: sbcs     x29, x27, x25              // encoding: [0x7d,0x03,0x19,0xfa]
+// CHECK: sbcs     xzr, x3, x4                // encoding: [0x7f,0x00,0x04,0xfa]
+// CHECK: ngcs     x9, x10                    // encoding: [0xe9,0x03,0x0a,0xfa]
+// CHECK: sbcs     x20, x0, xzr               // encoding: [0x14,0x00,0x1f,0xfa]
+
+        ngc w3, w12
+        ngc wzr, w9
+        ngc w23, wzr
+// CHECK: ngc      w3, w12                    // encoding: [0xe3,0x03,0x0c,0x5a]
+// CHECK: ngc      wzr, w9                    // encoding: [0xff,0x03,0x09,0x5a]
+// CHECK: ngc      w23, wzr                   // encoding: [0xf7,0x03,0x1f,0x5a]
+
+        ngc x29, x30
+        ngc xzr, x0
+        ngc x0, xzr
+// CHECK: ngc      x29, x30                   // encoding: [0xfd,0x03,0x1e,0xda]
+// CHECK: ngc      xzr, x0                    // encoding: [0xff,0x03,0x00,0xda]
+// CHECK: ngc      x0, xzr                    // encoding: [0xe0,0x03,0x1f,0xda]
+
+        ngcs w3, w12
+        ngcs wzr, w9
+        ngcs w23, wzr
+// CHECK: ngcs     w3, w12                    // encoding: [0xe3,0x03,0x0c,0x7a]
+// CHECK: ngcs     wzr, w9                    // encoding: [0xff,0x03,0x09,0x7a]
+// CHECK: ngcs     w23, wzr                   // encoding: [0xf7,0x03,0x1f,0x7a]
+
+        ngcs x29, x30
+        ngcs xzr, x0
+        ngcs x0, xzr
+// CHECK: ngcs     x29, x30                   // encoding: [0xfd,0x03,0x1e,0xfa]
+// CHECK: ngcs     xzr, x0                    // encoding: [0xff,0x03,0x00,0xfa]
+// CHECK: ngcs     x0, xzr                    // encoding: [0xe0,0x03,0x1f,0xfa]
+
+//------------------------------------------------------------------------------
+// Bitfield
+//------------------------------------------------------------------------------
+
+        sbfm x1, x2, #3, #4
+        sbfm x3, x4, #63, #63
+        sbfm wzr, wzr, #31, #31
+        sbfm w12, w9, #0, #0
+// CHECK: sbfm     x1, x2, #3, #4             // encoding: [0x41,0x10,0x43,0x93]
+// CHECK: sbfm     x3, x4, #63, #63           // encoding: [0x83,0xfc,0x7f,0x93]
+// CHECK: sbfm     wzr, wzr, #31, #31         // encoding: [0xff,0x7f,0x1f,0x13]
+// CHECK: sbfm     w12, w9, #0, #0            // encoding: [0x2c,0x01,0x00,0x13]
+
+        ubfm x4, x5, #12, #10
+        ubfm xzr, x4, #0, #0
+        ubfm x4, xzr, #63, #5
+        ubfm x5, x6, #12, #63
+// CHECK: ubfm     x4, x5, #12, #10           // encoding: [0xa4,0x28,0x4c,0xd3]
+// CHECK: ubfm     xzr, x4, #0, #0            // encoding: [0x9f,0x00,0x40,0xd3]
+// CHECK: ubfm     x4, xzr, #63, #5           // encoding: [0xe4,0x17,0x7f,0xd3]
+// CHECK: ubfm     x5, x6, #12, #63           // encoding: [0xc5,0xfc,0x4c,0xd3]
+
+        bfm x4, x5, #12, #10
+        bfm xzr, x4, #0, #0
+        bfm x4, xzr, #63, #5
+        bfm x5, x6, #12, #63
+// CHECK: bfm      x4, x5, #12, #10           // encoding: [0xa4,0x28,0x4c,0xb3]
+// CHECK: bfm      xzr, x4, #0, #0            // encoding: [0x9f,0x00,0x40,0xb3]
+// CHECK: bfm      x4, xzr, #63, #5           // encoding: [0xe4,0x17,0x7f,0xb3]
+// CHECK: bfm      x5, x6, #12, #63           // encoding: [0xc5,0xfc,0x4c,0xb3]
+
+        sxtb w1, w2
+        sxtb xzr, w3
+        sxth w9, w10
+        sxth x0, w1
+        sxtw x3, w30
+// CHECK: sxtb     w1, w2                     // encoding: [0x41,0x1c,0x00,0x13]
+// CHECK: sxtb     xzr, w3                    // encoding: [0x7f,0x1c,0x40,0x93]
+// CHECK: sxth     w9, w10                    // encoding: [0x49,0x3d,0x00,0x13]
+// CHECK: sxth     x0, w1                     // encoding: [0x20,0x3c,0x40,0x93]
+// CHECK: sxtw     x3, w30                    // encoding: [0xc3,0x7f,0x40,0x93]
+
+        uxtb w1, w2
+        uxtb xzr, w3
+        uxth w9, w10
+        uxth x0, w1
+// CHECK: uxtb     w1, w2                     // encoding: [0x41,0x1c,0x00,0x53]
+// CHECK: uxtb     xzr, w3                    // encoding: [0x7f,0x1c,0x00,0x53]
+// CHECK: uxth     w9, w10                    // encoding: [0x49,0x3d,0x00,0x53]
+// CHECK: uxth     x0, w1                     // encoding: [0x20,0x3c,0x00,0x53]
+
+        asr w3, w2, #0
+        asr w9, w10, #31
+        asr x20, x21, #63
+        asr w1, wzr, #3
+// CHECK: asr      w3, w2, #0                 // encoding: [0x43,0x7c,0x00,0x13]
+// CHECK: asr      w9, w10, #31               // encoding: [0x49,0x7d,0x1f,0x13]
+// CHECK: asr      x20, x21, #63              // encoding: [0xb4,0xfe,0x7f,0x93]
+// CHECK: asr      w1, wzr, #3                // encoding: [0xe1,0x7f,0x03,0x13]
+
+        lsr w3, w2, #0
+        lsr w9, w10, #31
+        lsr x20, x21, #63
+        lsr wzr, wzr, #3
+// CHECK: lsr      w3, w2, #0                 // encoding: [0x43,0x7c,0x00,0x53]
+// CHECK: lsr      w9, w10, #31               // encoding: [0x49,0x7d,0x1f,0x53]
+// CHECK: lsr      x20, x21, #63              // encoding: [0xb4,0xfe,0x7f,0xd3]
+// CHECK: lsr      wzr, wzr, #3               // encoding: [0xff,0x7f,0x03,0x53]
+
+        lsl w3, w2, #0
+        lsl w9, w10, #31
+        lsl x20, x21, #63
+        lsl w1, wzr, #3
+// CHECK: lsl      w3, w2, #0                 // encoding: [0x43,0x7c,0x00,0x53]
+// CHECK: lsl      w9, w10, #31               // encoding: [0x49,0x01,0x01,0x53]
+// CHECK: lsl      x20, x21, #63              // encoding: [0xb4,0x02,0x41,0xd3]
+// CHECK: lsl      w1, wzr, #3                // encoding: [0xe1,0x73,0x1d,0x53]
+
+        sbfiz w9, w10, #0, #1
+        sbfiz x2, x3, #63, #1
+        sbfiz x19, x20, #0, #64
+        sbfiz x9, x10, #5, #59
+        sbfiz w9, w10, #0, #32
+        sbfiz w11, w12, #31, #1
+        sbfiz w13, w14, #29, #3
+        sbfiz xzr, xzr, #10, #11
+// CHECK: sbfiz    w9, w10, #0, #1            // encoding: [0x49,0x01,0x00,0x13]
+// CHECK: sbfiz    x2, x3, #63, #1            // encoding: [0x62,0x00,0x41,0x93]
+// CHECK: sbfiz    x19, x20, #0, #64          // encoding: [0x93,0xfe,0x40,0x93]
+// CHECK: sbfiz    x9, x10, #5, #59           // encoding: [0x49,0xe9,0x7b,0x93]
+// CHECK: sbfiz    w9, w10, #0, #32           // encoding: [0x49,0x7d,0x00,0x13]
+// CHECK: sbfiz    w11, w12, #31, #1          // encoding: [0x8b,0x01,0x01,0x13]
+// CHECK: sbfiz    w13, w14, #29, #3          // encoding: [0xcd,0x09,0x03,0x13]
+// CHECK: sbfiz    xzr, xzr, #10, #11         // encoding: [0xff,0x2b,0x76,0x93]
+
+        sbfx w9, w10, #0, #1
+        sbfx x2, x3, #63, #1
+        sbfx x19, x20, #0, #64
+        sbfx x9, x10, #5, #59
+        sbfx w9, w10, #0, #32
+        sbfx w11, w12, #31, #1
+        sbfx w13, w14, #29, #3
+        sbfx xzr, xzr, #10, #11
+// CHECK: sbfx     w9, w10, #0, #1            // encoding: [0x49,0x01,0x00,0x13]
+// CHECK: sbfx     x2, x3, #63, #1            // encoding: [0x62,0xfc,0x7f,0x93]
+// CHECK: sbfx     x19, x20, #0, #64          // encoding: [0x93,0xfe,0x40,0x93]
+// CHECK: sbfx     x9, x10, #5, #59           // encoding: [0x49,0xfd,0x45,0x93]
+// CHECK: sbfx     w9, w10, #0, #32           // encoding: [0x49,0x7d,0x00,0x13]
+// CHECK: sbfx     w11, w12, #31, #1          // encoding: [0x8b,0x7d,0x1f,0x13]
+// CHECK: sbfx     w13, w14, #29, #3          // encoding: [0xcd,0x7d,0x1d,0x13]
+// CHECK: sbfx     xzr, xzr, #10, #11         // encoding: [0xff,0x53,0x4a,0x93]
+
+        bfi w9, w10, #0, #1
+        bfi x2, x3, #63, #1
+        bfi x19, x20, #0, #64
+        bfi x9, x10, #5, #59
+        bfi w9, w10, #0, #32
+        bfi w11, w12, #31, #1
+        bfi w13, w14, #29, #3
+        bfi xzr, xzr, #10, #11
+// CHECK: bfi      w9, w10, #0, #1            // encoding: [0x49,0x01,0x00,0x33]
+// CHECK: bfi      x2, x3, #63, #1            // encoding: [0x62,0x00,0x41,0xb3]
+// CHECK: bfi      x19, x20, #0, #64          // encoding: [0x93,0xfe,0x40,0xb3]
+// CHECK: bfi      x9, x10, #5, #59           // encoding: [0x49,0xe9,0x7b,0xb3]
+// CHECK: bfi      w9, w10, #0, #32           // encoding: [0x49,0x7d,0x00,0x33]
+// CHECK: bfi      w11, w12, #31, #1          // encoding: [0x8b,0x01,0x01,0x33]
+// CHECK: bfi      w13, w14, #29, #3          // encoding: [0xcd,0x09,0x03,0x33]
+// CHECK: bfi      xzr, xzr, #10, #11         // encoding: [0xff,0x2b,0x76,0xb3]
+
+        bfxil w9, w10, #0, #1
+        bfxil x2, x3, #63, #1
+        bfxil x19, x20, #0, #64
+        bfxil x9, x10, #5, #59
+        bfxil w9, w10, #0, #32
+        bfxil w11, w12, #31, #1
+        bfxil w13, w14, #29, #3
+        bfxil xzr, xzr, #10, #11
+// CHECK: bfxil    w9, w10, #0, #1            // encoding: [0x49,0x01,0x00,0x33]
+// CHECK: bfxil    x2, x3, #63, #1            // encoding: [0x62,0xfc,0x7f,0xb3]
+// CHECK: bfxil    x19, x20, #0, #64          // encoding: [0x93,0xfe,0x40,0xb3]
+// CHECK: bfxil    x9, x10, #5, #59           // encoding: [0x49,0xfd,0x45,0xb3]
+// CHECK: bfxil    w9, w10, #0, #32           // encoding: [0x49,0x7d,0x00,0x33]
+// CHECK: bfxil    w11, w12, #31, #1          // encoding: [0x8b,0x7d,0x1f,0x33]
+// CHECK: bfxil    w13, w14, #29, #3          // encoding: [0xcd,0x7d,0x1d,0x33]
+// CHECK: bfxil    xzr, xzr, #10, #11         // encoding: [0xff,0x53,0x4a,0xb3]
+
+        ubfiz w9, w10, #0, #1
+        ubfiz x2, x3, #63, #1
+        ubfiz x19, x20, #0, #64
+        ubfiz x9, x10, #5, #59
+        ubfiz w9, w10, #0, #32
+        ubfiz w11, w12, #31, #1
+        ubfiz w13, w14, #29, #3
+        ubfiz xzr, xzr, #10, #11
+// CHECK: ubfiz    w9, w10, #0, #1            // encoding: [0x49,0x01,0x00,0x53]
+// CHECK: ubfiz    x2, x3, #63, #1            // encoding: [0x62,0x00,0x41,0xd3]
+// CHECK: ubfiz    x19, x20, #0, #64          // encoding: [0x93,0xfe,0x40,0xd3]
+// CHECK: ubfiz    x9, x10, #5, #59           // encoding: [0x49,0xe9,0x7b,0xd3]
+// CHECK: ubfiz    w9, w10, #0, #32           // encoding: [0x49,0x7d,0x00,0x53]
+// CHECK: ubfiz    w11, w12, #31, #1          // encoding: [0x8b,0x01,0x01,0x53]
+// CHECK: ubfiz    w13, w14, #29, #3          // encoding: [0xcd,0x09,0x03,0x53]
+// CHECK: ubfiz    xzr, xzr, #10, #11         // encoding: [0xff,0x2b,0x76,0xd3]
+
+        ubfx w9, w10, #0, #1
+        ubfx x2, x3, #63, #1
+        ubfx x19, x20, #0, #64
+        ubfx x9, x10, #5, #59
+        ubfx w9, w10, #0, #32
+        ubfx w11, w12, #31, #1
+        ubfx w13, w14, #29, #3
+        ubfx xzr, xzr, #10, #11
+// CHECK: ubfx     w9, w10, #0, #1            // encoding: [0x49,0x01,0x00,0x53]
+// CHECK: ubfx     x2, x3, #63, #1            // encoding: [0x62,0xfc,0x7f,0xd3]
+// CHECK: ubfx     x19, x20, #0, #64          // encoding: [0x93,0xfe,0x40,0xd3]
+// CHECK: ubfx     x9, x10, #5, #59           // encoding: [0x49,0xfd,0x45,0xd3]
+// CHECK: ubfx     w9, w10, #0, #32           // encoding: [0x49,0x7d,0x00,0x53]
+// CHECK: ubfx     w11, w12, #31, #1          // encoding: [0x8b,0x7d,0x1f,0x53]
+// CHECK: ubfx     w13, w14, #29, #3          // encoding: [0xcd,0x7d,0x1d,0x53]
+// CHECK: ubfx     xzr, xzr, #10, #11         // encoding: [0xff,0x53,0x4a,0xd3]
+
+//------------------------------------------------------------------------------
+// Compare & branch (immediate)
+//------------------------------------------------------------------------------
+
+        cbz w5, lbl
+        cbz x5, lbl
+        cbnz x2, lbl
+        cbnz x26, lbl
+// CHECK: cbz      w5, lbl                // encoding: [0x05'A',A,A,0x34'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: cbz      x5, lbl                // encoding: [0x05'A',A,A,0xb4'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: cbnz     x2, lbl                // encoding: [0x02'A',A,A,0xb5'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: cbnz     x26, lbl               // encoding: [0x1a'A',A,A,0xb5'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+
+        cbz wzr, lbl
+        cbnz xzr, lbl
+// CHECK: cbz      wzr, lbl               // encoding: [0x1f'A',A,A,0x34'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: cbnz     xzr, lbl               // encoding: [0x1f'A',A,A,0xb5'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+
+        cbz w5, #0
+        cbnz x3, #-4
+        cbz w20, #1048572
+        cbnz xzr, #-1048576
+// CHECK: cbz     w5, #0                  // encoding: [0x05,0x00,0x00,0x34]
+// CHECK: cbnz    x3, #-4                 // encoding: [0xe3,0xff,0xff,0xb5]
+// CHECK: cbz     w20, #1048572           // encoding: [0xf4,0xff,0x7f,0x34]
+// CHECK: cbnz    xzr, #-1048576          // encoding: [0x1f,0x00,0x80,0xb5]
+
+//------------------------------------------------------------------------------
+// Conditional branch (immediate)
+//------------------------------------------------------------------------------
+
+        b.eq lbl
+        b.ne lbl
+        b.cs lbl
+        b.hs lbl
+        b.lo lbl
+        b.cc lbl
+        b.mi lbl
+        b.pl lbl
+        b.vs lbl
+        b.vc lbl
+        b.hi lbl
+        b.ls lbl
+        b.ge lbl
+        b.lt lbl
+        b.gt lbl
+        b.le lbl
+        b.al lbl
+// CHECK: b.eq lbl                        // encoding: [A,A,A,0x54'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.ne lbl                        // encoding: [0x01'A',A,A,0x54'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.hs lbl                        // encoding: [0x02'A',A,A,0x54'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.hs lbl                        // encoding: [0x02'A',A,A,0x54'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.lo lbl                        // encoding: [0x03'A',A,A,0x54'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.lo lbl                        // encoding: [0x03'A',A,A,0x54'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.mi lbl                        // encoding: [0x04'A',A,A,0x54'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.pl lbl                        // encoding: [0x05'A',A,A,0x54'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.vs lbl                        // encoding: [0x06'A',A,A,0x54'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.vc lbl                        // encoding: [0x07'A',A,A,0x54'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.hi lbl                        // encoding: [0x08'A',A,A,0x54'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.ls lbl                        // encoding: [0x09'A',A,A,0x54'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.ge lbl                        // encoding: [0x0a'A',A,A,0x54'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.lt lbl                        // encoding: [0x0b'A',A,A,0x54'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.gt lbl                        // encoding: [0x0c'A',A,A,0x54'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.le lbl                        // encoding: [0x0d'A',A,A,0x54'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.al lbl                        // encoding: [0x0e'A',A,A,0x54'A']
+// CHECK:                                 //   fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+
+        b.eq #0
+        b.lt #-4
+        b.cc #1048572
+// CHECK: b.eq #0                         // encoding: [0x00,0x00,0x00,0x54]
+// CHECK: b.lt #-4                        // encoding: [0xeb,0xff,0xff,0x54]
+// CHECK: b.lo #1048572                   // encoding: [0xe3,0xff,0x7f,0x54]
+
+//------------------------------------------------------------------------------
+// Conditional compare (immediate)
+//------------------------------------------------------------------------------
+
+        ccmp w1, #31, #0, eq
+        ccmp w3, #0, #15, hs
+        ccmp wzr, #15, #13, cs
+// CHECK: ccmp    w1, #31, #0, eq         // encoding: [0x20,0x08,0x5f,0x7a]
+// CHECK: ccmp    w3, #0, #15, hs         // encoding: [0x6f,0x28,0x40,0x7a]
+// CHECK: ccmp    wzr, #15, #13, hs       // encoding: [0xed,0x2b,0x4f,0x7a]
+
+        ccmp x9, #31, #0, le
+        ccmp x3, #0, #15, gt
+        ccmp xzr, #5, #7, ne
+// CHECK: ccmp    x9, #31, #0, le         // encoding: [0x20,0xd9,0x5f,0xfa]
+// CHECK: ccmp    x3, #0, #15, gt         // encoding: [0x6f,0xc8,0x40,0xfa]
+// CHECK: ccmp    xzr, #5, #7, ne         // encoding: [0xe7,0x1b,0x45,0xfa]
+
+        ccmn w1, #31, #0, eq
+        ccmn w3, #0, #15, hs
+        ccmn wzr, #15, #13, cs
+// CHECK: ccmn    w1, #31, #0, eq         // encoding: [0x20,0x08,0x5f,0x3a]
+// CHECK: ccmn    w3, #0, #15, hs         // encoding: [0x6f,0x28,0x40,0x3a]
+// CHECK: ccmn    wzr, #15, #13, hs       // encoding: [0xed,0x2b,0x4f,0x3a]
+
+        ccmn x9, #31, #0, le
+        ccmn x3, #0, #15, gt
+        ccmn xzr, #5, #7, ne
+// CHECK: ccmn    x9, #31, #0, le         // encoding: [0x20,0xd9,0x5f,0xba]
+// CHECK: ccmn    x3, #0, #15, gt         // encoding: [0x6f,0xc8,0x40,0xba]
+// CHECK: ccmn    xzr, #5, #7, ne         // encoding: [0xe7,0x1b,0x45,0xba]
+
+//------------------------------------------------------------------------------
+// Conditional compare (register)
+//------------------------------------------------------------------------------
+
+        ccmp w1, wzr, #0, eq
+        ccmp w3, w0, #15, hs
+        ccmp wzr, w15, #13, cs
+// CHECK: ccmp    w1, wzr, #0, eq         // encoding: [0x20,0x00,0x5f,0x7a]
+// CHECK: ccmp    w3, w0, #15, hs         // encoding: [0x6f,0x20,0x40,0x7a]
+// CHECK: ccmp    wzr, w15, #13, hs       // encoding: [0xed,0x23,0x4f,0x7a]
+
+        ccmp x9, xzr, #0, le
+        ccmp x3, x0, #15, gt
+        ccmp xzr, x5, #7, ne
+// CHECK: ccmp    x9, xzr, #0, le         // encoding: [0x20,0xd1,0x5f,0xfa]
+// CHECK: ccmp    x3, x0, #15, gt         // encoding: [0x6f,0xc0,0x40,0xfa]
+// CHECK: ccmp    xzr, x5, #7, ne         // encoding: [0xe7,0x13,0x45,0xfa]
+
+        ccmn w1, wzr, #0, eq
+        ccmn w3, w0, #15, hs
+        ccmn wzr, w15, #13, cs
+// CHECK: ccmn    w1, wzr, #0, eq         // encoding: [0x20,0x00,0x5f,0x3a]
+// CHECK: ccmn    w3, w0, #15, hs         // encoding: [0x6f,0x20,0x40,0x3a]
+// CHECK: ccmn    wzr, w15, #13, hs       // encoding: [0xed,0x23,0x4f,0x3a]
+
+        ccmn x9, xzr, #0, le
+        ccmn x3, x0, #15, gt
+        ccmn xzr, x5, #7, ne
+// CHECK: ccmn    x9, xzr, #0, le         // encoding: [0x20,0xd1,0x5f,0xba]
+// CHECK: ccmn    x3, x0, #15, gt         // encoding: [0x6f,0xc0,0x40,0xba]
+// CHECK: ccmn    xzr, x5, #7, ne         // encoding: [0xe7,0x13,0x45,0xba]
+
+//------------------------------------------------------------------------------
+// Conditional select
+//------------------------------------------------------------------------------
+        csel w1, w0, w19, ne
+        csel wzr, w5, w9, eq
+        csel w9, wzr, w30, gt
+        csel w1, w28, wzr, mi
+// CHECK: csel     w1, w0, w19, ne            // encoding: [0x01,0x10,0x93,0x1a]
+// CHECK: csel     wzr, w5, w9, eq            // encoding: [0xbf,0x00,0x89,0x1a]
+// CHECK: csel     w9, wzr, w30, gt           // encoding: [0xe9,0xc3,0x9e,0x1a]
+// CHECK: csel     w1, w28, wzr, mi           // encoding: [0x81,0x43,0x9f,0x1a]
+
+        csel x19, x23, x29, lt
+        csel xzr, x3, x4, ge
+        csel x5, xzr, x6, cs
+        csel x7, x8, xzr, cc
+// CHECK: csel     x19, x23, x29, lt          // encoding: [0xf3,0xb2,0x9d,0x9a]
+// CHECK: csel     xzr, x3, x4, ge            // encoding: [0x7f,0xa0,0x84,0x9a]
+// CHECK: csel     x5, xzr, x6, hs            // encoding: [0xe5,0x23,0x86,0x9a]
+// CHECK: csel     x7, x8, xzr, lo            // encoding: [0x07,0x31,0x9f,0x9a]
+
+        csinc w1, w0, w19, ne
+        csinc wzr, w5, w9, eq
+        csinc w9, wzr, w30, gt
+        csinc w1, w28, wzr, mi
+// CHECK: csinc    w1, w0, w19, ne            // encoding: [0x01,0x14,0x93,0x1a]
+// CHECK: csinc    wzr, w5, w9, eq            // encoding: [0xbf,0x04,0x89,0x1a]
+// CHECK: csinc    w9, wzr, w30, gt           // encoding: [0xe9,0xc7,0x9e,0x1a]
+// CHECK: csinc    w1, w28, wzr, mi           // encoding: [0x81,0x47,0x9f,0x1a]
+
+        csinc x19, x23, x29, lt
+        csinc xzr, x3, x4, ge
+        csinc x5, xzr, x6, cs
+        csinc x7, x8, xzr, cc
+// CHECK: csinc    x19, x23, x29, lt          // encoding: [0xf3,0xb6,0x9d,0x9a]
+// CHECK: csinc    xzr, x3, x4, ge            // encoding: [0x7f,0xa4,0x84,0x9a]
+// CHECK: csinc    x5, xzr, x6, hs            // encoding: [0xe5,0x27,0x86,0x9a]
+// CHECK: csinc    x7, x8, xzr, lo            // encoding: [0x07,0x35,0x9f,0x9a]
+
+        csinv w1, w0, w19, ne
+        csinv wzr, w5, w9, eq
+        csinv w9, wzr, w30, gt
+        csinv w1, w28, wzr, mi
+// CHECK: csinv    w1, w0, w19, ne            // encoding: [0x01,0x10,0x93,0x5a]
+// CHECK: csinv    wzr, w5, w9, eq            // encoding: [0xbf,0x00,0x89,0x5a]
+// CHECK: csinv    w9, wzr, w30, gt           // encoding: [0xe9,0xc3,0x9e,0x5a]
+// CHECK: csinv    w1, w28, wzr, mi           // encoding: [0x81,0x43,0x9f,0x5a]
+
+        csinv x19, x23, x29, lt
+        csinv xzr, x3, x4, ge
+        csinv x5, xzr, x6, cs
+        csinv x7, x8, xzr, cc
+// CHECK: csinv    x19, x23, x29, lt          // encoding: [0xf3,0xb2,0x9d,0xda]
+// CHECK: csinv    xzr, x3, x4, ge            // encoding: [0x7f,0xa0,0x84,0xda]
+// CHECK: csinv    x5, xzr, x6, hs            // encoding: [0xe5,0x23,0x86,0xda]
+// CHECK: csinv    x7, x8, xzr, lo            // encoding: [0x07,0x31,0x9f,0xda]
+
+        csneg w1, w0, w19, ne
+        csneg wzr, w5, w9, eq
+        csneg w9, wzr, w30, gt
+        csneg w1, w28, wzr, mi
+// CHECK: csneg    w1, w0, w19, ne            // encoding: [0x01,0x14,0x93,0x5a]
+// CHECK: csneg    wzr, w5, w9, eq            // encoding: [0xbf,0x04,0x89,0x5a]
+// CHECK: csneg    w9, wzr, w30, gt           // encoding: [0xe9,0xc7,0x9e,0x5a]
+// CHECK: csneg    w1, w28, wzr, mi           // encoding: [0x81,0x47,0x9f,0x5a]
+
+        csneg x19, x23, x29, lt
+        csneg xzr, x3, x4, ge
+        csneg x5, xzr, x6, cs
+        csneg x7, x8, xzr, cc
+// CHECK: csneg    x19, x23, x29, lt          // encoding: [0xf3,0xb6,0x9d,0xda]
+// CHECK: csneg    xzr, x3, x4, ge            // encoding: [0x7f,0xa4,0x84,0xda]
+// CHECK: csneg    x5, xzr, x6, hs            // encoding: [0xe5,0x27,0x86,0xda]
+// CHECK: csneg    x7, x8, xzr, lo            // encoding: [0x07,0x35,0x9f,0xda]
+
+        cset w3, eq
+        cset x9, pl
+// CHECK: csinc    w3, wzr, wzr, ne           // encoding: [0xe3,0x17,0x9f,0x1a]
+// CHECK: csinc    x9, xzr, xzr, mi           // encoding: [0xe9,0x47,0x9f,0x9a]
+
+        csetm w20, ne
+        csetm x30, ge
+// CHECK: csinv    w20, wzr, wzr, eq          // encoding: [0xf4,0x03,0x9f,0x5a]
+// CHECK: csinv    x30, xzr, xzr, lt          // encoding: [0xfe,0xb3,0x9f,0xda]
+
+        cinc w3, w5, gt
+        cinc wzr, w4, le
+        cinc w9, wzr, lt
+// CHECK: csinc    w3, w5, w5, le             // encoding: [0xa3,0xd4,0x85,0x1a]
+// CHECK: csinc    wzr, w4, w4, gt            // encoding: [0x9f,0xc4,0x84,0x1a]
+// CHECK: csinc    w9, wzr, wzr, ge           // encoding: [0xe9,0xa7,0x9f,0x1a]
+
+        cinc x3, x5, gt
+        cinc xzr, x4, le
+        cinc x9, xzr, lt
+// CHECK: csinc     x3, x5, x5, le             // encoding: [0xa3,0xd4,0x85,0x9a]
+// CHECK: csinc     xzr, x4, x4, gt            // encoding: [0x9f,0xc4,0x84,0x9a]
+// CHECK: csinc     x9, xzr, xzr, ge           // encoding: [0xe9,0xa7,0x9f,0x9a]
+
+        cinv w3, w5, gt
+        cinv wzr, w4, le
+        cinv w9, wzr, lt
+// CHECK: csinv    w3, w5, w5, le             // encoding: [0xa3,0xd0,0x85,0x5a]
+// CHECK: csinv    wzr, w4, w4, gt            // encoding: [0x9f,0xc0,0x84,0x5a]
+// CHECK: csinv    w9, wzr, wzr, ge           // encoding: [0xe9,0xa3,0x9f,0x5a]
+
+        cinv x3, x5, gt
+        cinv xzr, x4, le
+        cinv x9, xzr, lt
+// CHECK: csinv    x3, x5, x5, le             // encoding: [0xa3,0xd0,0x85,0xda]
+// CHECK: csinv    xzr, x4, x4, gt            // encoding: [0x9f,0xc0,0x84,0xda]
+// CHECK: csinv    x9, xzr, xzr, ge           // encoding: [0xe9,0xa3,0x9f,0xda]
+
+        cneg w3, w5, gt
+        cneg wzr, w4, le
+        cneg w9, wzr, lt
+// CHECK: csneg    w3, w5, w5, le             // encoding: [0xa3,0xd4,0x85,0x5a]
+// CHECK: csneg    wzr, w4, w4, gt            // encoding: [0x9f,0xc4,0x84,0x5a]
+// CHECK: csneg    w9, wzr, wzr, ge           // encoding: [0xe9,0xa7,0x9f,0x5a]
+
+        cneg x3, x5, gt
+        cneg xzr, x4, le
+        cneg x9, xzr, lt
+// CHECK: csneg    x3, x5, x5, le             // encoding: [0xa3,0xd4,0x85,0xda]
+// CHECK: csneg    xzr, x4, x4, gt            // encoding: [0x9f,0xc4,0x84,0xda]
+// CHECK: csneg    x9, xzr, xzr, ge           // encoding: [0xe9,0xa7,0x9f,0xda]
+
+//------------------------------------------------------------------------------
+// Data-processing (1 source)
+//------------------------------------------------------------------------------
+
+	rbit	w0, w7
+	rbit	x18, x3
+	rev16	w17, w1
+	rev16	x5, x2
+	rev	w18, w0
+	rev32	x20, x1
+	rev32	x20, xzr
+// CHECK: rbit	w0, w7                       // encoding: [0xe0,0x00,0xc0,0x5a]
+// CHECK: rbit	x18, x3                      // encoding: [0x72,0x00,0xc0,0xda]
+// CHECK: rev16 w17, w1                      // encoding: [0x31,0x04,0xc0,0x5a]
+// CHECK: rev16	x5, x2                       // encoding: [0x45,0x04,0xc0,0xda]
+// CHECK: rev	w18, w0                      // encoding: [0x12,0x08,0xc0,0x5a]
+// CHECK: rev32	x20, x1                      // encoding: [0x34,0x08,0xc0,0xda]
+// CHECK: rev32	x20, xzr                     // encoding: [0xf4,0x0b,0xc0,0xda]
+
+	rev	x22, x2
+	rev	x18, xzr
+	rev	w7, wzr
+	clz	w24, w3
+	clz	x26, x4
+	cls	w3, w5
+	cls	x20, x5
+// CHECK: rev	x22, x2                      // encoding: [0x56,0x0c,0xc0,0xda]
+// CHECK: rev	x18, xzr                     // encoding: [0xf2,0x0f,0xc0,0xda]
+// CHECK: rev	w7, wzr                      // encoding: [0xe7,0x0b,0xc0,0x5a]
+// CHECK: clz	w24, w3                      // encoding: [0x78,0x10,0xc0,0x5a]
+// CHECK: clz	x26, x4                      // encoding: [0x9a,0x10,0xc0,0xda]
+// CHECK: cls	w3, w5                       // encoding: [0xa3,0x14,0xc0,0x5a]
+// CHECK: cls	x20, x5                      // encoding: [0xb4,0x14,0xc0,0xda]
+
+	clz	w24, wzr
+	rev	x22, xzr
+// CHECK: clz	w24, wzr                     // encoding: [0xf8,0x13,0xc0,0x5a]
+// CHECK: rev	x22, xzr                     // encoding: [0xf6,0x0f,0xc0,0xda]
+
+//------------------------------------------------------------------------------
+// Data-processing (2 source and 3 source)
+//------------------------------------------------------------------------------
+
+        crc32b  w5, w7, w20
+        crc32h  w28, wzr, w30
+        crc32w  w0, w1, w2
+        crc32x  w7, w9, x20
+        crc32cb w9, w5, w4
+        crc32ch w13, w17, w25
+        crc32cw wzr, w3, w5
+        crc32cx w18, w16, xzr
+// CHECK: crc32b   w5, w7, w20             // encoding: [0xe5,0x40,0xd4,0x1a]
+// CHECK: crc32h   w28, wzr, w30           // encoding: [0xfc,0x47,0xde,0x1a]
+// CHECK: crc32w   w0, w1, w2              // encoding: [0x20,0x48,0xc2,0x1a]
+// CHECK: crc32x   w7, w9, x20             // encoding: [0x27,0x4d,0xd4,0x9a]
+// CHECK: crc32cb  w9, w5, w4              // encoding: [0xa9,0x50,0xc4,0x1a]
+// CHECK: crc32ch  w13, w17, w25           // encoding: [0x2d,0x56,0xd9,0x1a]
+// CHECK: crc32cw  wzr, w3, w5             // encoding: [0x7f,0x58,0xc5,0x1a]
+// CHECK: crc32cx  w18, w16, xzr           // encoding: [0x12,0x5e,0xdf,0x9a]
+
+        udiv	w0, w7, w10
+        udiv	x9, x22, x4
+        sdiv	w12, w21, w0
+        sdiv	x13, x2, x1
+        lslv	w11, w12, w13
+        lslv	x14, x15, x16
+        lsrv	w17, w18, w19
+        lsrv	x20, x21, x22
+        asrv	w23, w24, w25
+        asrv	x26, x27, x28
+        rorv	w0, w1, w2
+        rorv    x3, x4, x5
+
+
+// CHECK: udiv	w0, w7, w10                   // encoding: [0xe0,0x08,0xca,0x1a]
+// CHECK: udiv	x9, x22, x4                   // encoding: [0xc9,0x0a,0xc4,0x9a]
+// CHECK: sdiv	w12, w21, w0                  // encoding: [0xac,0x0e,0xc0,0x1a]
+// CHECK: sdiv	x13, x2, x1                   // encoding: [0x4d,0x0c,0xc1,0x9a]
+// CHECK: lsl	w11, w12, w13                 // encoding: [0x8b,0x21,0xcd,0x1a]
+// CHECK: lsl	x14, x15, x16                 // encoding: [0xee,0x21,0xd0,0x9a]
+// CHECK: lsr	w17, w18, w19                 // encoding: [0x51,0x26,0xd3,0x1a]
+// CHECK: lsr	x20, x21, x22                 // encoding: [0xb4,0x26,0xd6,0x9a]
+// CHECK: asr	w23, w24, w25                 // encoding: [0x17,0x2b,0xd9,0x1a]
+// CHECK: asr	x26, x27, x28                 // encoding: [0x7a,0x2b,0xdc,0x9a]
+// CHECK: ror	w0, w1, w2                    // encoding: [0x20,0x2c,0xc2,0x1a]
+// CHECK: ror  x3, x4, x5                     // encoding: [0x83,0x2c,0xc5,0x9a]
+
+
+        lsl	w6, w7, w8
+        lsl	x9, x10, x11
+        lsr	w12, w13, w14
+        lsr	x15, x16, x17
+        asr	w18, w19, w20
+        asr	x21, x22, x23
+        ror	w24, w25, w26
+        ror	x27, x28, x29
+// CHECK: lsl	w6, w7, w8                    // encoding: [0xe6,0x20,0xc8,0x1a]
+// CHECK: lsl	x9, x10, x11                  // encoding: [0x49,0x21,0xcb,0x9a]
+// CHECK: lsr	w12, w13, w14                 // encoding: [0xac,0x25,0xce,0x1a]
+// CHECK: lsr	x15, x16, x17                 // encoding: [0x0f,0x26,0xd1,0x9a]
+// CHECK: asr	w18, w19, w20                 // encoding: [0x72,0x2a,0xd4,0x1a]
+// CHECK: asr	x21, x22, x23                 // encoding: [0xd5,0x2a,0xd7,0x9a]
+// CHECK: ror	w24, w25, w26                 // encoding: [0x38,0x2f,0xda,0x1a]
+// CHECK: ror	x27, x28, x29                 // encoding: [0x9b,0x2f,0xdd,0x9a]
+
+        madd w1, w3, w7, w4
+        madd wzr, w0, w9, w11
+        madd w13, wzr, w4, w4
+        madd w19, w30, wzr, w29
+        madd w4, w5, w6, wzr
+// CHECK: madd     w1, w3, w7, w4             // encoding: [0x61,0x10,0x07,0x1b]
+// CHECK: madd     wzr, w0, w9, w11           // encoding: [0x1f,0x2c,0x09,0x1b]
+// CHECK: madd     w13, wzr, w4, w4           // encoding: [0xed,0x13,0x04,0x1b]
+// CHECK: madd     w19, w30, wzr, w29         // encoding: [0xd3,0x77,0x1f,0x1b]
+// CHECK: mul      w4, w5, w6                 // encoding: [0xa4,0x7c,0x06,0x1b]
+
+        madd x1, x3, x7, x4
+        madd xzr, x0, x9, x11
+        madd x13, xzr, x4, x4
+        madd x19, x30, xzr, x29
+        madd x4, x5, x6, xzr
+// CHECK: madd     x1, x3, x7, x4             // encoding: [0x61,0x10,0x07,0x9b]
+// CHECK: madd     xzr, x0, x9, x11           // encoding: [0x1f,0x2c,0x09,0x9b]
+// CHECK: madd     x13, xzr, x4, x4           // encoding: [0xed,0x13,0x04,0x9b]
+// CHECK: madd     x19, x30, xzr, x29         // encoding: [0xd3,0x77,0x1f,0x9b]
+// CHECK: mul      x4, x5, x6                 // encoding: [0xa4,0x7c,0x06,0x9b]
+
+        msub w1, w3, w7, w4
+        msub wzr, w0, w9, w11
+        msub w13, wzr, w4, w4
+        msub w19, w30, wzr, w29
+        msub w4, w5, w6, wzr
+// CHECK: msub     w1, w3, w7, w4             // encoding: [0x61,0x90,0x07,0x1b]
+// CHECK: msub     wzr, w0, w9, w11           // encoding: [0x1f,0xac,0x09,0x1b]
+// CHECK: msub     w13, wzr, w4, w4           // encoding: [0xed,0x93,0x04,0x1b]
+// CHECK: msub     w19, w30, wzr, w29         // encoding: [0xd3,0xf7,0x1f,0x1b]
+// CHECK: mneg     w4, w5, w6                 // encoding: [0xa4,0xfc,0x06,0x1b]
+
+        msub x1, x3, x7, x4
+        msub xzr, x0, x9, x11
+        msub x13, xzr, x4, x4
+        msub x19, x30, xzr, x29
+        msub x4, x5, x6, xzr
+// CHECK: msub     x1, x3, x7, x4             // encoding: [0x61,0x90,0x07,0x9b]
+// CHECK: msub     xzr, x0, x9, x11           // encoding: [0x1f,0xac,0x09,0x9b]
+// CHECK: msub     x13, xzr, x4, x4           // encoding: [0xed,0x93,0x04,0x9b]
+// CHECK: msub     x19, x30, xzr, x29         // encoding: [0xd3,0xf7,0x1f,0x9b]
+// CHECK: mneg     x4, x5, x6                 // encoding: [0xa4,0xfc,0x06,0x9b]
+
+        smaddl x3, w5, w2, x9
+        smaddl xzr, w10, w11, x12
+        smaddl x13, wzr, w14, x15
+        smaddl x16, w17, wzr, x18
+        smaddl x19, w20, w21, xzr
+// CHECK: smaddl   x3, w5, w2, x9             // encoding: [0xa3,0x24,0x22,0x9b]
+// CHECK: smaddl   xzr, w10, w11, x12         // encoding: [0x5f,0x31,0x2b,0x9b]
+// CHECK: smaddl   x13, wzr, w14, x15         // encoding: [0xed,0x3f,0x2e,0x9b]
+// CHECK: smaddl   x16, w17, wzr, x18         // encoding: [0x30,0x4a,0x3f,0x9b]
+// CHECK: smull    x19, w20, w21              // encoding: [0x93,0x7e,0x35,0x9b]
+
+        smsubl x3, w5, w2, x9
+        smsubl xzr, w10, w11, x12
+        smsubl x13, wzr, w14, x15
+        smsubl x16, w17, wzr, x18
+        smsubl x19, w20, w21, xzr
+// CHECK: smsubl   x3, w5, w2, x9             // encoding: [0xa3,0xa4,0x22,0x9b]
+// CHECK: smsubl   xzr, w10, w11, x12         // encoding: [0x5f,0xb1,0x2b,0x9b]
+// CHECK: smsubl   x13, wzr, w14, x15         // encoding: [0xed,0xbf,0x2e,0x9b]
+// CHECK: smsubl   x16, w17, wzr, x18         // encoding: [0x30,0xca,0x3f,0x9b]
+// CHECK: smnegl   x19, w20, w21              // encoding: [0x93,0xfe,0x35,0x9b]
+
+        umaddl x3, w5, w2, x9
+        umaddl xzr, w10, w11, x12
+        umaddl x13, wzr, w14, x15
+        umaddl x16, w17, wzr, x18
+        umaddl x19, w20, w21, xzr
+// CHECK: umaddl   x3, w5, w2, x9             // encoding: [0xa3,0x24,0xa2,0x9b]
+// CHECK: umaddl   xzr, w10, w11, x12         // encoding: [0x5f,0x31,0xab,0x9b]
+// CHECK: umaddl   x13, wzr, w14, x15         // encoding: [0xed,0x3f,0xae,0x9b]
+// CHECK: umaddl   x16, w17, wzr, x18         // encoding: [0x30,0x4a,0xbf,0x9b]
+// CHECK: umull    x19, w20, w21              // encoding: [0x93,0x7e,0xb5,0x9b]
+
+
+
+        umsubl x3, w5, w2, x9
+        umsubl xzr, w10, w11, x12
+        umsubl x13, wzr, w14, x15
+        umsubl x16, w17, wzr, x18
+        umsubl x19, w20, w21, xzr
+// CHECK: umsubl   x3, w5, w2, x9             // encoding: [0xa3,0xa4,0xa2,0x9b]
+// CHECK: umsubl   xzr, w10, w11, x12         // encoding: [0x5f,0xb1,0xab,0x9b]
+// CHECK: umsubl   x13, wzr, w14, x15         // encoding: [0xed,0xbf,0xae,0x9b]
+// CHECK: umsubl   x16, w17, wzr, x18         // encoding: [0x30,0xca,0xbf,0x9b]
+// CHECK: umnegl   x19, w20, w21              // encoding: [0x93,0xfe,0xb5,0x9b]
+
+        smulh x30, x29, x28
+        smulh xzr, x27, x26
+        smulh x25, xzr, x24
+        smulh x23, x22, xzr
+// CHECK: smulh    x30, x29, x28              // encoding: [0xbe,0x7f,0x5c,0x9b]
+// CHECK: smulh    xzr, x27, x26              // encoding: [0x7f,0x7f,0x5a,0x9b]
+// CHECK: smulh    x25, xzr, x24              // encoding: [0xf9,0x7f,0x58,0x9b]
+// CHECK: smulh    x23, x22, xzr              // encoding: [0xd7,0x7e,0x5f,0x9b]
+
+        umulh x30, x29, x28
+        umulh xzr, x27, x26
+        umulh x25, xzr, x24
+        umulh x23, x22, xzr
+// CHECK: umulh    x30, x29, x28              // encoding: [0xbe,0x7f,0xdc,0x9b]
+// CHECK: umulh    xzr, x27, x26              // encoding: [0x7f,0x7f,0xda,0x9b]
+// CHECK: umulh    x25, xzr, x24              // encoding: [0xf9,0x7f,0xd8,0x9b]
+// CHECK: umulh    x23, x22, xzr              // encoding: [0xd7,0x7e,0xdf,0x9b]
+
+        mul w3, w4, w5
+        mul wzr, w6, w7
+        mul w8, wzr, w9
+        mul w10, w11, wzr
+
+        mul x12, x13, x14
+        mul xzr, x15, x16
+        mul x17, xzr, x18
+        mul x19, x20, xzr
+
+        mneg w21, w22, w23
+        mneg wzr, w24, w25
+        mneg w26, wzr, w27
+        mneg w28, w29, wzr
+
+        smull x11, w13, w17
+        umull x11, w13, w17
+        smnegl x11, w13, w17
+        umnegl x11, w13, w17
+// CHECK: mul      w3, w4, w5                 // encoding: [0x83,0x7c,0x05,0x1b]
+// CHECK: mul      wzr, w6, w7                // encoding: [0xdf,0x7c,0x07,0x1b]
+// CHECK: mul      w8, wzr, w9                // encoding: [0xe8,0x7f,0x09,0x1b]
+// CHECK: mul      w10, w11, wzr              // encoding: [0x6a,0x7d,0x1f,0x1b]
+// CHECK: mul      x12, x13, x14              // encoding: [0xac,0x7d,0x0e,0x9b]
+// CHECK: mul      xzr, x15, x16              // encoding: [0xff,0x7d,0x10,0x9b]
+// CHECK: mul      x17, xzr, x18              // encoding: [0xf1,0x7f,0x12,0x9b]
+// CHECK: mul      x19, x20, xzr              // encoding: [0x93,0x7e,0x1f,0x9b]
+// CHECK: mneg     w21, w22, w23              // encoding: [0xd5,0xfe,0x17,0x1b]
+// CHECK: mneg     wzr, w24, w25              // encoding: [0x1f,0xff,0x19,0x1b]
+// CHECK: mneg     w26, wzr, w27              // encoding: [0xfa,0xff,0x1b,0x1b]
+// CHECK: mneg     w28, w29, wzr              // encoding: [0xbc,0xff,0x1f,0x1b]
+// CHECK: smull    x11, w13, w17              // encoding: [0xab,0x7d,0x31,0x9b]
+// CHECK: umull    x11, w13, w17              // encoding: [0xab,0x7d,0xb1,0x9b]
+// CHECK: smnegl   x11, w13, w17              // encoding: [0xab,0xfd,0x31,0x9b]
+// CHECK: umnegl   x11, w13, w17              // encoding: [0xab,0xfd,0xb1,0x9b]
+
+//------------------------------------------------------------------------------
+// Exception generation
+//------------------------------------------------------------------------------
+        svc #0
+        svc #65535
+// CHECK: svc      #0                         // encoding: [0x01,0x00,0x00,0xd4]
+// CHECK: svc      #65535                     // encoding: [0xe1,0xff,0x1f,0xd4]
+
+        hvc #1
+        smc #12000
+        brk #12
+        hlt #123
+// CHECK: hvc      #1                         // encoding: [0x22,0x00,0x00,0xd4]
+// CHECK: smc      #12000                     // encoding: [0x03,0xdc,0x05,0xd4]
+// CHECK: brk      #12                        // encoding: [0x80,0x01,0x20,0xd4]
+// CHECK: hlt      #123                       // encoding: [0x60,0x0f,0x40,0xd4]
+
+        dcps1 #42
+        dcps2 #9
+        dcps3 #1000
+// CHECK: dcps1    #42                        // encoding: [0x41,0x05,0xa0,0xd4]
+// CHECK: dcps2    #9                         // encoding: [0x22,0x01,0xa0,0xd4]
+// CHECK: dcps3    #1000                      // encoding: [0x03,0x7d,0xa0,0xd4]
+
+        dcps1
+        dcps2
+        dcps3
+// CHECK: dcps1                               // encoding: [0x01,0x00,0xa0,0xd4]
+// CHECK: dcps2                               // encoding: [0x02,0x00,0xa0,0xd4]
+// CHECK: dcps3                               // encoding: [0x03,0x00,0xa0,0xd4]
+
+//------------------------------------------------------------------------------
+// Extract (immediate)
+//------------------------------------------------------------------------------
+
+        extr w3, w5, w7, #0
+        extr w11, w13, w17, #31
+// CHECK: extr     w3, w5, w7, #0             // encoding: [0xa3,0x00,0x87,0x13]
+// CHECK: extr     w11, w13, w17, #31         // encoding: [0xab,0x7d,0x91,0x13]
+
+        extr x3, x5, x7, #15
+        extr x11, x13, x17, #63
+// CHECK: extr     x3, x5, x7, #15            // encoding: [0xa3,0x3c,0xc7,0x93]
+// CHECK: extr     x11, x13, x17, #63         // encoding: [0xab,0xfd,0xd1,0x93]
+
+        ror x19, x23, #24
+        ror x29, xzr, #63
+// CHECK: extr     x19, x23, x23, #24         // encoding: [0xf3,0x62,0xd7,0x93]
+// CHECK: extr     x29, xzr, xzr, #63         // encoding: [0xfd,0xff,0xdf,0x93]
+
+        ror w9, w13, #31
+// CHECK: extr     w9, w13, w13, #31          // encoding: [0xa9,0x7d,0x8d,0x13]
+
+//------------------------------------------------------------------------------
+// Floating-point compare
+//------------------------------------------------------------------------------
+
+        fcmp s3, s5
+        fcmp s31, #0.0
+// CHECK: fcmp    s3, s5                  // encoding: [0x60,0x20,0x25,0x1e]
+// CHECK: fcmp    s31, #0.0               // encoding: [0xe8,0x23,0x20,0x1e]
+
+        fcmpe s29, s30
+        fcmpe s15, #0.0
+// CHECK: fcmpe   s29, s30                // encoding: [0xb0,0x23,0x3e,0x1e]
+// CHECK: fcmpe   s15, #0.0               // encoding: [0xf8,0x21,0x20,0x1e]
+
+        fcmp d4, d12
+        fcmp d23, #0.0
+// CHECK: fcmp    d4, d12                 // encoding: [0x80,0x20,0x6c,0x1e]
+// CHECK: fcmp    d23, #0.0               // encoding: [0xe8,0x22,0x60,0x1e]
+
+        fcmpe d26, d22
+        fcmpe d29, #0.0
+// CHECK: fcmpe   d26, d22                // encoding: [0x50,0x23,0x76,0x1e]
+// CHECK: fcmpe   d29, #0.0               // encoding: [0xb8,0x23,0x60,0x1e]
+
+//------------------------------------------------------------------------------
+// Floating-point conditional compare
+//------------------------------------------------------------------------------
+
+        fccmp s1, s31, #0, eq
+        fccmp s3, s0, #15, hs
+        fccmp s31, s15, #13, cs
+// CHECK: fccmp    s1, s31, #0, eq         // encoding: [0x20,0x04,0x3f,0x1e]
+// CHECK: fccmp    s3, s0, #15, hs         // encoding: [0x6f,0x24,0x20,0x1e]
+// CHECK: fccmp    s31, s15, #13, hs       // encoding: [0xed,0x27,0x2f,0x1e]
+
+        fccmp d9, d31, #0, le
+        fccmp d3, d0, #15, gt
+        fccmp d31, d5, #7, ne
+// CHECK: fccmp    d9, d31, #0, le         // encoding: [0x20,0xd5,0x7f,0x1e]
+// CHECK: fccmp    d3, d0, #15, gt         // encoding: [0x6f,0xc4,0x60,0x1e]
+// CHECK: fccmp    d31, d5, #7, ne         // encoding: [0xe7,0x17,0x65,0x1e]
+
+        fccmpe s1, s31, #0, eq
+        fccmpe s3, s0, #15, hs
+        fccmpe s31, s15, #13, cs
+// CHECK: fccmpe    s1, s31, #0, eq         // encoding: [0x30,0x04,0x3f,0x1e]
+// CHECK: fccmpe    s3, s0, #15, hs         // encoding: [0x7f,0x24,0x20,0x1e]
+// CHECK: fccmpe    s31, s15, #13, hs       // encoding: [0xfd,0x27,0x2f,0x1e]
+
+        fccmpe d9, d31, #0, le
+        fccmpe d3, d0, #15, gt
+        fccmpe d31, d5, #7, ne
+// CHECK: fccmpe    d9, d31, #0, le         // encoding: [0x30,0xd5,0x7f,0x1e]
+// CHECK: fccmpe    d3, d0, #15, gt         // encoding: [0x7f,0xc4,0x60,0x1e]
+// CHECK: fccmpe    d31, d5, #7, ne         // encoding: [0xf7,0x17,0x65,0x1e]
+
+//------------------------------------------------------------------------------
+// Floating-point conditional select
+//------------------------------------------------------------------------------
+
+        fcsel s3, s20, s9, pl
+        fcsel d9, d10, d11, mi
+// CHECK: fcsel   s3, s20, s9, pl         // encoding: [0x83,0x5e,0x29,0x1e]
+// CHECK: fcsel   d9, d10, d11, mi        // encoding: [0x49,0x4d,0x6b,0x1e]
+
+//------------------------------------------------------------------------------
+// Floating-point data-processing (1 source)
+//------------------------------------------------------------------------------
+
+        fmov s0, s1
+        fabs s2, s3
+        fneg s4, s5
+        fsqrt s6, s7
+        fcvt d8, s9
+        fcvt h10, s11
+        frintn s12, s13
+        frintp s14, s15
+        frintm s16, s17
+        frintz s18, s19
+        frinta s20, s21
+        frintx s22, s23
+        frinti s24, s25
+// CHECK: fmov     s0, s1                // encoding: [0x20,0x40,0x20,0x1e]
+// CHECK: fabs     s2, s3                // encoding: [0x62,0xc0,0x20,0x1e]
+// CHECK: fneg     s4, s5                     // encoding: [0xa4,0x40,0x21,0x1e]
+// CHECK: fsqrt    s6, s7                     // encoding: [0xe6,0xc0,0x21,0x1e]
+// CHECK: fcvt     d8, s9                     // encoding: [0x28,0xc1,0x22,0x1e]
+// CHECK: fcvt     h10, s11                   // encoding: [0x6a,0xc1,0x23,0x1e]
+// CHECK: frintn   s12, s13                   // encoding: [0xac,0x41,0x24,0x1e]
+// CHECK: frintp   s14, s15                   // encoding: [0xee,0xc1,0x24,0x1e]
+// CHECK: frintm   s16, s17                   // encoding: [0x30,0x42,0x25,0x1e]
+// CHECK: frintz   s18, s19                   // encoding: [0x72,0xc2,0x25,0x1e]
+// CHECK: frinta   s20, s21                   // encoding: [0xb4,0x42,0x26,0x1e]
+// CHECK: frintx   s22, s23                   // encoding: [0xf6,0x42,0x27,0x1e]
+// CHECK: frinti   s24, s25                   // encoding: [0x38,0xc3,0x27,0x1e]
+
+        fmov d0, d1
+        fabs d2, d3
+        fneg d4, d5
+        fsqrt d6, d7
+        fcvt s8, d9
+        fcvt h10, d11
+        frintn d12, d13
+        frintp d14, d15
+        frintm d16, d17
+        frintz d18, d19
+        frinta d20, d21
+        frintx d22, d23
+        frinti d24, d25
+// CHECK: fmov     d0, d1                     // encoding: [0x20,0x40,0x60,0x1e]
+// CHECK: fabs     d2, d3                     // encoding: [0x62,0xc0,0x60,0x1e]
+// CHECK: fneg     d4, d5                     // encoding: [0xa4,0x40,0x61,0x1e]
+// CHECK: fsqrt    d6, d7                     // encoding: [0xe6,0xc0,0x61,0x1e]
+// CHECK: fcvt     s8, d9                     // encoding: [0x28,0x41,0x62,0x1e]
+// CHECK: fcvt     h10, d11                   // encoding: [0x6a,0xc1,0x63,0x1e]
+// CHECK: frintn   d12, d13                   // encoding: [0xac,0x41,0x64,0x1e]
+// CHECK: frintp   d14, d15                   // encoding: [0xee,0xc1,0x64,0x1e]
+// CHECK: frintm   d16, d17                   // encoding: [0x30,0x42,0x65,0x1e]
+// CHECK: frintz   d18, d19                   // encoding: [0x72,0xc2,0x65,0x1e]
+// CHECK: frinta   d20, d21                   // encoding: [0xb4,0x42,0x66,0x1e]
+// CHECK: frintx   d22, d23                   // encoding: [0xf6,0x42,0x67,0x1e]
+// CHECK: frinti   d24, d25                   // encoding: [0x38,0xc3,0x67,0x1e]
+
+        fcvt s26, h27
+        fcvt d28, h29
+// CHECK: fcvt     s26, h27                   // encoding: [0x7a,0x43,0xe2,0x1e]
+// CHECK: fcvt     d28, h29                   // encoding: [0xbc,0xc3,0xe2,0x1e]
+
+//------------------------------------------------------------------------------
+// Floating-point data-processing (2 sources)
+//------------------------------------------------------------------------------
+
+        fmul s20, s19, s17
+        fdiv s1, s2, s3
+        fadd s4, s5, s6
+        fsub s7, s8, s9
+        fmax s10, s11, s12
+        fmin s13, s14, s15
+        fmaxnm s16, s17, s18
+        fminnm s19, s20, s21
+        fnmul s22, s23, s24
+// CHECK: fmul     s20, s19, s17              // encoding: [0x74,0x0a,0x31,0x1e]
+// CHECK: fdiv     s1, s2, s3                 // encoding: [0x41,0x18,0x23,0x1e]
+// CHECK: fadd     s4, s5, s6                 // encoding: [0xa4,0x28,0x26,0x1e]
+// CHECK: fsub     s7, s8, s9                 // encoding: [0x07,0x39,0x29,0x1e]
+// CHECK: fmax     s10, s11, s12              // encoding: [0x6a,0x49,0x2c,0x1e]
+// CHECK: fmin     s13, s14, s15              // encoding: [0xcd,0x59,0x2f,0x1e]
+// CHECK: fmaxnm   s16, s17, s18              // encoding: [0x30,0x6a,0x32,0x1e]
+// CHECK: fminnm   s19, s20, s21              // encoding: [0x93,0x7a,0x35,0x1e]
+// CHECK: fnmul    s22, s23, s24              // encoding: [0xf6,0x8a,0x38,0x1e]
+
+        fmul d20, d19, d17
+        fdiv d1, d2, d3
+        fadd d4, d5, d6
+        fsub d7, d8, d9
+        fmax d10, d11, d12
+        fmin d13, d14, d15
+        fmaxnm d16, d17, d18
+        fminnm d19, d20, d21
+        fnmul d22, d23, d24
+// CHECK: fmul     d20, d19, d17              // encoding: [0x74,0x0a,0x71,0x1e]
+// CHECK: fdiv     d1, d2, d3                 // encoding: [0x41,0x18,0x63,0x1e]
+// CHECK: fadd     d4, d5, d6                 // encoding: [0xa4,0x28,0x66,0x1e]
+// CHECK: fsub     d7, d8, d9                 // encoding: [0x07,0x39,0x69,0x1e]
+// CHECK: fmax     d10, d11, d12              // encoding: [0x6a,0x49,0x6c,0x1e]
+// CHECK: fmin     d13, d14, d15              // encoding: [0xcd,0x59,0x6f,0x1e]
+// CHECK: fmaxnm   d16, d17, d18              // encoding: [0x30,0x6a,0x72,0x1e]
+// CHECK: fminnm   d19, d20, d21              // encoding: [0x93,0x7a,0x75,0x1e]
+// CHECK: fnmul    d22, d23, d24              // encoding: [0xf6,0x8a,0x78,0x1e]
+
+//------------------------------------------------------------------------------
+// Floating-point data-processing (3 sources)
+//------------------------------------------------------------------------------
+
+        fmadd s3, s5, s6, s31
+        fmadd d3, d13, d0, d23
+        fmsub s3, s5, s6, s31
+        fmsub d3, d13, d0, d23
+        fnmadd s3, s5, s6, s31
+        fnmadd d3, d13, d0, d23
+        fnmsub s3, s5, s6, s31
+        fnmsub d3, d13, d0, d23
+// CHECK: fmadd   s3, s5, s6, s31         // encoding: [0xa3,0x7c,0x06,0x1f]
+// CHECK: fmadd   d3, d13, d0, d23        // encoding: [0xa3,0x5d,0x40,0x1f]
+// CHECK: fmsub   s3, s5, s6, s31         // encoding: [0xa3,0xfc,0x06,0x1f]
+// CHECK: fmsub   d3, d13, d0, d23        // encoding: [0xa3,0xdd,0x40,0x1f]
+// CHECK: fnmadd  s3, s5, s6, s31         // encoding: [0xa3,0x7c,0x26,0x1f]
+// CHECK: fnmadd  d3, d13, d0, d23        // encoding: [0xa3,0x5d,0x60,0x1f]
+// CHECK: fnmsub  s3, s5, s6, s31         // encoding: [0xa3,0xfc,0x26,0x1f]
+// CHECK: fnmsub  d3, d13, d0, d23        // encoding: [0xa3,0xdd,0x60,0x1f]
+
+//------------------------------------------------------------------------------
+// Floating-point <-> fixed-point conversion
+//------------------------------------------------------------------------------
+
+        fcvtzs w3, s5, #1
+        fcvtzs wzr, s20, #13
+        fcvtzs w19, s0, #32
+// CHECK: fcvtzs  w3, s5, #1              // encoding: [0xa3,0xfc,0x18,0x1e]
+// CHECK: fcvtzs  wzr, s20, #13           // encoding: [0x9f,0xce,0x18,0x1e]
+// CHECK: fcvtzs  w19, s0, #32            // encoding: [0x13,0x80,0x18,0x1e]
+
+        fcvtzs x3, s5, #1
+        fcvtzs x12, s30, #45
+        fcvtzs x19, s0, #64
+// CHECK: fcvtzs  x3, s5, #1              // encoding: [0xa3,0xfc,0x18,0x9e]
+// CHECK: fcvtzs  x12, s30, #45           // encoding: [0xcc,0x4f,0x18,0x9e]
+// CHECK: fcvtzs  x19, s0, #64            // encoding: [0x13,0x00,0x18,0x9e]
+
+        fcvtzs w3, d5, #1
+        fcvtzs wzr, d20, #13
+        fcvtzs w19, d0, #32
+// CHECK: fcvtzs  w3, d5, #1              // encoding: [0xa3,0xfc,0x58,0x1e]
+// CHECK: fcvtzs  wzr, d20, #13           // encoding: [0x9f,0xce,0x58,0x1e]
+// CHECK: fcvtzs  w19, d0, #32            // encoding: [0x13,0x80,0x58,0x1e]
+
+        fcvtzs x3, d5, #1
+        fcvtzs x12, d30, #45
+        fcvtzs x19, d0, #64
+// CHECK: fcvtzs  x3, d5, #1              // encoding: [0xa3,0xfc,0x58,0x9e]
+// CHECK: fcvtzs  x12, d30, #45           // encoding: [0xcc,0x4f,0x58,0x9e]
+// CHECK: fcvtzs  x19, d0, #64            // encoding: [0x13,0x00,0x58,0x9e]
+
+        fcvtzu w3, s5, #1
+        fcvtzu wzr, s20, #13
+        fcvtzu w19, s0, #32
+// CHECK: fcvtzu  w3, s5, #1              // encoding: [0xa3,0xfc,0x19,0x1e]
+// CHECK: fcvtzu  wzr, s20, #13           // encoding: [0x9f,0xce,0x19,0x1e]
+// CHECK: fcvtzu  w19, s0, #32            // encoding: [0x13,0x80,0x19,0x1e]
+
+        fcvtzu x3, s5, #1
+        fcvtzu x12, s30, #45
+        fcvtzu x19, s0, #64
+// CHECK: fcvtzu  x3, s5, #1              // encoding: [0xa3,0xfc,0x19,0x9e]
+// CHECK: fcvtzu  x12, s30, #45           // encoding: [0xcc,0x4f,0x19,0x9e]
+// CHECK: fcvtzu  x19, s0, #64            // encoding: [0x13,0x00,0x19,0x9e]
+
+        fcvtzu w3, d5, #1
+        fcvtzu wzr, d20, #13
+        fcvtzu w19, d0, #32
+// CHECK: fcvtzu  w3, d5, #1              // encoding: [0xa3,0xfc,0x59,0x1e]
+// CHECK: fcvtzu  wzr, d20, #13           // encoding: [0x9f,0xce,0x59,0x1e]
+// CHECK: fcvtzu  w19, d0, #32            // encoding: [0x13,0x80,0x59,0x1e]
+
+        fcvtzu x3, d5, #1
+        fcvtzu x12, d30, #45
+        fcvtzu x19, d0, #64
+// CHECK: fcvtzu  x3, d5, #1              // encoding: [0xa3,0xfc,0x59,0x9e]
+// CHECK: fcvtzu  x12, d30, #45           // encoding: [0xcc,0x4f,0x59,0x9e]
+// CHECK: fcvtzu  x19, d0, #64            // encoding: [0x13,0x00,0x59,0x9e]
+
+        scvtf s23, w19, #1
+        scvtf s31, wzr, #20
+        scvtf s14, w0, #32
+// CHECK: scvtf   s23, w19, #1            // encoding: [0x77,0xfe,0x02,0x1e]
+// CHECK: scvtf   s31, wzr, #20           // encoding: [0xff,0xb3,0x02,0x1e]
+// CHECK: scvtf   s14, w0, #32            // encoding: [0x0e,0x80,0x02,0x1e]
+
+        scvtf s23, x19, #1
+        scvtf s31, xzr, #20
+        scvtf s14, x0, #64
+// CHECK: scvtf   s23, x19, #1            // encoding: [0x77,0xfe,0x02,0x9e]
+// CHECK: scvtf   s31, xzr, #20           // encoding: [0xff,0xb3,0x02,0x9e]
+// CHECK: scvtf   s14, x0, #64            // encoding: [0x0e,0x00,0x02,0x9e]
+
+        scvtf d23, w19, #1
+        scvtf d31, wzr, #20
+        scvtf d14, w0, #32
+// CHECK: scvtf   d23, w19, #1            // encoding: [0x77,0xfe,0x42,0x1e]
+// CHECK: scvtf   d31, wzr, #20           // encoding: [0xff,0xb3,0x42,0x1e]
+// CHECK: scvtf   d14, w0, #32            // encoding: [0x0e,0x80,0x42,0x1e]
+
+        scvtf d23, x19, #1
+        scvtf d31, xzr, #20
+        scvtf d14, x0, #64
+// CHECK: scvtf   d23, x19, #1            // encoding: [0x77,0xfe,0x42,0x9e]
+// CHECK: scvtf   d31, xzr, #20           // encoding: [0xff,0xb3,0x42,0x9e]
+// CHECK: scvtf   d14, x0, #64            // encoding: [0x0e,0x00,0x42,0x9e]
+
+        ucvtf s23, w19, #1
+        ucvtf s31, wzr, #20
+        ucvtf s14, w0, #32
+// CHECK: ucvtf   s23, w19, #1            // encoding: [0x77,0xfe,0x03,0x1e]
+// CHECK: ucvtf   s31, wzr, #20           // encoding: [0xff,0xb3,0x03,0x1e]
+// CHECK: ucvtf   s14, w0, #32            // encoding: [0x0e,0x80,0x03,0x1e]
+
+        ucvtf s23, x19, #1
+        ucvtf s31, xzr, #20
+        ucvtf s14, x0, #64
+// CHECK: ucvtf   s23, x19, #1            // encoding: [0x77,0xfe,0x03,0x9e]
+// CHECK: ucvtf   s31, xzr, #20           // encoding: [0xff,0xb3,0x03,0x9e]
+// CHECK: ucvtf   s14, x0, #64            // encoding: [0x0e,0x00,0x03,0x9e]
+
+        ucvtf d23, w19, #1
+        ucvtf d31, wzr, #20
+        ucvtf d14, w0, #32
+// CHECK: ucvtf   d23, w19, #1            // encoding: [0x77,0xfe,0x43,0x1e]
+// CHECK: ucvtf   d31, wzr, #20           // encoding: [0xff,0xb3,0x43,0x1e]
+// CHECK: ucvtf   d14, w0, #32            // encoding: [0x0e,0x80,0x43,0x1e]
+
+        ucvtf d23, x19, #1
+        ucvtf d31, xzr, #20
+        ucvtf d14, x0, #64
+// CHECK: ucvtf   d23, x19, #1            // encoding: [0x77,0xfe,0x43,0x9e]
+// CHECK: ucvtf   d31, xzr, #20           // encoding: [0xff,0xb3,0x43,0x9e]
+// CHECK: ucvtf   d14, x0, #64            // encoding: [0x0e,0x00,0x43,0x9e]
+
+//------------------------------------------------------------------------------
+// Floating-point <-> integer conversion
+//------------------------------------------------------------------------------
+        fcvtns w3, s31
+        fcvtns xzr, s12
+        fcvtnu wzr, s12
+        fcvtnu x0, s0
+// CHECK: fcvtns   w3, s31                    // encoding: [0xe3,0x03,0x20,0x1e]
+// CHECK: fcvtns   xzr, s12                   // encoding: [0x9f,0x01,0x20,0x9e]
+// CHECK: fcvtnu   wzr, s12                   // encoding: [0x9f,0x01,0x21,0x1e]
+// CHECK: fcvtnu   x0, s0                     // encoding: [0x00,0x00,0x21,0x9e]
+
+        fcvtps wzr, s9
+        fcvtps x12, s20
+        fcvtpu w30, s23
+        fcvtpu x29, s3
+// CHECK: fcvtps   wzr, s9                    // encoding: [0x3f,0x01,0x28,0x1e]
+// CHECK: fcvtps   x12, s20                   // encoding: [0x8c,0x02,0x28,0x9e]
+// CHECK: fcvtpu   w30, s23                   // encoding: [0xfe,0x02,0x29,0x1e]
+// CHECK: fcvtpu   x29, s3                    // encoding: [0x7d,0x00,0x29,0x9e]
+
+        fcvtms w2, s3
+        fcvtms x4, s5
+        fcvtmu w6, s7
+        fcvtmu x8, s9
+// CHECK: fcvtms   w2, s3                     // encoding: [0x62,0x00,0x30,0x1e]
+// CHECK: fcvtms   x4, s5                     // encoding: [0xa4,0x00,0x30,0x9e]
+// CHECK: fcvtmu   w6, s7                     // encoding: [0xe6,0x00,0x31,0x1e]
+// CHECK: fcvtmu   x8, s9                     // encoding: [0x28,0x01,0x31,0x9e]
+
+        fcvtzs w10, s11
+        fcvtzs x12, s13
+        fcvtzu w14, s15
+        fcvtzu x15, s16
+// CHECK: fcvtzs   w10, s11                   // encoding: [0x6a,0x01,0x38,0x1e]
+// CHECK: fcvtzs   x12, s13                   // encoding: [0xac,0x01,0x38,0x9e]
+// CHECK: fcvtzu   w14, s15                   // encoding: [0xee,0x01,0x39,0x1e]
+// CHECK: fcvtzu   x15, s16                   // encoding: [0x0f,0x02,0x39,0x9e]
+
+        scvtf s17, w18
+        scvtf s19, x20
+        ucvtf s21, w22
+        ucvtf s23, x24                  // unsigned, 64-bit source: mirrors "ucvtf d23, x24" in the double-precision group
+// CHECK: scvtf    s17, w18                   // encoding: [0x51,0x02,0x22,0x1e]
+// CHECK: scvtf    s19, x20                   // encoding: [0x93,0x02,0x22,0x9e]
+// CHECK: ucvtf    s21, w22                   // encoding: [0xd5,0x02,0x23,0x1e]
+// CHECK: ucvtf    s23, x24                   // encoding: [0x17,0x03,0x23,0x9e]
+
+        fcvtas w25, s26
+        fcvtas x27, s28
+        fcvtau w29, s30
+        fcvtau xzr, s0
+// CHECK: fcvtas   w25, s26                   // encoding: [0x59,0x03,0x24,0x1e]
+// CHECK: fcvtas   x27, s28                   // encoding: [0x9b,0x03,0x24,0x9e]
+// CHECK: fcvtau   w29, s30                   // encoding: [0xdd,0x03,0x25,0x1e]
+// CHECK: fcvtau   xzr, s0                    // encoding: [0x1f,0x00,0x25,0x9e]
+
+        fcvtns w3, d31
+        fcvtns xzr, d12
+        fcvtnu wzr, d12
+        fcvtnu x0, d0
+// CHECK: fcvtns   w3, d31                    // encoding: [0xe3,0x03,0x60,0x1e]
+// CHECK: fcvtns   xzr, d12                   // encoding: [0x9f,0x01,0x60,0x9e]
+// CHECK: fcvtnu   wzr, d12                   // encoding: [0x9f,0x01,0x61,0x1e]
+// CHECK: fcvtnu   x0, d0                     // encoding: [0x00,0x00,0x61,0x9e]
+
+        fcvtps wzr, d9
+        fcvtps x12, d20
+        fcvtpu w30, d23
+        fcvtpu x29, d3
+// CHECK: fcvtps   wzr, d9                    // encoding: [0x3f,0x01,0x68,0x1e]
+// CHECK: fcvtps   x12, d20                   // encoding: [0x8c,0x02,0x68,0x9e]
+// CHECK: fcvtpu   w30, d23                   // encoding: [0xfe,0x02,0x69,0x1e]
+// CHECK: fcvtpu   x29, d3                    // encoding: [0x7d,0x00,0x69,0x9e]
+
+        fcvtms w2, d3
+        fcvtms x4, d5
+        fcvtmu w6, d7
+        fcvtmu x8, d9
+// CHECK: fcvtms   w2, d3                     // encoding: [0x62,0x00,0x70,0x1e]
+// CHECK: fcvtms   x4, d5                     // encoding: [0xa4,0x00,0x70,0x9e]
+// CHECK: fcvtmu   w6, d7                     // encoding: [0xe6,0x00,0x71,0x1e]
+// CHECK: fcvtmu   x8, d9                     // encoding: [0x28,0x01,0x71,0x9e]
+
+        fcvtzs w10, d11
+        fcvtzs x12, d13
+        fcvtzu w14, d15
+        fcvtzu x15, d16
+// CHECK: fcvtzs   w10, d11                   // encoding: [0x6a,0x01,0x78,0x1e]
+// CHECK: fcvtzs   x12, d13                   // encoding: [0xac,0x01,0x78,0x9e]
+// CHECK: fcvtzu   w14, d15                   // encoding: [0xee,0x01,0x79,0x1e]
+// CHECK: fcvtzu   x15, d16                   // encoding: [0x0f,0x02,0x79,0x9e]
+
+        scvtf d17, w18
+        scvtf d19, x20
+        ucvtf d21, w22
+        ucvtf d23, x24
+// CHECK: scvtf    d17, w18                   // encoding: [0x51,0x02,0x62,0x1e]
+// CHECK: scvtf    d19, x20                   // encoding: [0x93,0x02,0x62,0x9e]
+// CHECK: ucvtf    d21, w22                   // encoding: [0xd5,0x02,0x63,0x1e]
+// CHECK: ucvtf    d23, x24                   // encoding: [0x17,0x03,0x63,0x9e]
+
+        fcvtas w25, d26
+        fcvtas x27, d28
+        fcvtau w29, d30
+        fcvtau xzr, d0
+// CHECK: fcvtas   w25, d26                   // encoding: [0x59,0x03,0x64,0x1e]
+// CHECK: fcvtas   x27, d28                   // encoding: [0x9b,0x03,0x64,0x9e]
+// CHECK: fcvtau   w29, d30                   // encoding: [0xdd,0x03,0x65,0x1e]
+// CHECK: fcvtau   xzr, d0                    // encoding: [0x1f,0x00,0x65,0x9e]
+
+        fmov w3, s9
+        fmov s9, w3
+// CHECK: fmov     w3, s9                     // encoding: [0x23,0x01,0x26,0x1e]
+// CHECK: fmov     s9, w3                     // encoding: [0x69,0x00,0x27,0x1e]
+
+        fmov x20, d31
+        fmov d1, x15
+// CHECK: fmov     x20, d31                   // encoding: [0xf4,0x03,0x66,0x9e]
+// CHECK: fmov     d1, x15                    // encoding: [0xe1,0x01,0x67,0x9e]
+
+        fmov x3, v12.d[1]
+        fmov v1.d[1], x19
+        fmov v3.2d[1], xzr
+// CHECK: fmov     x3, v12.d[1]               // encoding: [0x83,0x01,0xae,0x9e]
+// CHECK: fmov     v1.d[1], x19               // encoding: [0x61,0x02,0xaf,0x9e]
+// CHECK: fmov     v3.d[1], xzr               // encoding: [0xe3,0x03,0xaf,0x9e]
+
+//------------------------------------------------------------------------------
+// Floating-point immediate
+//------------------------------------------------------------------------------
+
+        fmov s2, #0.125
+        fmov s3, #1.0
+        fmov d30, #16.0
+// CHECK: fmov     s2, #0.12500000            // encoding: [0x02,0x10,0x28,0x1e]
+// CHECK: fmov     s3, #1.00000000            // encoding: [0x03,0x10,0x2e,0x1e]
+// CHECK: fmov     d30, #16.00000000          // encoding: [0x1e,0x10,0x66,0x1e]
+
+        fmov s4, #1.0625
+        fmov d10, #1.9375
+// CHECK: fmov     s4, #1.06250000            // encoding: [0x04,0x30,0x2e,0x1e]
+// CHECK: fmov     d10, #1.93750000           // encoding: [0x0a,0xf0,0x6f,0x1e]
+
+        fmov s12, #-1.0
+// CHECK: fmov     s12, #-1.00000000          // encoding: [0x0c,0x10,0x3e,0x1e]
+
+        fmov d16, #8.5
+// CHECK: fmov     d16, #8.50000000           // encoding: [0x10,0x30,0x64,0x1e]
+
+//------------------------------------------------------------------------------
+// Load-register (literal)
+//------------------------------------------------------------------------------
+        ldr w3, here
+        ldr x29, there
+        ldrsw xzr, everywhere
+// CHECK: ldr     w3, here                // encoding: [0x03'A',A,A,0x18'A']
+// CHECK:                                 //   fixup A - offset: 0, value: here, kind: fixup_a64_ld_prel
+// CHECK: ldr     x29, there              // encoding: [0x1d'A',A,A,0x58'A']
+// CHECK:                                 //   fixup A - offset: 0, value: there, kind: fixup_a64_ld_prel
+// CHECK: ldrsw   xzr, everywhere         // encoding: [0x1f'A',A,A,0x98'A']
+// CHECK:                                 //   fixup A - offset: 0, value: everywhere, kind: fixup_a64_ld_prel
+
+        ldr s0, who_knows
+        ldr d0, i_dont
+        ldr q0, there_must_be_a_better_way
+// CHECK: ldr     s0, who_knows           // encoding: [A,A,A,0x1c'A']
+// CHECK:                                 //   fixup A - offset: 0, value: who_knows, kind: fixup_a64_ld_prel
+// CHECK: ldr     d0, i_dont              // encoding: [A,A,A,0x5c'A']
+// CHECK:                                 //   fixup A - offset: 0, value: i_dont, kind: fixup_a64_ld_prel
+// CHECK: ldr     q0, there_must_be_a_better_way // encoding: [A,A,A,0x9c'A']
+// CHECK:                                 //   fixup A - offset: 0, value: there_must_be_a_better_way, kind: fixup_a64_ld_prel
+
+        ldr w0, #1048572
+        ldr x10, #-1048576
+// CHECK: ldr     w0, #1048572            // encoding: [0xe0,0xff,0x7f,0x18]
+// CHECK: ldr     x10, #-1048576          // encoding: [0x0a,0x00,0x80,0x58]
+
+        prfm pldl1strm, nowhere
+        prfm #22, somewhere
+// CHECK: prfm    pldl1strm, nowhere      // encoding: [0x01'A',A,A,0xd8'A']
+// CHECK:                                 //   fixup A - offset: 0, value: nowhere, kind: fixup_a64_ld_prel
+// CHECK: prfm    #22, somewhere          // encoding: [0x16'A',A,A,0xd8'A']
+// CHECK:                                 //   fixup A - offset: 0, value: somewhere, kind: fixup_a64_ld_prel
+
+//------------------------------------------------------------------------------
+// Floating-point immediate
+//------------------------------------------------------------------------------
+
+        fmov s2, #0.125
+        fmov s3, #1.0
+        fmov d30, #16.0
+// CHECK: fmov     s2, #0.12500000            // encoding: [0x02,0x10,0x28,0x1e]
+// CHECK: fmov     s3, #1.00000000            // encoding: [0x03,0x10,0x2e,0x1e]
+// CHECK: fmov     d30, #16.00000000          // encoding: [0x1e,0x10,0x66,0x1e]
+
+        fmov s4, #1.0625
+        fmov d10, #1.9375
+// CHECK: fmov     s4, #1.06250000            // encoding: [0x04,0x30,0x2e,0x1e]
+// CHECK: fmov     d10, #1.93750000           // encoding: [0x0a,0xf0,0x6f,0x1e]
+
+        fmov s12, #-1.0
+// CHECK: fmov     s12, #-1.00000000          // encoding: [0x0c,0x10,0x3e,0x1e]
+
+        fmov d16, #8.5
+// CHECK: fmov     d16, #8.50000000           // encoding: [0x10,0x30,0x64,0x1e]
+
+//------------------------------------------------------------------------------
+// Load/store exclusive
+//------------------------------------------------------------------------------
+
+        stxrb      w1, w2, [x3, #0]
+        stxrh      w2, w3, [x4]
+        stxr       wzr, w4, [sp]
+        stxr       w5, x6, [x7]
+// CHECK: stxrb    w1, w2, [x3]              // encoding: [0x62,0x7c,0x01,0x08]
+// CHECK: stxrh    w2, w3, [x4]              // encoding: [0x83,0x7c,0x02,0x48]
+// CHECK: stxr     wzr, w4, [sp]             // encoding: [0xe4,0x7f,0x1f,0x88]
+// CHECK: stxr     w5, x6, [x7]              // encoding: [0xe6,0x7c,0x05,0xc8]
+
+        ldxrb      w7, [x9]
+        ldxrh      wzr, [x10]
+        ldxr       w9, [sp]
+        ldxr       x10, [x11]
+// CHECK: ldxrb    w7, [x9]                  // encoding: [0x27,0x7d,0x5f,0x08]
+// CHECK: ldxrh    wzr, [x10]                // encoding: [0x5f,0x7d,0x5f,0x48]
+// CHECK: ldxr     w9, [sp]                  // encoding: [0xe9,0x7f,0x5f,0x88]
+// CHECK: ldxr     x10, [x11]                // encoding: [0x6a,0x7d,0x5f,0xc8]
+
+        stxp       w11, w12, w13, [x14]
+        stxp       wzr, x23, x14, [x15]
+// CHECK: stxp     w11, w12, w13, [x14]      // encoding: [0xcc,0x35,0x2b,0x88]
+// CHECK: stxp     wzr, x23, x14, [x15]      // encoding: [0xf7,0x39,0x3f,0xc8]
+
+        ldxp       w12, wzr, [sp]
+        ldxp       x13, x14, [x15]
+// CHECK: ldxp     w12, wzr, [sp]            // encoding: [0xec,0x7f,0x7f,0x88]
+// CHECK: ldxp     x13, x14, [x15]           // encoding: [0xed,0x39,0x7f,0xc8]
+
+        stlxrb     w14, w15, [x16]
+        stlxrh     w15, w16, [x17,#0]
+        stlxr      wzr, w17, [sp]
+        stlxr      w18, x19, [x20]
+// CHECK: stlxrb   w14, w15, [x16]           // encoding: [0x0f,0xfe,0x0e,0x08]
+// CHECK: stlxrh   w15, w16, [x17]           // encoding: [0x30,0xfe,0x0f,0x48]
+// CHECK: stlxr    wzr, w17, [sp]            // encoding: [0xf1,0xff,0x1f,0x88]
+// CHECK: stlxr    w18, x19, [x20]           // encoding: [0x93,0xfe,0x12,0xc8]
+
+        ldaxrb     w19, [x21]
+        ldaxrh     w20, [sp]
+        ldaxr      wzr, [x22]
+        ldaxr      x21, [x23]
+// CHECK: ldaxrb   w19, [x21]                // encoding: [0xb3,0xfe,0x5f,0x08]
+// CHECK: ldaxrh   w20, [sp]                 // encoding: [0xf4,0xff,0x5f,0x48]
+// CHECK: ldaxr    wzr, [x22]                // encoding: [0xdf,0xfe,0x5f,0x88]
+// CHECK: ldaxr    x21, [x23]                // encoding: [0xf5,0xfe,0x5f,0xc8]
+
+        stlxp      wzr, w22, w23, [x24]
+        stlxp      w25, x26, x27, [sp]
+// CHECK: stlxp    wzr, w22, w23, [x24]      // encoding: [0x16,0xdf,0x3f,0x88]
+// CHECK: stlxp    w25, x26, x27, [sp]       // encoding: [0xfa,0xef,0x39,0xc8]
+
+        ldaxp      w26, wzr, [sp]
+        ldaxp      x27, x28, [x30]
+// CHECK: ldaxp    w26, wzr, [sp]            // encoding: [0xfa,0xff,0x7f,0x88]
+// CHECK: ldaxp    x27, x28, [x30]           // encoding: [0xdb,0xf3,0x7f,0xc8]
+
+        stlrb      w27, [sp]
+        stlrh      w28, [x0]
+        stlr       wzr, [x1]
+        stlr       x30, [x2]
+// CHECK: stlrb    w27, [sp]                 // encoding: [0xfb,0xff,0x9f,0x08]
+// CHECK: stlrh    w28, [x0]                 // encoding: [0x1c,0xfc,0x9f,0x48]
+// CHECK: stlr     wzr, [x1]                 // encoding: [0x3f,0xfc,0x9f,0x88]
+// CHECK: stlr     x30, [x2]                 // encoding: [0x5e,0xfc,0x9f,0xc8]
+
+        ldarb      w29, [sp]
+        ldarh      w30, [x0]
+        ldar       wzr, [x1]
+        ldar       x1, [x2]
+// CHECK: ldarb    w29, [sp]                 // encoding: [0xfd,0xff,0xdf,0x08]
+// CHECK: ldarh    w30, [x0]                 // encoding: [0x1e,0xfc,0xdf,0x48]
+// CHECK: ldar     wzr, [x1]                 // encoding: [0x3f,0xfc,0xdf,0x88]
+// CHECK: ldar     x1, [x2]                  // encoding: [0x41,0xfc,0xdf,0xc8]
+
+        stlxp      wzr, w22, w23, [x24,#0]
+// CHECK: stlxp    wzr, w22, w23, [x24]      // encoding: [0x16,0xdf,0x3f,0x88]
+
+//------------------------------------------------------------------------------
+// Load/store (unaligned immediate): offsets used below range from #-256 to #255
+//------------------------------------------------------------------------------
+
+        sturb w9, [sp, #0]              // explicit #0 is expected to print as plain [sp]
+        sturh wzr, [x12, #255]
+        stur w16, [x0, #-256]
+        stur x28, [x14, #1]
+// CHECK: sturb    w9, [sp]                   // encoding: [0xe9,0x03,0x00,0x38]
+// CHECK: sturh    wzr, [x12, #255]           // encoding: [0x9f,0xf1,0x0f,0x78]
+// CHECK: stur     w16, [x0, #-256]           // encoding: [0x10,0x00,0x10,0xb8]
+// CHECK: stur     x28, [x14, #1]             // encoding: [0xdc,0x11,0x00,0xf8]
+
+        ldurb w1, [x20, #255]           // all four loads use #255, the largest offset in this section
+        ldurh w20, [x1, #255]
+        ldur w12, [sp, #255]
+        ldur xzr, [x12, #255]
+// CHECK: ldurb    w1, [x20, #255]            // encoding: [0x81,0xf2,0x4f,0x38]
+// CHECK: ldurh    w20, [x1, #255]            // encoding: [0x34,0xf0,0x4f,0x78]
+// CHECK: ldur     w12, [sp, #255]            // encoding: [0xec,0xf3,0x4f,0xb8]
+// CHECK: ldur     xzr, [x12, #255]           // encoding: [0x9f,0xf1,0x4f,0xf8]
+
+        ldursb x9, [x7, #-256]          // ldurs* and prfum forms, mostly at #-256 (the smallest offset used)
+        ldursh x17, [x19, #-256]
+        ldursw x20, [x15, #-256]
+        ldursw x13, [x2]                // no offset written at all
+        prfum pldl2keep, [sp, #-256]
+        ldursb w19, [x1, #-256]
+        ldursh w15, [x21, #-256]
+// CHECK: ldursb   x9, [x7, #-256]            // encoding: [0xe9,0x00,0x90,0x38]
+// CHECK: ldursh   x17, [x19, #-256]          // encoding: [0x71,0x02,0x90,0x78]
+// CHECK: ldursw   x20, [x15, #-256]          // encoding: [0xf4,0x01,0x90,0xb8]
+// CHECK: ldursw   x13, [x2]                  // encoding: [0x4d,0x00,0x80,0xb8]
+// CHECK: prfum    pldl2keep, [sp, #-256]     // encoding: [0xe2,0x03,0x90,0xf8]
+// CHECK: ldursb   w19, [x1, #-256]           // encoding: [0x33,0x00,0xd0,0x38]
+// CHECK: ldursh   w15, [x21, #-256]          // encoding: [0xaf,0x02,0xd0,0x78]
+
+        stur b0, [sp, #1]               // stores from b/h/s/d/q registers
+        stur h12, [x12, #-1]
+        stur s15, [x0, #255]
+        stur d31, [x5, #25]
+        stur q9, [x5]
+// CHECK: stur     b0, [sp, #1]               // encoding: [0xe0,0x13,0x00,0x3c]
+// CHECK: stur     h12, [x12, #-1]            // encoding: [0x8c,0xf1,0x1f,0x7c]
+// CHECK: stur     s15, [x0, #255]            // encoding: [0x0f,0xf0,0x0f,0xbc]
+// CHECK: stur     d31, [x5, #25]             // encoding: [0xbf,0x90,0x01,0xfc]
+// CHECK: stur     q9, [x5]                   // encoding: [0xa9,0x00,0x80,0x3c]
+
+        ldur b3, [sp]                   // loads into b/h/s/d/q registers
+        ldur h5, [x4, #-256]
+        ldur s7, [x12, #-1]
+        ldur d11, [x19, #4]
+        ldur q13, [x1, #2]
+// CHECK: ldur     b3, [sp]                   // encoding: [0xe3,0x03,0x40,0x3c]
+// CHECK: ldur     h5, [x4, #-256]            // encoding: [0x85,0x00,0x50,0x7c]
+// CHECK: ldur     s7, [x12, #-1]             // encoding: [0x87,0xf1,0x5f,0xbc]
+// CHECK: ldur     d11, [x19, #4]             // encoding: [0x6b,0x42,0x40,0xfc]
+// CHECK: ldur     q13, [x1, #2]              // encoding: [0x2d,0x20,0xc0,0x3c]
+
+//------------------------------------------------------------------------------
+// Load/store (unsigned immediate): non-negative offsets, from 0 up to #65520
+//------------------------------------------------------------------------------
+
+//// Basic addressing mode limits for 8-byte accesses: offset omitted, explicit #0, and up to #32760
+        ldr x0, [x0]
+        ldr x4, [x29, #0]
+        ldr x30, [x12, #32760]
+        ldr x20, [sp, #8]
+// CHECK: ldr      x0, [x0]                   // encoding: [0x00,0x00,0x40,0xf9]
+// CHECK: ldr      x4, [x29]                  // encoding: [0xa4,0x03,0x40,0xf9]
+// CHECK: ldr      x30, [x12, #32760]         // encoding: [0x9e,0xfd,0x7f,0xf9]
+// CHECK: ldr      x20, [sp, #8]              // encoding: [0xf4,0x07,0x40,0xf9]
+
+//// Rt treats 31 as the zero register (xzr), while the base register here is sp
+        ldr xzr, [sp]
+// CHECK: ldr      xzr, [sp]                  // encoding: [0xff,0x03,0x40,0xf9]
+
+//// 4-byte loads: check the address is still a 64-bit register (sp/x2); limits are no offset and #16380
+        ldr w2, [sp]
+        ldr w17, [sp, #16380]
+        ldr w13, [x2, #4]
+// CHECK: ldr      w2, [sp]                   // encoding: [0xe2,0x03,0x40,0xb9]
+// CHECK: ldr      w17, [sp, #16380]          // encoding: [0xf1,0xff,0x7f,0xb9]
+// CHECK: ldr      w13, [x2, #4]              // encoding: [0x4d,0x04,0x40,0xb9]
+
+//// Sign-extending 4-byte loads (ldrsw), including the #16380 limit
+        ldrsw x2, [x5,#4]
+        ldrsw x23, [sp, #16380]
+// CHECK: ldrsw    x2, [x5, #4]               // encoding: [0xa2,0x04,0x80,0xb9]
+// CHECK: ldrsw    x23, [sp, #16380]          // encoding: [0xf7,0xff,0xbf,0xb9]
+
+//// 2-byte loads (ldrh and ldrsh into w and x registers), offsets up to #8190
+        ldrh w2, [x4]
+        ldrsh w23, [x6, #8190]
+        ldrsh wzr, [sp, #2]
+        ldrsh x29, [x2, #2]
+// CHECK: ldrh     w2, [x4]                   // encoding: [0x82,0x00,0x40,0x79]
+// CHECK: ldrsh    w23, [x6, #8190]           // encoding: [0xd7,0xfc,0xff,0x79]
+// CHECK: ldrsh    wzr, [sp, #2]              // encoding: [0xff,0x07,0xc0,0x79]
+// CHECK: ldrsh    x29, [x2, #2]              // encoding: [0x5d,0x04,0x80,0x79]
+
+//// 1-byte loads (ldrb and ldrsb into w and x registers), offsets up to #4095
+        ldrb w26, [x3, #121]
+        ldrb w12, [x2, #0]
+        ldrsb w27, [sp, #4095]
+        ldrsb xzr, [x15]
+// CHECK: ldrb     w26, [x3, #121]            // encoding: [0x7a,0xe4,0x41,0x39]
+// CHECK: ldrb     w12, [x2]                  // encoding: [0x4c,0x00,0x40,0x39]
+// CHECK: ldrsb    w27, [sp, #4095]           // encoding: [0xfb,0xff,0xff,0x39]
+// CHECK: ldrsb    xzr, [x15]                 // encoding: [0xff,0x01,0x80,0x39]
+
+//// Stores: str/strh/strb, reusing the upper offsets seen in the loads above (#16380, #8190, #4095)
+        str x30, [sp]
+        str w20, [x4, #16380]
+        strh w20, [x10, #14]
+        strh w17, [sp, #8190]
+        strb w23, [x3, #4095]
+        strb wzr, [x2]
+// CHECK: str      x30, [sp]                  // encoding: [0xfe,0x03,0x00,0xf9]
+// CHECK: str      w20, [x4, #16380]          // encoding: [0x94,0xfc,0x3f,0xb9]
+// CHECK: strh     w20, [x10, #14]            // encoding: [0x54,0x1d,0x00,0x79]
+// CHECK: strh     w17, [sp, #8190]           // encoding: [0xf1,0xff,0x3f,0x79]
+// CHECK: strb     w23, [x3, #4095]           // encoding: [0x77,0xfc,0x3f,0x39]
+// CHECK: strb     wzr, [x2]                  // encoding: [0x5f,0x00,0x00,0x39]
+
+//// Relocations: a #:lo12:sym offset is expected to produce a fixup whose kind names the access size
+        str x15, [x5, #:lo12:sym]
+        ldrb w15, [x5, #:lo12:sym]
+        ldrsh x15, [x5, #:lo12:sym]
+        ldrsw x15, [x5, #:lo12:sym]
+        ldr x15, [x5, #:lo12:sym]
+        ldr q3, [x2, #:lo12:sym]
+// CHECK: str     x15, [x5, #:lo12:sym]   // encoding: [0xaf'A',A,A,0xf9'A']
+// CHECK:                                         //   fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst64_lo12
+// CHECK: ldrb    w15, [x5, #:lo12:sym]   // encoding: [0xaf'A',A,0x40'A',0x39'A']
+// CHECK:                                         //   fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst8_lo12
+// CHECK: ldrsh   x15, [x5, #:lo12:sym]   // encoding: [0xaf'A',A,0x80'A',0x79'A']
+// CHECK:                                         //   fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst16_lo12
+// CHECK: ldrsw   x15, [x5, #:lo12:sym]   // encoding: [0xaf'A',A,0x80'A',0xb9'A']
+// CHECK:                                         //   fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst32_lo12
+// CHECK: ldr     x15, [x5, #:lo12:sym]   // encoding: [0xaf'A',A,0x40'A',0xf9'A']
+// CHECK:                                         //   fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst64_lo12
+// CHECK: ldr     q3, [x2, #:lo12:sym]    // encoding: [0x43'A',A,0xc0'A',0x3d'A']
+// CHECK:                                         //   fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst128_lo12
+
+        prfm pldl1keep, [sp, #8]        // named prefetch operands: pld/pli/pst x l1/l2/l3 x keep/strm
+        prfm pldl1strm, [x3]            // expected output spells the missing offset as [x3, #0]
+        prfm pldl2keep, [x5,#16]
+        prfm pldl2strm, [x2]
+        prfm pldl3keep, [x5]
+        prfm pldl3strm, [x6]
+        prfm plil1keep, [sp, #8]
+        prfm plil1strm, [x3]
+        prfm plil2keep, [x5,#16]
+        prfm plil2strm, [x2]
+        prfm plil3keep, [x5]
+        prfm plil3strm, [x6]
+        prfm pstl1keep, [sp, #8]
+        prfm pstl1strm, [x3]
+        prfm pstl2keep, [x5,#16]
+        prfm pstl2strm, [x2]
+        prfm pstl3keep, [x5]
+        prfm pstl3strm, [x6]
+        prfm #15, [sp]                  // raw immediate operand instead of a named one
+// CHECK: prfm    pldl1keep, [sp, #8]     // encoding: [0xe0,0x07,0x80,0xf9]
+// CHECK: prfm    pldl1strm, [x3, #0]     // encoding: [0x61,0x00,0x80,0xf9]
+// CHECK: prfm    pldl2keep, [x5, #16]    // encoding: [0xa2,0x08,0x80,0xf9]
+// CHECK: prfm    pldl2strm, [x2, #0]     // encoding: [0x43,0x00,0x80,0xf9]
+// CHECK: prfm    pldl3keep, [x5, #0]     // encoding: [0xa4,0x00,0x80,0xf9]
+// CHECK: prfm    pldl3strm, [x6, #0]     // encoding: [0xc5,0x00,0x80,0xf9]
+// CHECK: prfm    plil1keep, [sp, #8]     // encoding: [0xe8,0x07,0x80,0xf9]
+// CHECK: prfm    plil1strm, [x3, #0]     // encoding: [0x69,0x00,0x80,0xf9]
+// CHECK: prfm    plil2keep, [x5, #16]    // encoding: [0xaa,0x08,0x80,0xf9]
+// CHECK: prfm    plil2strm, [x2, #0]     // encoding: [0x4b,0x00,0x80,0xf9]
+// CHECK: prfm    plil3keep, [x5, #0]     // encoding: [0xac,0x00,0x80,0xf9]
+// CHECK: prfm    plil3strm, [x6, #0]     // encoding: [0xcd,0x00,0x80,0xf9]
+// CHECK: prfm    pstl1keep, [sp, #8]     // encoding: [0xf0,0x07,0x80,0xf9]
+// CHECK: prfm    pstl1strm, [x3, #0]     // encoding: [0x71,0x00,0x80,0xf9]
+// CHECK: prfm    pstl2keep, [x5, #16]    // encoding: [0xb2,0x08,0x80,0xf9]
+// CHECK: prfm    pstl2strm, [x2, #0]     // encoding: [0x53,0x00,0x80,0xf9]
+// CHECK: prfm    pstl3keep, [x5, #0]     // encoding: [0xb4,0x00,0x80,0xf9]
+// CHECK: prfm    pstl3strm, [x6, #0]     // encoding: [0xd5,0x00,0x80,0xf9]
+// CHECK: prfm    #15, [sp, #0]           // encoding: [0xef,0x03,0x80,0xf9]
+
+//// Floating-point versions: b/h/s/d/q registers at offsets #4095, #8190, #16380, #32760 and #65520
+
+        ldr b31, [sp, #4095]
+        ldr h20, [x2, #8190]
+        ldr s10, [x19, #16380]
+        ldr d3, [x10, #32760]
+        str q12, [sp, #65520]
+// CHECK: ldr      b31, [sp, #4095]           // encoding: [0xff,0xff,0x7f,0x3d]
+// CHECK: ldr      h20, [x2, #8190]           // encoding: [0x54,0xfc,0x7f,0x7d]
+// CHECK: ldr      s10, [x19, #16380]         // encoding: [0x6a,0xfe,0x7f,0xbd]
+// CHECK: ldr      d3, [x10, #32760]          // encoding: [0x43,0xfd,0x7f,0xfd]
+// CHECK: str      q12, [sp, #65520]          // encoding: [0xec,0xff,0xbf,0x3d]
+
+//------------------------------------------------------------------------------
+// Load/store register (register offset): lsl/sxtx/uxtw/sxtw index modifiers
+//------------------------------------------------------------------------------
+
+        ldrb w3, [sp, x5]               // byte accesses: a written "lsl #0"/"#0" is kept in the expected output
+        ldrb w9, [x27, x6, lsl #0]
+        ldrsb w10, [x30, x7]
+        ldrb w11, [x29, x3, sxtx]
+        strb w12, [x28, xzr, sxtx #0]
+        ldrb w14, [x26, w6, uxtw]
+        ldrsb w15, [x25, w7, uxtw #0]
+        ldrb w17, [x23, w9, sxtw]
+        ldrsb x18, [x22, w10, sxtw #0]
+// CHECK: ldrb     w3, [sp, x5]               // encoding: [0xe3,0x6b,0x65,0x38]
+// CHECK: ldrb     w9, [x27, x6, lsl #0]      // encoding: [0x69,0x7b,0x66,0x38]
+// CHECK: ldrsb    w10, [x30, x7]             // encoding: [0xca,0x6b,0xe7,0x38]
+// CHECK: ldrb     w11, [x29, x3, sxtx]       // encoding: [0xab,0xeb,0x63,0x38]
+// CHECK: strb     w12, [x28, xzr, sxtx #0]   // encoding: [0x8c,0xfb,0x3f,0x38]
+// CHECK: ldrb     w14, [x26, w6, uxtw]       // encoding: [0x4e,0x4b,0x66,0x38]
+// CHECK: ldrsb    w15, [x25, w7, uxtw #0]    // encoding: [0x2f,0x5b,0xe7,0x38]
+// CHECK: ldrb     w17, [x23, w9, sxtw]       // encoding: [0xf1,0xca,0x69,0x38]
+// CHECK: ldrsb    x18, [x22, w10, sxtw #0]   // encoding: [0xd2,0xda,0xaa,0x38]
+
+        ldrsh w3, [sp, x5]              // halfword accesses: shift is #0 or #1; a written #0 is dropped in the expected output
+        ldrsh w9, [x27, x6, lsl #0]
+        ldrh w10, [x30, x7, lsl #1]
+        strh w11, [x29, x3, sxtx]
+        ldrh w12, [x28, xzr, sxtx #0]
+        ldrsh x13, [x27, x5, sxtx #1]
+        ldrh w14, [x26, w6, uxtw]
+        ldrh w15, [x25, w7, uxtw #0]
+        ldrsh w16, [x24, w8, uxtw #1]
+        ldrh w17, [x23, w9, sxtw]
+        ldrh w18, [x22, w10, sxtw #0]
+        strh w19, [x21, wzr, sxtw #1]
+// CHECK: ldrsh    w3, [sp, x5]               // encoding: [0xe3,0x6b,0xe5,0x78]
+// CHECK: ldrsh    w9, [x27, x6]              // encoding: [0x69,0x6b,0xe6,0x78]
+// CHECK: ldrh     w10, [x30, x7, lsl #1]     // encoding: [0xca,0x7b,0x67,0x78]
+// CHECK: strh     w11, [x29, x3, sxtx]       // encoding: [0xab,0xeb,0x23,0x78]
+// CHECK: ldrh     w12, [x28, xzr, sxtx]      // encoding: [0x8c,0xeb,0x7f,0x78]
+// CHECK: ldrsh    x13, [x27, x5, sxtx #1]    // encoding: [0x6d,0xfb,0xa5,0x78]
+// CHECK: ldrh     w14, [x26, w6, uxtw]       // encoding: [0x4e,0x4b,0x66,0x78]
+// CHECK: ldrh     w15, [x25, w7, uxtw]       // encoding: [0x2f,0x4b,0x67,0x78]
+// CHECK: ldrsh    w16, [x24, w8, uxtw #1]    // encoding: [0x10,0x5b,0xe8,0x78]
+// CHECK: ldrh     w17, [x23, w9, sxtw]       // encoding: [0xf1,0xca,0x69,0x78]
+// CHECK: ldrh     w18, [x22, w10, sxtw]      // encoding: [0xd2,0xca,0x6a,0x78]
+// CHECK: strh     w19, [x21, wzr, sxtw #1]   // encoding: [0xb3,0xda,0x3f,0x78]
+
+        ldr w3, [sp, x5]                // w- and s-register accesses plus ldrsw: shift is #0 or #2
+        ldr s9, [x27, x6, lsl #0]
+        ldr w10, [x30, x7, lsl #2]
+        ldr w11, [x29, x3, sxtx]
+        str s12, [x28, xzr, sxtx #0]
+        str w13, [x27, x5, sxtx #2]
+        str w14, [x26, w6, uxtw]
+        ldr w15, [x25, w7, uxtw #0]
+        ldr w16, [x24, w8, uxtw #2]
+        ldrsw x17, [x23, w9, sxtw]
+        ldr w18, [x22, w10, sxtw #0]
+        ldrsw x19, [x21, wzr, sxtw #2]
+// CHECK: ldr      w3, [sp, x5]               // encoding: [0xe3,0x6b,0x65,0xb8]
+// CHECK: ldr      s9, [x27, x6]              // encoding: [0x69,0x6b,0x66,0xbc]
+// CHECK: ldr      w10, [x30, x7, lsl #2]     // encoding: [0xca,0x7b,0x67,0xb8]
+// CHECK: ldr      w11, [x29, x3, sxtx]       // encoding: [0xab,0xeb,0x63,0xb8]
+// CHECK: str      s12, [x28, xzr, sxtx]      // encoding: [0x8c,0xeb,0x3f,0xbc]
+// CHECK: str      w13, [x27, x5, sxtx #2]    // encoding: [0x6d,0xfb,0x25,0xb8]
+// CHECK: str      w14, [x26, w6, uxtw]       // encoding: [0x4e,0x4b,0x26,0xb8]
+// CHECK: ldr      w15, [x25, w7, uxtw]       // encoding: [0x2f,0x4b,0x67,0xb8]
+// CHECK: ldr      w16, [x24, w8, uxtw #2]    // encoding: [0x10,0x5b,0x68,0xb8]
+// CHECK: ldrsw    x17, [x23, w9, sxtw]       // encoding: [0xf1,0xca,0xa9,0xb8]
+// CHECK: ldr      w18, [x22, w10, sxtw]      // encoding: [0xd2,0xca,0x6a,0xb8]
+// CHECK: ldrsw    x19, [x21, wzr, sxtw #2]   // encoding: [0xb3,0xda,0xbf,0xb8]
+
+        ldr x3, [sp, x5]                // x- and d-register accesses plus prfm: shift is #0 or #3
+        str x9, [x27, x6, lsl #0]
+        ldr d10, [x30, x7, lsl #3]
+        str x11, [x29, x3, sxtx]
+        ldr x12, [x28, xzr, sxtx #0]
+        ldr x13, [x27, x5, sxtx #3]
+        prfm pldl1keep, [x26, w6, uxtw]
+        ldr x15, [x25, w7, uxtw #0]
+        ldr x16, [x24, w8, uxtw #3]
+        ldr x17, [x23, w9, sxtw]
+        ldr x18, [x22, w10, sxtw #0]
+        str d19, [x21, wzr, sxtw #3]
+        prfm #6, [x0, x5]               // raw prefetch operand; expected output adds an explicit "lsl #0"
+// CHECK: ldr      x3, [sp, x5]               // encoding: [0xe3,0x6b,0x65,0xf8]
+// CHECK: str      x9, [x27, x6]              // encoding: [0x69,0x6b,0x26,0xf8]
+// CHECK: ldr      d10, [x30, x7, lsl #3]     // encoding: [0xca,0x7b,0x67,0xfc]
+// CHECK: str      x11, [x29, x3, sxtx]       // encoding: [0xab,0xeb,0x23,0xf8]
+// CHECK: ldr      x12, [x28, xzr, sxtx]      // encoding: [0x8c,0xeb,0x7f,0xf8]
+// CHECK: ldr      x13, [x27, x5, sxtx #3]    // encoding: [0x6d,0xfb,0x65,0xf8]
+// CHECK: prfm     pldl1keep, [x26, w6, uxtw] // encoding: [0x40,0x4b,0xa6,0xf8]
+// CHECK: ldr      x15, [x25, w7, uxtw]       // encoding: [0x2f,0x4b,0x67,0xf8]
+// CHECK: ldr      x16, [x24, w8, uxtw #3]    // encoding: [0x10,0x5b,0x68,0xf8]
+// CHECK: ldr      x17, [x23, w9, sxtw]       // encoding: [0xf1,0xca,0x69,0xf8]
+// CHECK: ldr      x18, [x22, w10, sxtw]      // encoding: [0xd2,0xca,0x6a,0xf8]
+// CHECK: str      d19, [x21, wzr, sxtw #3]   // encoding: [0xb3,0xda,0x3f,0xfc]
+// CHECK: prfm     #6, [x0, x5, lsl #0]       // encoding: [0x06,0x68,0xa5,0xf8]
+
+        ldr q3, [sp, x5]                // q-register accesses: shift is #0 or #4
+        ldr q9, [x27, x6, lsl #0]
+        ldr q10, [x30, x7, lsl #4]
+        str q11, [x29, x3, sxtx]
+        str q12, [x28, xzr, sxtx #0]
+        str q13, [x27, x5, sxtx #4]
+        ldr q14, [x26, w6, uxtw]
+        ldr q15, [x25, w7, uxtw #0]
+        ldr q16, [x24, w8, uxtw #4]
+        ldr q17, [x23, w9, sxtw]
+        str q18, [x22, w10, sxtw #0]
+        ldr q19, [x21, wzr, sxtw #4]
+// CHECK: ldr      q3, [sp, x5]               // encoding: [0xe3,0x6b,0xe5,0x3c]
+// CHECK: ldr      q9, [x27, x6]              // encoding: [0x69,0x6b,0xe6,0x3c]
+// CHECK: ldr      q10, [x30, x7, lsl #4]     // encoding: [0xca,0x7b,0xe7,0x3c]
+// CHECK: str      q11, [x29, x3, sxtx]       // encoding: [0xab,0xeb,0xa3,0x3c]
+// CHECK: str      q12, [x28, xzr, sxtx]      // encoding: [0x8c,0xeb,0xbf,0x3c]
+// CHECK: str      q13, [x27, x5, sxtx #4]    // encoding: [0x6d,0xfb,0xa5,0x3c]
+// CHECK: ldr      q14, [x26, w6, uxtw]       // encoding: [0x4e,0x4b,0xe6,0x3c]
+// CHECK: ldr      q15, [x25, w7, uxtw]       // encoding: [0x2f,0x4b,0xe7,0x3c]
+// CHECK: ldr      q16, [x24, w8, uxtw #4]    // encoding: [0x10,0x5b,0xe8,0x3c]
+// CHECK: ldr      q17, [x23, w9, sxtw]       // encoding: [0xf1,0xca,0xe9,0x3c]
+// CHECK: str      q18, [x22, w10, sxtw]      // encoding: [0xd2,0xca,0xaa,0x3c]
+// CHECK: ldr      q19, [x21, wzr, sxtw #4]   // encoding: [0xb3,0xda,0xff,0x3c]
+
+//------------------------------------------------------------------------------
+// Load/store register (immediate post-indexed): every case uses #255, #1 or #-256
+//------------------------------------------------------------------------------
+
+        strb w9, [x2], #255             // byte and halfword stores
+        strb w10, [x3], #1
+        strb w10, [x3], #-256
+        strh w9, [x2], #255
+        strh w9, [x2], #1
+        strh w10, [x3], #-256
+// CHECK: strb     w9, [x2], #255             // encoding: [0x49,0xf4,0x0f,0x38]
+// CHECK: strb     w10, [x3], #1              // encoding: [0x6a,0x14,0x00,0x38]
+// CHECK: strb     w10, [x3], #-256           // encoding: [0x6a,0x04,0x10,0x38]
+// CHECK: strh     w9, [x2], #255             // encoding: [0x49,0xf4,0x0f,0x78]
+// CHECK: strh     w9, [x2], #1               // encoding: [0x49,0x14,0x00,0x78]
+// CHECK: strh     w10, [x3], #-256           // encoding: [0x6a,0x04,0x10,0x78]
+
+        str w19, [sp], #255             // w- and x-register stores (including xzr as the source)
+        str w20, [x30], #1
+        str w21, [x12], #-256
+        str xzr, [x9], #255
+        str x2, [x3], #1
+        str x19, [x12], #-256
+// CHECK: str      w19, [sp], #255            // encoding: [0xf3,0xf7,0x0f,0xb8]
+// CHECK: str      w20, [x30], #1             // encoding: [0xd4,0x17,0x00,0xb8]
+// CHECK: str      w21, [x12], #-256          // encoding: [0x95,0x05,0x10,0xb8]
+// CHECK: str      xzr, [x9], #255            // encoding: [0x3f,0xf5,0x0f,0xf8]
+// CHECK: str      x2, [x3], #1               // encoding: [0x62,0x14,0x00,0xf8]
+// CHECK: str      x19, [x12], #-256          // encoding: [0x93,0x05,0x10,0xf8]
+
+        ldrb w9, [x2], #255             // byte and halfword loads
+        ldrb w10, [x3], #1
+        ldrb w10, [x3], #-256
+        ldrh w9, [x2], #255
+        ldrh w9, [x2], #1
+        ldrh w10, [x3], #-256
+// CHECK: ldrb     w9, [x2], #255             // encoding: [0x49,0xf4,0x4f,0x38]
+// CHECK: ldrb     w10, [x3], #1              // encoding: [0x6a,0x14,0x40,0x38]
+// CHECK: ldrb     w10, [x3], #-256           // encoding: [0x6a,0x04,0x50,0x38]
+// CHECK: ldrh     w9, [x2], #255             // encoding: [0x49,0xf4,0x4f,0x78]
+// CHECK: ldrh     w9, [x2], #1               // encoding: [0x49,0x14,0x40,0x78]
+// CHECK: ldrh     w10, [x3], #-256           // encoding: [0x6a,0x04,0x50,0x78]
+
+        ldr w19, [sp], #255             // w- and x-register loads (including xzr as the destination)
+        ldr w20, [x30], #1
+        ldr w21, [x12], #-256
+        ldr xzr, [x9], #255
+        ldr x2, [x3], #1
+        ldr x19, [x12], #-256
+// CHECK: ldr      w19, [sp], #255            // encoding: [0xf3,0xf7,0x4f,0xb8]
+// CHECK: ldr      w20, [x30], #1             // encoding: [0xd4,0x17,0x40,0xb8]
+// CHECK: ldr      w21, [x12], #-256          // encoding: [0x95,0x05,0x50,0xb8]
+// CHECK: ldr      xzr, [x9], #255            // encoding: [0x3f,0xf5,0x4f,0xf8]
+// CHECK: ldr      x2, [x3], #1               // encoding: [0x62,0x14,0x40,0xf8]
+// CHECK: ldr      x19, [x12], #-256          // encoding: [0x93,0x05,0x50,0xf8]
+
+        ldrsb xzr, [x9], #255           // ldrsb/ldrsh/ldrsw into x registers
+        ldrsb x2, [x3], #1
+        ldrsb x19, [x12], #-256
+        ldrsh xzr, [x9], #255
+        ldrsh x2, [x3], #1
+        ldrsh x19, [x12], #-256
+        ldrsw xzr, [x9], #255
+        ldrsw x2, [x3], #1
+        ldrsw x19, [x12], #-256
+// CHECK: ldrsb    xzr, [x9], #255            // encoding: [0x3f,0xf5,0x8f,0x38]
+// CHECK: ldrsb    x2, [x3], #1               // encoding: [0x62,0x14,0x80,0x38]
+// CHECK: ldrsb    x19, [x12], #-256          // encoding: [0x93,0x05,0x90,0x38]
+// CHECK: ldrsh    xzr, [x9], #255            // encoding: [0x3f,0xf5,0x8f,0x78]
+// CHECK: ldrsh    x2, [x3], #1               // encoding: [0x62,0x14,0x80,0x78]
+// CHECK: ldrsh    x19, [x12], #-256          // encoding: [0x93,0x05,0x90,0x78]
+// CHECK: ldrsw    xzr, [x9], #255            // encoding: [0x3f,0xf5,0x8f,0xb8]
+// CHECK: ldrsw    x2, [x3], #1               // encoding: [0x62,0x14,0x80,0xb8]
+// CHECK: ldrsw    x19, [x12], #-256          // encoding: [0x93,0x05,0x90,0xb8]
+
+        ldrsb wzr, [x9], #255           // ldrsb/ldrsh into w registers
+        ldrsb w2, [x3], #1
+        ldrsb w19, [x12], #-256
+        ldrsh wzr, [x9], #255
+        ldrsh w2, [x3], #1
+        ldrsh w19, [x12], #-256
+// CHECK: ldrsb    wzr, [x9], #255            // encoding: [0x3f,0xf5,0xcf,0x38]
+// CHECK: ldrsb    w2, [x3], #1               // encoding: [0x62,0x14,0xc0,0x38]
+// CHECK: ldrsb    w19, [x12], #-256          // encoding: [0x93,0x05,0xd0,0x38]
+// CHECK: ldrsh    wzr, [x9], #255            // encoding: [0x3f,0xf5,0xcf,0x78]
+// CHECK: ldrsh    w2, [x3], #1               // encoding: [0x62,0x14,0xc0,0x78]
+// CHECK: ldrsh    w19, [x12], #-256          // encoding: [0x93,0x05,0xd0,0x78]
+
+        str b0, [x0], #255              // stores from b/h/s/d registers
+        str b3, [x3], #1
+        str b5, [sp], #-256
+        str h10, [x10], #255
+        str h13, [x23], #1
+        str h15, [sp], #-256
+        str s20, [x20], #255
+        str s23, [x23], #1
+        str s25, [x0], #-256
+        str d20, [x20], #255
+        str d23, [x23], #1
+        str d25, [x0], #-256
+// CHECK: str      b0, [x0], #255             // encoding: [0x00,0xf4,0x0f,0x3c]
+// CHECK: str      b3, [x3], #1               // encoding: [0x63,0x14,0x00,0x3c]
+// CHECK: str      b5, [sp], #-256            // encoding: [0xe5,0x07,0x10,0x3c]
+// CHECK: str      h10, [x10], #255           // encoding: [0x4a,0xf5,0x0f,0x7c]
+// CHECK: str      h13, [x23], #1             // encoding: [0xed,0x16,0x00,0x7c]
+// CHECK: str      h15, [sp], #-256           // encoding: [0xef,0x07,0x10,0x7c]
+// CHECK: str      s20, [x20], #255           // encoding: [0x94,0xf6,0x0f,0xbc]
+// CHECK: str      s23, [x23], #1             // encoding: [0xf7,0x16,0x00,0xbc]
+// CHECK: str      s25, [x0], #-256           // encoding: [0x19,0x04,0x10,0xbc]
+// CHECK: str      d20, [x20], #255           // encoding: [0x94,0xf6,0x0f,0xfc]
+// CHECK: str      d23, [x23], #1             // encoding: [0xf7,0x16,0x00,0xfc]
+// CHECK: str      d25, [x0], #-256           // encoding: [0x19,0x04,0x10,0xfc]
+
+        ldr b0, [x0], #255              // loads into b/h/s/d registers
+        ldr b3, [x3], #1
+        ldr b5, [sp], #-256
+        ldr h10, [x10], #255
+        ldr h13, [x23], #1
+        ldr h15, [sp], #-256
+        ldr s20, [x20], #255
+        ldr s23, [x23], #1
+        ldr s25, [x0], #-256
+        ldr d20, [x20], #255
+        ldr d23, [x23], #1
+        ldr d25, [x0], #-256
+// CHECK: ldr      b0, [x0], #255             // encoding: [0x00,0xf4,0x4f,0x3c]
+// CHECK: ldr      b3, [x3], #1               // encoding: [0x63,0x14,0x40,0x3c]
+// CHECK: ldr      b5, [sp], #-256            // encoding: [0xe5,0x07,0x50,0x3c]
+// CHECK: ldr      h10, [x10], #255           // encoding: [0x4a,0xf5,0x4f,0x7c]
+// CHECK: ldr      h13, [x23], #1             // encoding: [0xed,0x16,0x40,0x7c]
+// CHECK: ldr      h15, [sp], #-256           // encoding: [0xef,0x07,0x50,0x7c]
+// CHECK: ldr      s20, [x20], #255           // encoding: [0x94,0xf6,0x4f,0xbc]
+// CHECK: ldr      s23, [x23], #1             // encoding: [0xf7,0x16,0x40,0xbc]
+// CHECK: ldr      s25, [x0], #-256           // encoding: [0x19,0x04,0x50,0xbc]
+// CHECK: ldr      d20, [x20], #255           // encoding: [0x94,0xf6,0x4f,0xfc]
+// CHECK: ldr      d23, [x23], #1             // encoding: [0xf7,0x16,0x40,0xfc]
+// CHECK: ldr      d25, [x0], #-256           // encoding: [0x19,0x04,0x50,0xfc]
+
+        ldr q20, [x1], #255             // q-register loads and stores
+        ldr q23, [x9], #1
+        ldr q25, [x20], #-256
+        str q10, [x1], #255
+        str q22, [sp], #1
+        str q21, [x20], #-256
+// CHECK: ldr      q20, [x1], #255            // encoding: [0x34,0xf4,0xcf,0x3c]
+// CHECK: ldr      q23, [x9], #1              // encoding: [0x37,0x15,0xc0,0x3c]
+// CHECK: ldr      q25, [x20], #-256          // encoding: [0x99,0x06,0xd0,0x3c]
+// CHECK: str      q10, [x1], #255            // encoding: [0x2a,0xf4,0x8f,0x3c]
+// CHECK: str      q22, [sp], #1              // encoding: [0xf6,0x17,0x80,0x3c]
+// CHECK: str      q21, [x20], #-256          // encoding: [0x95,0x06,0x90,0x3c]
+
+//------------------------------------------------------------------------------
+// Load/store register (immediate pre-indexed): "[base, #imm]!" with #0, #255, #1 or #-256
+//------------------------------------------------------------------------------
+
+        ldr x3, [x4, #0]!               // a pre-indexed #0 stays explicit in the expected output
+        ldr xzr, [sp, #0]!
+// CHECK: ldr      x3, [x4, #0]!              // encoding: [0x83,0x0c,0x40,0xf8]
+// CHECK: ldr      xzr, [sp, #0]!              // encoding: [0xff,0x0f,0x40,0xf8]
+
+        strb w9, [x2, #255]!            // byte and halfword stores
+        strb w10, [x3, #1]!
+        strb w10, [x3, #-256]!
+        strh w9, [x2, #255]!
+        strh w9, [x2, #1]!
+        strh w10, [x3, #-256]!
+// CHECK: strb     w9, [x2, #255]!            // encoding: [0x49,0xfc,0x0f,0x38]
+// CHECK: strb     w10, [x3, #1]!             // encoding: [0x6a,0x1c,0x00,0x38]
+// CHECK: strb     w10, [x3, #-256]!          // encoding: [0x6a,0x0c,0x10,0x38]
+// CHECK: strh     w9, [x2, #255]!            // encoding: [0x49,0xfc,0x0f,0x78]
+// CHECK: strh     w9, [x2, #1]!              // encoding: [0x49,0x1c,0x00,0x78]
+// CHECK: strh     w10, [x3, #-256]!          // encoding: [0x6a,0x0c,0x10,0x78]
+
+        str w19, [sp, #255]!            // w- and x-register stores (including xzr as the source)
+        str w20, [x30, #1]!
+        str w21, [x12, #-256]!
+        str xzr, [x9, #255]!
+        str x2, [x3, #1]!
+        str x19, [x12, #-256]!
+// CHECK: str      w19, [sp, #255]!           // encoding: [0xf3,0xff,0x0f,0xb8]
+// CHECK: str      w20, [x30, #1]!            // encoding: [0xd4,0x1f,0x00,0xb8]
+// CHECK: str      w21, [x12, #-256]!         // encoding: [0x95,0x0d,0x10,0xb8]
+// CHECK: str      xzr, [x9, #255]!           // encoding: [0x3f,0xfd,0x0f,0xf8]
+// CHECK: str      x2, [x3, #1]!              // encoding: [0x62,0x1c,0x00,0xf8]
+// CHECK: str      x19, [x12, #-256]!         // encoding: [0x93,0x0d,0x10,0xf8]
+
+        ldrb w9, [x2, #255]!            // byte and halfword loads
+        ldrb w10, [x3, #1]!
+        ldrb w10, [x3, #-256]!
+        ldrh w9, [x2, #255]!
+        ldrh w9, [x2, #1]!
+        ldrh w10, [x3, #-256]!
+// CHECK: ldrb     w9, [x2, #255]!            // encoding: [0x49,0xfc,0x4f,0x38]
+// CHECK: ldrb     w10, [x3, #1]!             // encoding: [0x6a,0x1c,0x40,0x38]
+// CHECK: ldrb     w10, [x3, #-256]!          // encoding: [0x6a,0x0c,0x50,0x38]
+// CHECK: ldrh     w9, [x2, #255]!            // encoding: [0x49,0xfc,0x4f,0x78]
+// CHECK: ldrh     w9, [x2, #1]!              // encoding: [0x49,0x1c,0x40,0x78]
+// CHECK: ldrh     w10, [x3, #-256]!          // encoding: [0x6a,0x0c,0x50,0x78]
+
+        ldr w19, [sp, #255]!            // w- and x-register loads (including xzr as the destination)
+        ldr w20, [x30, #1]!
+        ldr w21, [x12, #-256]!
+        ldr xzr, [x9, #255]!
+        ldr x2, [x3, #1]!
+        ldr x19, [x12, #-256]!
+// CHECK: ldr      w19, [sp, #255]!           // encoding: [0xf3,0xff,0x4f,0xb8]
+// CHECK: ldr      w20, [x30, #1]!            // encoding: [0xd4,0x1f,0x40,0xb8]
+// CHECK: ldr      w21, [x12, #-256]!         // encoding: [0x95,0x0d,0x50,0xb8]
+// CHECK: ldr      xzr, [x9, #255]!           // encoding: [0x3f,0xfd,0x4f,0xf8]
+// CHECK: ldr      x2, [x3, #1]!              // encoding: [0x62,0x1c,0x40,0xf8]
+// CHECK: ldr      x19, [x12, #-256]!         // encoding: [0x93,0x0d,0x50,0xf8]
+
+        ldrsb xzr, [x9, #255]!          // ldrsb/ldrsh/ldrsw into x registers
+        ldrsb x2, [x3, #1]!
+        ldrsb x19, [x12, #-256]!
+        ldrsh xzr, [x9, #255]!
+        ldrsh x2, [x3, #1]!
+        ldrsh x19, [x12, #-256]!
+        ldrsw xzr, [x9, #255]!
+        ldrsw x2, [x3, #1]!
+        ldrsw x19, [x12, #-256]!
+// CHECK: ldrsb    xzr, [x9, #255]!           // encoding: [0x3f,0xfd,0x8f,0x38]
+// CHECK: ldrsb    x2, [x3, #1]!              // encoding: [0x62,0x1c,0x80,0x38]
+// CHECK: ldrsb    x19, [x12, #-256]!         // encoding: [0x93,0x0d,0x90,0x38]
+// CHECK: ldrsh    xzr, [x9, #255]!           // encoding: [0x3f,0xfd,0x8f,0x78]
+// CHECK: ldrsh    x2, [x3, #1]!              // encoding: [0x62,0x1c,0x80,0x78]
+// CHECK: ldrsh    x19, [x12, #-256]!         // encoding: [0x93,0x0d,0x90,0x78]
+// CHECK: ldrsw    xzr, [x9, #255]!           // encoding: [0x3f,0xfd,0x8f,0xb8]
+// CHECK: ldrsw    x2, [x3, #1]!              // encoding: [0x62,0x1c,0x80,0xb8]
+// CHECK: ldrsw    x19, [x12, #-256]!         // encoding: [0x93,0x0d,0x90,0xb8]
+
+        ldrsb wzr, [x9, #255]!          // ldrsb/ldrsh into w registers
+        ldrsb w2, [x3, #1]!
+        ldrsb w19, [x12, #-256]!
+        ldrsh wzr, [x9, #255]!
+        ldrsh w2, [x3, #1]!
+        ldrsh w19, [x12, #-256]!
+// CHECK: ldrsb    wzr, [x9, #255]!           // encoding: [0x3f,0xfd,0xcf,0x38]
+// CHECK: ldrsb    w2, [x3, #1]!              // encoding: [0x62,0x1c,0xc0,0x38]
+// CHECK: ldrsb    w19, [x12, #-256]!         // encoding: [0x93,0x0d,0xd0,0x38]
+// CHECK: ldrsh    wzr, [x9, #255]!           // encoding: [0x3f,0xfd,0xcf,0x78]
+// CHECK: ldrsh    w2, [x3, #1]!              // encoding: [0x62,0x1c,0xc0,0x78]
+// CHECK: ldrsh    w19, [x12, #-256]!         // encoding: [0x93,0x0d,0xd0,0x78]
+
+        str b0, [x0, #255]!             // stores from b/h/s/d registers
+        str b3, [x3, #1]!
+        str b5, [sp, #-256]!
+        str h10, [x10, #255]!
+        str h13, [x23, #1]!
+        str h15, [sp, #-256]!
+        str s20, [x20, #255]!
+        str s23, [x23, #1]!
+        str s25, [x0, #-256]!
+        str d20, [x20, #255]!
+        str d23, [x23, #1]!
+        str d25, [x0, #-256]!
+// CHECK: str      b0, [x0, #255]!            // encoding: [0x00,0xfc,0x0f,0x3c]
+// CHECK: str      b3, [x3, #1]!              // encoding: [0x63,0x1c,0x00,0x3c]
+// CHECK: str      b5, [sp, #-256]!           // encoding: [0xe5,0x0f,0x10,0x3c]
+// CHECK: str      h10, [x10, #255]!          // encoding: [0x4a,0xfd,0x0f,0x7c]
+// CHECK: str      h13, [x23, #1]!            // encoding: [0xed,0x1e,0x00,0x7c]
+// CHECK: str      h15, [sp, #-256]!          // encoding: [0xef,0x0f,0x10,0x7c]
+// CHECK: str      s20, [x20, #255]!          // encoding: [0x94,0xfe,0x0f,0xbc]
+// CHECK: str      s23, [x23, #1]!            // encoding: [0xf7,0x1e,0x00,0xbc]
+// CHECK: str      s25, [x0, #-256]!          // encoding: [0x19,0x0c,0x10,0xbc]
+// CHECK: str      d20, [x20, #255]!          // encoding: [0x94,0xfe,0x0f,0xfc]
+// CHECK: str      d23, [x23, #1]!            // encoding: [0xf7,0x1e,0x00,0xfc]
+// CHECK: str      d25, [x0, #-256]!          // encoding: [0x19,0x0c,0x10,0xfc]
+
+        ldr b0, [x0, #255]!             // loads into b/h/s/d registers
+        ldr b3, [x3, #1]!
+        ldr b5, [sp, #-256]!
+        ldr h10, [x10, #255]!
+        ldr h13, [x23, #1]!
+        ldr h15, [sp, #-256]!
+        ldr s20, [x20, #255]!
+        ldr s23, [x23, #1]!
+        ldr s25, [x0, #-256]!
+        ldr d20, [x20, #255]!
+        ldr d23, [x23, #1]!
+        ldr d25, [x0, #-256]!
+// CHECK: ldr      b0, [x0, #255]!            // encoding: [0x00,0xfc,0x4f,0x3c]
+// CHECK: ldr      b3, [x3, #1]!              // encoding: [0x63,0x1c,0x40,0x3c]
+// CHECK: ldr      b5, [sp, #-256]!           // encoding: [0xe5,0x0f,0x50,0x3c]
+// CHECK: ldr      h10, [x10, #255]!          // encoding: [0x4a,0xfd,0x4f,0x7c]
+// CHECK: ldr      h13, [x23, #1]!            // encoding: [0xed,0x1e,0x40,0x7c]
+// CHECK: ldr      h15, [sp, #-256]!          // encoding: [0xef,0x0f,0x50,0x7c]
+// CHECK: ldr      s20, [x20, #255]!          // encoding: [0x94,0xfe,0x4f,0xbc]
+// CHECK: ldr      s23, [x23, #1]!            // encoding: [0xf7,0x1e,0x40,0xbc]
+// CHECK: ldr      s25, [x0, #-256]!          // encoding: [0x19,0x0c,0x50,0xbc]
+// CHECK: ldr      d20, [x20, #255]!          // encoding: [0x94,0xfe,0x4f,0xfc]
+// CHECK: ldr      d23, [x23, #1]!            // encoding: [0xf7,0x1e,0x40,0xfc]
+// CHECK: ldr      d25, [x0, #-256]!          // encoding: [0x19,0x0c,0x50,0xfc]
+
+        ldr q20, [x1, #255]!            // q-register loads and stores
+        ldr q23, [x9, #1]!
+        ldr q25, [x20, #-256]!
+        str q10, [x1, #255]!
+        str q22, [sp, #1]!
+        str q21, [x20, #-256]!
+// CHECK: ldr      q20, [x1, #255]!           // encoding: [0x34,0xfc,0xcf,0x3c]
+// CHECK: ldr      q23, [x9, #1]!             // encoding: [0x37,0x1d,0xc0,0x3c]
+// CHECK: ldr      q25, [x20, #-256]!         // encoding: [0x99,0x0e,0xd0,0x3c]
+// CHECK: str      q10, [x1, #255]!           // encoding: [0x2a,0xfc,0x8f,0x3c]
+// CHECK: str      q22, [sp, #1]!             // encoding: [0xf6,0x1f,0x80,0x3c]
+// CHECK: str      q21, [x20, #-256]!         // encoding: [0x95,0x0e,0x90,0x3c]
+
+//------------------------------------------------------------------------------
+// Load/store (unprivileged)
+//------------------------------------------------------------------------------
+
+        sttrb w9, [sp, #0]
+        sttrh wzr, [x12, #255]
+        sttr w16, [x0, #-256]
+        sttr x28, [x14, #1]
+// CHECK: sttrb    w9, [sp]                   // encoding: [0xe9,0x0b,0x00,0x38]
+// CHECK: sttrh    wzr, [x12, #255]           // encoding: [0x9f,0xf9,0x0f,0x78]
+// CHECK: sttr     w16, [x0, #-256]           // encoding: [0x10,0x08,0x10,0xb8]
+// CHECK: sttr     x28, [x14, #1]             // encoding: [0xdc,0x19,0x00,0xf8]
+
+        ldtrb w1, [x20, #255]
+        ldtrh w20, [x1, #255]
+        ldtr w12, [sp, #255]
+        ldtr xzr, [x12, #255]
+// CHECK: ldtrb    w1, [x20, #255]            // encoding: [0x81,0xfa,0x4f,0x38]
+// CHECK: ldtrh    w20, [x1, #255]            // encoding: [0x34,0xf8,0x4f,0x78]
+// CHECK: ldtr     w12, [sp, #255]            // encoding: [0xec,0xfb,0x4f,0xb8]
+// CHECK: ldtr     xzr, [x12, #255]           // encoding: [0x9f,0xf9,0x4f,0xf8]
+
+        ldtrsb x9, [x7, #-256]
+        ldtrsh x17, [x19, #-256]
+        ldtrsw x20, [x15, #-256]
+        ldtrsb w19, [x1, #-256]
+        ldtrsh w15, [x21, #-256]
+// CHECK: ldtrsb   x9, [x7, #-256]            // encoding: [0xe9,0x08,0x90,0x38]
+// CHECK: ldtrsh   x17, [x19, #-256]          // encoding: [0x71,0x0a,0x90,0x78]
+// CHECK: ldtrsw   x20, [x15, #-256]          // encoding: [0xf4,0x09,0x90,0xb8]
+// CHECK: ldtrsb   w19, [x1, #-256]           // encoding: [0x33,0x08,0xd0,0x38]
+// CHECK: ldtrsh   w15, [x21, #-256]          // encoding: [0xaf,0x0a,0xd0,0x78]
+
+//------------------------------------------------------------------------------
+// Load/store register pair (offset)
+//------------------------------------------------------------------------------
+
+        ldp w3, w5, [sp]
+        stp wzr, w9, [sp, #252]
+        ldp w2, wzr, [sp, #-256]
+        ldp w9, w10, [sp, #4]
+// CHECK: ldp      w3, w5, [sp]               // encoding: [0xe3,0x17,0x40,0x29]
+// CHECK: stp      wzr, w9, [sp, #252]        // encoding: [0xff,0xa7,0x1f,0x29]
+// CHECK: ldp      w2, wzr, [sp, #-256]       // encoding: [0xe2,0x7f,0x60,0x29]
+// CHECK: ldp      w9, w10, [sp, #4]          // encoding: [0xe9,0xab,0x40,0x29]
+
+        ldpsw x9, x10, [sp, #4]
+        ldpsw x9, x10, [x2, #-256]
+        ldpsw x20, x30, [sp, #252]
+// CHECK: ldpsw    x9, x10, [sp, #4]          // encoding: [0xe9,0xab,0x40,0x69]
+// CHECK: ldpsw    x9, x10, [x2, #-256]       // encoding: [0x49,0x28,0x60,0x69]
+// CHECK: ldpsw    x20, x30, [sp, #252]       // encoding: [0xf4,0xfb,0x5f,0x69]
+
+        ldp x21, x29, [x2, #504]
+        ldp x22, x23, [x3, #-512]
+        ldp x24, x25, [x4, #8]
+// CHECK: ldp      x21, x29, [x2, #504]       // encoding: [0x55,0xf4,0x5f,0xa9]
+// CHECK: ldp      x22, x23, [x3, #-512]      // encoding: [0x76,0x5c,0x60,0xa9]
+// CHECK: ldp      x24, x25, [x4, #8]         // encoding: [0x98,0xe4,0x40,0xa9]
+
+        ldp s29, s28, [sp, #252]
+        stp s27, s26, [sp, #-256]
+        ldp s1, s2, [x3, #44]
+// CHECK: ldp      s29, s28, [sp, #252]       // encoding: [0xfd,0xf3,0x5f,0x2d]
+// CHECK: stp      s27, s26, [sp, #-256]      // encoding: [0xfb,0x6b,0x20,0x2d]
+// CHECK: ldp      s1, s2, [x3, #44]          // encoding: [0x61,0x88,0x45,0x2d]
+
+        stp d3, d5, [x9, #504]
+        stp d7, d11, [x10, #-512]
+        ldp d2, d3, [x30, #-8]
+// CHECK: stp      d3, d5, [x9, #504]         // encoding: [0x23,0x95,0x1f,0x6d]
+// CHECK: stp      d7, d11, [x10, #-512]      // encoding: [0x47,0x2d,0x20,0x6d]
+// CHECK: ldp      d2, d3, [x30, #-8]         // encoding: [0xc2,0x8f,0x7f,0x6d]
+
+        stp q3, q5, [sp]
+        stp q17, q19, [sp, #1008]
+        ldp q23, q29, [x1, #-1024]
+// CHECK: stp      q3, q5, [sp]               // encoding: [0xe3,0x17,0x00,0xad]
+// CHECK: stp      q17, q19, [sp, #1008]      // encoding: [0xf1,0xcf,0x1f,0xad]
+// CHECK: ldp      q23, q29, [x1, #-1024]     // encoding: [0x37,0x74,0x60,0xad]
+
+//------------------------------------------------------------------------------
+// Load/store register pair (post-indexed)
+//------------------------------------------------------------------------------
+
+        ldp w3, w5, [sp], #0
+        stp wzr, w9, [sp], #252
+        ldp w2, wzr, [sp], #-256
+        ldp w9, w10, [sp], #4
+// CHECK: ldp      w3, w5, [sp], #0           // encoding: [0xe3,0x17,0xc0,0x28]
+// CHECK: stp      wzr, w9, [sp], #252        // encoding: [0xff,0xa7,0x9f,0x28]
+// CHECK: ldp      w2, wzr, [sp], #-256       // encoding: [0xe2,0x7f,0xe0,0x28]
+// CHECK: ldp      w9, w10, [sp], #4          // encoding: [0xe9,0xab,0xc0,0x28]
+
+        ldpsw x9, x10, [sp], #4
+        ldpsw x9, x10, [x2], #-256
+        ldpsw x20, x30, [sp], #252
+// CHECK: ldpsw    x9, x10, [sp], #4          // encoding: [0xe9,0xab,0xc0,0x68]
+// CHECK: ldpsw    x9, x10, [x2], #-256       // encoding: [0x49,0x28,0xe0,0x68]
+// CHECK: ldpsw    x20, x30, [sp], #252       // encoding: [0xf4,0xfb,0xdf,0x68]
+
+        ldp x21, x29, [x2], #504
+        ldp x22, x23, [x3], #-512
+        ldp x24, x25, [x4], #8
+// CHECK: ldp      x21, x29, [x2], #504       // encoding: [0x55,0xf4,0xdf,0xa8]
+// CHECK: ldp      x22, x23, [x3], #-512      // encoding: [0x76,0x5c,0xe0,0xa8]
+// CHECK: ldp      x24, x25, [x4], #8         // encoding: [0x98,0xe4,0xc0,0xa8]
+
+        ldp s29, s28, [sp], #252
+        stp s27, s26, [sp], #-256
+        ldp s1, s2, [x3], #44
+// CHECK: ldp      s29, s28, [sp], #252       // encoding: [0xfd,0xf3,0xdf,0x2c]
+// CHECK: stp      s27, s26, [sp], #-256      // encoding: [0xfb,0x6b,0xa0,0x2c]
+// CHECK: ldp      s1, s2, [x3], #44          // encoding: [0x61,0x88,0xc5,0x2c]
+
+        stp d3, d5, [x9], #504
+        stp d7, d11, [x10], #-512
+        ldp d2, d3, [x30], #-8
+// CHECK: stp      d3, d5, [x9], #504         // encoding: [0x23,0x95,0x9f,0x6c]
+// CHECK: stp      d7, d11, [x10], #-512      // encoding: [0x47,0x2d,0xa0,0x6c]
+// CHECK: ldp      d2, d3, [x30], #-8         // encoding: [0xc2,0x8f,0xff,0x6c]
+
+        stp q3, q5, [sp], #0
+        stp q17, q19, [sp], #1008
+        ldp q23, q29, [x1], #-1024
+// CHECK: stp      q3, q5, [sp], #0           // encoding: [0xe3,0x17,0x80,0xac]
+// CHECK: stp      q17, q19, [sp], #1008      // encoding: [0xf1,0xcf,0x9f,0xac]
+// CHECK: ldp      q23, q29, [x1], #-1024     // encoding: [0x37,0x74,0xe0,0xac]
+
+//------------------------------------------------------------------------------
+// Load/store register pair (pre-indexed)
+//------------------------------------------------------------------------------
+        ldp w3, w5, [sp, #0]!
+        stp wzr, w9, [sp, #252]!
+        ldp w2, wzr, [sp, #-256]!
+        ldp w9, w10, [sp, #4]!
+// CHECK: ldp      w3, w5, [sp, #0]!          // encoding: [0xe3,0x17,0xc0,0x29]
+// CHECK: stp      wzr, w9, [sp, #252]!       // encoding: [0xff,0xa7,0x9f,0x29]
+// CHECK: ldp      w2, wzr, [sp, #-256]!      // encoding: [0xe2,0x7f,0xe0,0x29]
+// CHECK: ldp      w9, w10, [sp, #4]!         // encoding: [0xe9,0xab,0xc0,0x29]
+
+        ldpsw x9, x10, [sp, #4]!
+        ldpsw x9, x10, [x2, #-256]!
+        ldpsw x20, x30, [sp, #252]!
+// CHECK: ldpsw    x9, x10, [sp, #4]!         // encoding: [0xe9,0xab,0xc0,0x69]
+// CHECK: ldpsw    x9, x10, [x2, #-256]!      // encoding: [0x49,0x28,0xe0,0x69]
+// CHECK: ldpsw    x20, x30, [sp, #252]!      // encoding: [0xf4,0xfb,0xdf,0x69]
+
+        ldp x21, x29, [x2, #504]!
+        ldp x22, x23, [x3, #-512]!
+        ldp x24, x25, [x4, #8]!
+// CHECK: ldp      x21, x29, [x2, #504]!      // encoding: [0x55,0xf4,0xdf,0xa9]
+// CHECK: ldp      x22, x23, [x3, #-512]!     // encoding: [0x76,0x5c,0xe0,0xa9]
+// CHECK: ldp      x24, x25, [x4, #8]!        // encoding: [0x98,0xe4,0xc0,0xa9]
+
+        ldp s29, s28, [sp, #252]!
+        stp s27, s26, [sp, #-256]!
+        ldp s1, s2, [x3, #44]!
+// CHECK: ldp      s29, s28, [sp, #252]!      // encoding: [0xfd,0xf3,0xdf,0x2d]
+// CHECK: stp      s27, s26, [sp, #-256]!     // encoding: [0xfb,0x6b,0xa0,0x2d]
+// CHECK: ldp      s1, s2, [x3, #44]!         // encoding: [0x61,0x88,0xc5,0x2d]
+
+        stp d3, d5, [x9, #504]!
+        stp d7, d11, [x10, #-512]!
+        ldp d2, d3, [x30, #-8]!
+// CHECK: stp      d3, d5, [x9, #504]!        // encoding: [0x23,0x95,0x9f,0x6d]
+// CHECK: stp      d7, d11, [x10, #-512]!     // encoding: [0x47,0x2d,0xa0,0x6d]
+// CHECK: ldp      d2, d3, [x30, #-8]!        // encoding: [0xc2,0x8f,0xff,0x6d]
+
+        stp q3, q5, [sp, #0]!
+        stp q17, q19, [sp, #1008]!
+        ldp q23, q29, [x1, #-1024]!
+// CHECK: stp      q3, q5, [sp, #0]!          // encoding: [0xe3,0x17,0x80,0xad]
+// CHECK: stp      q17, q19, [sp, #1008]!     // encoding: [0xf1,0xcf,0x9f,0xad]
+// CHECK: ldp      q23, q29, [x1, #-1024]!    // encoding: [0x37,0x74,0xe0,0xad]
+
+//------------------------------------------------------------------------------
+// Load/store non-temporal register pair (offset)
+//------------------------------------------------------------------------------
+
+        ldnp w3, w5, [sp]
+        stnp wzr, w9, [sp, #252]
+        ldnp w2, wzr, [sp, #-256]
+        ldnp w9, w10, [sp, #4]
+// CHECK: ldnp      w3, w5, [sp]               // encoding: [0xe3,0x17,0x40,0x28]
+// CHECK: stnp      wzr, w9, [sp, #252]        // encoding: [0xff,0xa7,0x1f,0x28]
+// CHECK: ldnp      w2, wzr, [sp, #-256]       // encoding: [0xe2,0x7f,0x60,0x28]
+// CHECK: ldnp      w9, w10, [sp, #4]          // encoding: [0xe9,0xab,0x40,0x28]
+
+        ldnp x21, x29, [x2, #504]
+        ldnp x22, x23, [x3, #-512]
+        ldnp x24, x25, [x4, #8]
+// CHECK: ldnp      x21, x29, [x2, #504]       // encoding: [0x55,0xf4,0x5f,0xa8]
+// CHECK: ldnp      x22, x23, [x3, #-512]      // encoding: [0x76,0x5c,0x60,0xa8]
+// CHECK: ldnp      x24, x25, [x4, #8]         // encoding: [0x98,0xe4,0x40,0xa8]
+
+        ldnp s29, s28, [sp, #252]
+        stnp s27, s26, [sp, #-256]
+        ldnp s1, s2, [x3, #44]
+// CHECK: ldnp      s29, s28, [sp, #252]       // encoding: [0xfd,0xf3,0x5f,0x2c]
+// CHECK: stnp      s27, s26, [sp, #-256]      // encoding: [0xfb,0x6b,0x20,0x2c]
+// CHECK: ldnp      s1, s2, [x3, #44]          // encoding: [0x61,0x88,0x45,0x2c]
+
+        stnp d3, d5, [x9, #504]
+        stnp d7, d11, [x10, #-512]
+        ldnp d2, d3, [x30, #-8]
+// CHECK: stnp      d3, d5, [x9, #504]         // encoding: [0x23,0x95,0x1f,0x6c]
+// CHECK: stnp      d7, d11, [x10, #-512]      // encoding: [0x47,0x2d,0x20,0x6c]
+// CHECK: ldnp      d2, d3, [x30, #-8]         // encoding: [0xc2,0x8f,0x7f,0x6c]
+
+        stnp q3, q5, [sp]
+        stnp q17, q19, [sp, #1008]
+        ldnp q23, q29, [x1, #-1024]
+// CHECK: stnp      q3, q5, [sp]               // encoding: [0xe3,0x17,0x00,0xac]
+// CHECK: stnp      q17, q19, [sp, #1008]      // encoding: [0xf1,0xcf,0x1f,0xac]
+// CHECK: ldnp      q23, q29, [x1, #-1024]     // encoding: [0x37,0x74,0x60,0xac]
+
+//------------------------------------------------------------------------------
+// Logical (immediate)
+//------------------------------------------------------------------------------
+        // 32 bit replication-width
+        orr w3, w9, #0xffff0000
+        orr wsp, w10, #0xe00000ff
+        orr w9, w10, #0x000003ff
+// CHECK: orr      w3, w9, #0xffff0000        // encoding: [0x23,0x3d,0x10,0x32]
+// CHECK: orr      wsp, w10, #0xe00000ff      // encoding: [0x5f,0x29,0x03,0x32]
+// CHECK: orr      w9, w10, #0x3ff            // encoding: [0x49,0x25,0x00,0x32]
+
+        // 16 bit replication-width
+        and w14, w15, #0x80008000
+        and w12, w13, #0xffc3ffc3
+        and w11, wzr, #0x00030003
+// CHECK: and      w14, w15, #0x80008000      // encoding: [0xee,0x81,0x01,0x12]
+// CHECK: and      w12, w13, #0xffc3ffc3      // encoding: [0xac,0xad,0x0a,0x12]
+// CHECK: and      w11, wzr, #0x30003         // encoding: [0xeb,0x87,0x00,0x12]
+
+        // 8 bit replication-width
+        eor w3, w6, #0xe0e0e0e0
+        eor wsp, wzr, #0x03030303
+        eor w16, w17, #0x81818181
+// CHECK: eor      w3, w6, #0xe0e0e0e0        // encoding: [0xc3,0xc8,0x03,0x52]
+// CHECK: eor      wsp, wzr, #0x3030303       // encoding: [0xff,0xc7,0x00,0x52]
+// CHECK: eor      w16, w17, #0x81818181      // encoding: [0x30,0xc6,0x01,0x52]
+
+        // 4 bit replication-width
+        ands wzr, w18, #0xcccccccc
+        ands w19, w20, #0x33333333
+        ands w21, w22, #0x99999999
+// CHECK: ands     wzr, w18, #0xcccccccc      // encoding: [0x5f,0xe6,0x02,0x72]
+// CHECK: ands     w19, w20, #0x33333333      // encoding: [0x93,0xe6,0x00,0x72]
+// CHECK: ands     w21, w22, #0x99999999      // encoding: [0xd5,0xe6,0x01,0x72]
+
+        // 2 bit replication-width
+        tst w3, #0xaaaaaaaa
+        tst wzr, #0x55555555
+// CHECK: ands     wzr, w3, #0xaaaaaaaa       // encoding: [0x7f,0xf0,0x01,0x72]
+// CHECK: ands     wzr, wzr, #0x55555555      // encoding: [0xff,0xf3,0x00,0x72]
+
+        // 64 bit replication-width
+        eor x3, x5, #0xffffffffc000000
+        and x9, x10, #0x00007fffffffffff
+        orr x11, x12, #0x8000000000000fff
+// CHECK: eor      x3, x5, #0xffffffffc000000 // encoding: [0xa3,0x84,0x66,0xd2]
+// CHECK: and      x9, x10, #0x7fffffffffff   // encoding: [0x49,0xb9,0x40,0x92]
+// CHECK: orr      x11, x12, #0x8000000000000fff // encoding: [0x8b,0x31,0x41,0xb2]
+
+        // 32 bit replication-width
+        orr x3, x9, #0xffff0000ffff0000
+        orr sp, x10, #0xe00000ffe00000ff
+        orr x9, x10, #0x000003ff000003ff
+// CHECK: orr      x3, x9, #0xffff0000ffff0000 // encoding: [0x23,0x3d,0x10,0xb2]
+// CHECK: orr      sp, x10, #0xe00000ffe00000ff // encoding: [0x5f,0x29,0x03,0xb2]
+// CHECK: orr      x9, x10, #0x3ff000003ff    // encoding: [0x49,0x25,0x00,0xb2]
+
+        // 16 bit replication-width
+        and x14, x15, #0x8000800080008000
+        and x12, x13, #0xffc3ffc3ffc3ffc3
+        and x11, xzr, #0x0003000300030003
+// CHECK: and      x14, x15, #0x8000800080008000 // encoding: [0xee,0x81,0x01,0x92]
+// CHECK: and      x12, x13, #0xffc3ffc3ffc3ffc3 // encoding: [0xac,0xad,0x0a,0x92]
+// CHECK: and      x11, xzr, #0x3000300030003 // encoding: [0xeb,0x87,0x00,0x92]
+
+        // 8 bit replication-width
+        eor x3, x6, #0xe0e0e0e0e0e0e0e0
+        eor sp, xzr, #0x0303030303030303
+        eor x16, x17, #0x8181818181818181
+// CHECK: eor      x3, x6, #0xe0e0e0e0e0e0e0e0 // encoding: [0xc3,0xc8,0x03,0xd2]
+// CHECK: eor      sp, xzr, #0x303030303030303 // encoding: [0xff,0xc7,0x00,0xd2]
+// CHECK: eor      x16, x17, #0x8181818181818181 // encoding: [0x30,0xc6,0x01,0xd2]
+
+        // 4 bit replication-width
+        ands xzr, x18, #0xcccccccccccccccc
+        ands x19, x20, #0x3333333333333333
+        ands x21, x22, #0x9999999999999999
+// CHECK: ands     xzr, x18, #0xcccccccccccccccc // encoding: [0x5f,0xe6,0x02,0xf2]
+// CHECK: ands     x19, x20, #0x3333333333333333 // encoding: [0x93,0xe6,0x00,0xf2]
+// CHECK: ands     x21, x22, #0x9999999999999999 // encoding: [0xd5,0xe6,0x01,0xf2]
+
+        // 2 bit replication-width
+        tst x3, #0xaaaaaaaaaaaaaaaa
+        tst xzr, #0x5555555555555555
+// CHECK: ands     xzr, x3, #0xaaaaaaaaaaaaaaaa    // encoding: [0x7f,0xf0,0x01,0xf2]
+// CHECK: ands     xzr, xzr, #0x5555555555555555   // encoding: [0xff,0xf3,0x00,0xf2]
+
+        mov w3, #0xf000f
+        mov x10, #0xaaaaaaaaaaaaaaaa
+// CHECK: orr      w3, wzr, #0xf000f          // encoding: [0xe3,0x8f,0x00,0x32]
+// CHECK: orr      x10, xzr, #0xaaaaaaaaaaaaaaaa // encoding: [0xea,0xf3,0x01,0xb2]
+
+//------------------------------------------------------------------------------
+// Logical (shifted register)
+//------------------------------------------------------------------------------
+
+        and w12, w23, w21
+        and w16, w15, w1, lsl #1
+        and w9, w4, w10, lsl #31
+        and w3, w30, w11, lsl #0
+        and x3, x5, x7, lsl #63
+// CHECK: and      w12, w23, w21              // encoding: [0xec,0x02,0x15,0x0a]
+// CHECK: and      w16, w15, w1, lsl #1       // encoding: [0xf0,0x05,0x01,0x0a]
+// CHECK: and      w9, w4, w10, lsl #31       // encoding: [0x89,0x7c,0x0a,0x0a]
+// CHECK: and      w3, w30, w11               // encoding: [0xc3,0x03,0x0b,0x0a]
+// CHECK: and      x3, x5, x7, lsl #63        // encoding: [0xa3,0xfc,0x07,0x8a]
+
+        and x5, x14, x19, asr #4
+        and w3, w17, w19, ror #31
+        and w0, w2, wzr, lsr #17
+        and w3, w30, w11, asr #0
+// CHECK: and      x5, x14, x19, asr #4       // encoding: [0xc5,0x11,0x93,0x8a]
+// CHECK: and      w3, w17, w19, ror #31      // encoding: [0x23,0x7e,0xd3,0x0a]
+// CHECK: and      w0, w2, wzr, lsr #17       // encoding: [0x40,0x44,0x5f,0x0a]
+// CHECK: and      w3, w30, w11, asr #0       // encoding: [0xc3,0x03,0x8b,0x0a]
+
+        and xzr, x4, x26, lsl #0
+        and w3, wzr, w20, ror #0
+        and x7, x20, xzr, asr #63
+// CHECK: and      xzr, x4, x26               // encoding: [0x9f,0x00,0x1a,0x8a]
+// CHECK: and      w3, wzr, w20, ror #0       // encoding: [0xe3,0x03,0xd4,0x0a]
+// CHECK: and      x7, x20, xzr, asr #63      // encoding: [0x87,0xfe,0x9f,0x8a]
+
+        bic x13, x20, x14, lsl #47
+        bic w2, w7, w9
+        orr w2, w7, w0, asr #31
+        orr x8, x9, x10, lsl #12
+        orn x3, x5, x7, asr #0
+        orn w2, w5, w29
+// CHECK: bic      x13, x20, x14, lsl #47     // encoding: [0x8d,0xbe,0x2e,0x8a]
+// CHECK: bic      w2, w7, w9                 // encoding: [0xe2,0x00,0x29,0x0a]
+// CHECK: orr      w2, w7, w0, asr #31        // encoding: [0xe2,0x7c,0x80,0x2a]
+// CHECK: orr      x8, x9, x10, lsl #12       // encoding: [0x28,0x31,0x0a,0xaa]
+// CHECK: orn      x3, x5, x7, asr #0         // encoding: [0xa3,0x00,0xa7,0xaa]
+// CHECK: orn      w2, w5, w29                // encoding: [0xa2,0x00,0x3d,0x2a]
+
+        ands w7, wzr, w9, lsl #1
+        ands x3, x5, x20, ror #63
+        bics w3, w5, w7, lsl #0
+        bics x3, xzr, x3, lsl #1
+// CHECK: ands     w7, wzr, w9, lsl #1        // encoding: [0xe7,0x07,0x09,0x6a]
+// CHECK: ands     x3, x5, x20, ror #63       // encoding: [0xa3,0xfc,0xd4,0xea]
+// CHECK: bics     w3, w5, w7                 // encoding: [0xa3,0x00,0x27,0x6a]
+// CHECK: bics     x3, xzr, x3, lsl #1        // encoding: [0xe3,0x07,0x23,0xea]
+
+        tst w3, w7, lsl #31
+        tst x2, x20, asr #0
+// CHECK: tst      w3, w7, lsl #31            // encoding: [0x7f,0x7c,0x07,0x6a]
+// CHECK: tst      x2, x20, asr #0            // encoding: [0x5f,0x00,0x94,0xea]
+
+        mov x3, x6
+        mov x3, xzr
+        mov wzr, w2
+        mov w3, w5
+// CHECK: mov      x3, x6                     // encoding: [0xe3,0x03,0x06,0xaa]
+// CHECK: mov      x3, xzr                    // encoding: [0xe3,0x03,0x1f,0xaa]
+// CHECK: mov      wzr, w2                    // encoding: [0xff,0x03,0x02,0x2a]
+// CHECK: mov      w3, w5                     // encoding: [0xe3,0x03,0x05,0x2a]
+
+//------------------------------------------------------------------------------
+// Move wide (immediate)
+//------------------------------------------------------------------------------
+
+        movz w1, #65535, lsl #0
+        movz w2, #0, lsl #16
+        movn w2, #1234, lsl #0
+// CHECK: movz     w1, #65535                 // encoding: [0xe1,0xff,0x9f,0x52]
+// CHECK: movz     w2, #0, lsl #16            // encoding: [0x02,0x00,0xa0,0x52]
+// CHECK: movn     w2, #1234                  // encoding: [0x42,0x9a,0x80,0x12]
+
+        movz x2, #1234, lsl #32
+        movk xzr, #4321, lsl #48
+// CHECK: movz     x2, #1234, lsl #32         // encoding: [0x42,0x9a,0xc0,0xd2]
+// CHECK: movk     xzr, #4321, lsl #48        // encoding: [0x3f,0x1c,0xe2,0xf2]
+
+        movz x2, #:abs_g0:sym
+        movk w3, #:abs_g0_nc:sym
+// CHECK: movz     x2, #:abs_g0:sym       // encoding: [0x02'A',A,0x80'A',0xd2'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_a64_movw_uabs_g0
+// CHECK: movk     w3, #:abs_g0_nc:sym    // encoding: [0x03'A',A,0x80'A',0x72'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_a64_movw_uabs_g0_nc
+
+        movz x4, #:abs_g1:sym
+        movk w5, #:abs_g1_nc:sym
+// CHECK: movz     x4, #:abs_g1:sym       // encoding: [0x04'A',A,0xa0'A',0xd2'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_a64_movw_uabs_g1
+// CHECK: movk     w5, #:abs_g1_nc:sym    // encoding: [0x05'A',A,0xa0'A',0x72'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: :abs_g1_nc:sym, kind: fixup_a64_movw_uabs_g1_nc
+
+        movz x6, #:abs_g2:sym
+        movk x7, #:abs_g2_nc:sym
+// CHECK: movz     x6, #:abs_g2:sym       // encoding: [0x06'A',A,0xc0'A',0xd2'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_a64_movw_uabs_g2
+// CHECK: movk     x7, #:abs_g2_nc:sym    // encoding: [0x07'A',A,0xc0'A',0xf2'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: :abs_g2_nc:sym, kind: fixup_a64_movw_uabs_g2_nc
+
+        movz x8, #:abs_g3:sym
+        movk x9, #:abs_g3:sym
+// CHECK: movz     x8, #:abs_g3:sym       // encoding: [0x08'A',A,0xe0'A',0xd2'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_a64_movw_uabs_g3
+// CHECK: movk     x9, #:abs_g3:sym       // encoding: [0x09'A',A,0xe0'A',0xf2'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_a64_movw_uabs_g3
+
+        movn x30, #:abs_g0_s:sym
+        movz x19, #:abs_g0_s:sym
+        movn w10, #:abs_g0_s:sym
+        movz w25, #:abs_g0_s:sym
+// CHECK: movn     x30, #:abs_g0_s:sym    // encoding: [0x1e'A',A,0x80'A',0x92'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0
+// CHECK: movz     x19, #:abs_g0_s:sym    // encoding: [0x13'A',A,0x80'A',0x92'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0
+// CHECK: movn     w10, #:abs_g0_s:sym    // encoding: [0x0a'A',A,0x80'A',0x12'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0
+// CHECK: movz     w25, #:abs_g0_s:sym    // encoding: [0x19'A',A,0x80'A',0x12'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0
+
+        movn x30, #:abs_g1_s:sym
+        movz x19, #:abs_g1_s:sym
+        movn w10, #:abs_g1_s:sym
+        movz w25, #:abs_g1_s:sym
+// CHECK: movn     x30, #:abs_g1_s:sym    // encoding: [0x1e'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1
+// CHECK: movz     x19, #:abs_g1_s:sym    // encoding: [0x13'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1
+// CHECK: movn     w10, #:abs_g1_s:sym    // encoding: [0x0a'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1
+// CHECK: movz     w25, #:abs_g1_s:sym    // encoding: [0x19'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1
+
+        movn x30, #:abs_g2_s:sym
+        movz x19, #:abs_g2_s:sym
+// CHECK: movn     x30, #:abs_g2_s:sym    // encoding: [0x1e'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_a64_movw_sabs_g2
+// CHECK: movz     x19, #:abs_g2_s:sym    // encoding: [0x13'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_a64_movw_sabs_g2
+
+//------------------------------------------------------------------------------
+// PC-relative addressing
+//------------------------------------------------------------------------------
+
+        adr x2, loc
+        adr xzr, loc
+// CHECK: adr     x2, loc                 // encoding: [0x02'A',A,A,0x10'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: loc, kind: fixup_a64_adr_prel
+// CHECK: adr     xzr, loc                // encoding: [0x1f'A',A,A,0x10'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: loc, kind: fixup_a64_adr_prel
+
+        adrp x29, loc
+// CHECK: adrp    x29, loc                // encoding: [0x1d'A',A,A,0x90'A']
+// CHECK-NEXT:                            //   fixup A - offset: 0, value: loc, kind: fixup_a64_adr_prel_page
+
+        adrp x30, #4096
+        adr x20, #0
+        adr x9, #-1
+        adr x5, #1048575
+// CHECK: adrp    x30, #4096              // encoding: [0x1e,0x00,0x00,0xb0]
+// CHECK: adr     x20, #0                 // encoding: [0x14,0x00,0x00,0x10]
+// CHECK: adr     x9, #-1                 // encoding: [0xe9,0xff,0xff,0x70]
+// CHECK: adr     x5, #1048575            // encoding: [0xe5,0xff,0x7f,0x70]
+
+        adr x9, #1048575
+        adr x2, #-1048576
+        adrp x9, #4294963200
+        adrp x20, #-4294967296
+// CHECK: adr     x9, #1048575            // encoding: [0xe9,0xff,0x7f,0x70]
+// CHECK: adr     x2, #-1048576           // encoding: [0x02,0x00,0x80,0x10]
+// CHECK: adrp    x9, #4294963200         // encoding: [0xe9,0xff,0x7f,0xf0]
+// CHECK: adrp    x20, #-4294967296       // encoding: [0x14,0x00,0x80,0x90]
+
+//------------------------------------------------------------------------------
+// System
+//------------------------------------------------------------------------------
+
+        hint #0
+        hint #127
+// CHECK: nop                             // encoding: [0x1f,0x20,0x03,0xd5]
+// CHECK: hint    #127                    // encoding: [0xff,0x2f,0x03,0xd5]
+
+        nop
+        yield
+        wfe
+        wfi
+        sev
+        sevl
+// CHECK: nop                             // encoding: [0x1f,0x20,0x03,0xd5]
+// CHECK: yield                           // encoding: [0x3f,0x20,0x03,0xd5]
+// CHECK: wfe                             // encoding: [0x5f,0x20,0x03,0xd5]
+// CHECK: wfi                             // encoding: [0x7f,0x20,0x03,0xd5]
+// CHECK: sev                             // encoding: [0x9f,0x20,0x03,0xd5]
+// CHECK: sevl                            // encoding: [0xbf,0x20,0x03,0xd5]
+
+        clrex
+        clrex #0
+        clrex #7
+        clrex #15
+// CHECK: clrex                           // encoding: [0x5f,0x3f,0x03,0xd5]
+// CHECK: clrex   #0                      // encoding: [0x5f,0x30,0x03,0xd5]
+// CHECK: clrex   #7                      // encoding: [0x5f,0x37,0x03,0xd5]
+// CHECK: clrex                           // encoding: [0x5f,0x3f,0x03,0xd5]
+
+        dsb #0
+        dsb #12
+        dsb #15
+        dsb oshld
+        dsb oshst
+        dsb osh
+        dsb nshld
+        dsb nshst
+        dsb nsh
+        dsb ishld
+        dsb ishst
+        dsb ish
+        dsb ld
+        dsb st
+        dsb sy
+// CHECK: dsb     #0                      // encoding: [0x9f,0x30,0x03,0xd5]
+// CHECK: dsb     #12                     // encoding: [0x9f,0x3c,0x03,0xd5]
+// CHECK: dsb     sy                      // encoding: [0x9f,0x3f,0x03,0xd5]
+// CHECK: dsb     oshld                   // encoding: [0x9f,0x31,0x03,0xd5]
+// CHECK: dsb     oshst                   // encoding: [0x9f,0x32,0x03,0xd5]
+// CHECK: dsb     osh                     // encoding: [0x9f,0x33,0x03,0xd5]
+// CHECK: dsb     nshld                   // encoding: [0x9f,0x35,0x03,0xd5]
+// CHECK: dsb     nshst                   // encoding: [0x9f,0x36,0x03,0xd5]
+// CHECK: dsb     nsh                     // encoding: [0x9f,0x37,0x03,0xd5]
+// CHECK: dsb     ishld                   // encoding: [0x9f,0x39,0x03,0xd5]
+// CHECK: dsb     ishst                   // encoding: [0x9f,0x3a,0x03,0xd5]
+// CHECK: dsb     ish                     // encoding: [0x9f,0x3b,0x03,0xd5]
+// CHECK: dsb     ld                      // encoding: [0x9f,0x3d,0x03,0xd5]
+// CHECK: dsb     st                      // encoding: [0x9f,0x3e,0x03,0xd5]
+// CHECK: dsb     sy                      // encoding: [0x9f,0x3f,0x03,0xd5]
+
+        dmb #0
+        dmb #12
+        dmb #15
+        dmb oshld
+        dmb oshst
+        dmb osh
+        dmb nshld
+        dmb nshst
+        dmb nsh
+        dmb ishld
+        dmb ishst
+        dmb ish
+        dmb ld
+        dmb st
+        dmb sy
+// CHECK: dmb     #0                      // encoding: [0xbf,0x30,0x03,0xd5]
+// CHECK: dmb     #12                     // encoding: [0xbf,0x3c,0x03,0xd5]
+// CHECK: dmb     sy                      // encoding: [0xbf,0x3f,0x03,0xd5]
+// CHECK: dmb     oshld                   // encoding: [0xbf,0x31,0x03,0xd5]
+// CHECK: dmb     oshst                   // encoding: [0xbf,0x32,0x03,0xd5]
+// CHECK: dmb     osh                     // encoding: [0xbf,0x33,0x03,0xd5]
+// CHECK: dmb     nshld                   // encoding: [0xbf,0x35,0x03,0xd5]
+// CHECK: dmb     nshst                   // encoding: [0xbf,0x36,0x03,0xd5]
+// CHECK: dmb     nsh                     // encoding: [0xbf,0x37,0x03,0xd5]
+// CHECK: dmb     ishld                   // encoding: [0xbf,0x39,0x03,0xd5]
+// CHECK: dmb     ishst                   // encoding: [0xbf,0x3a,0x03,0xd5]
+// CHECK: dmb     ish                     // encoding: [0xbf,0x3b,0x03,0xd5]
+// CHECK: dmb     ld                      // encoding: [0xbf,0x3d,0x03,0xd5]
+// CHECK: dmb     st                      // encoding: [0xbf,0x3e,0x03,0xd5]
+// CHECK: dmb     sy                      // encoding: [0xbf,0x3f,0x03,0xd5]
+
+        isb sy
+        isb
+        isb #12
+// CHECK: isb                             // encoding: [0xdf,0x3f,0x03,0xd5]
+// CHECK: isb                             // encoding: [0xdf,0x3f,0x03,0xd5]
+// CHECK: isb     #12                     // encoding: [0xdf,0x3c,0x03,0xd5]
+
+
+        msr spsel, #0
+        msr daifset, #15
+        msr daifclr, #12
+// CHECK: msr     spsel, #0               // encoding: [0xbf,0x40,0x00,0xd5]
+// CHECK: msr     daifset, #15            // encoding: [0xdf,0x4f,0x03,0xd5]
+// CHECK: msr     daifclr, #12            // encoding: [0xff,0x4c,0x03,0xd5]
+
+        sys #7, c5, c9, #7, x5  // register operand given explicitly
+        sys #0, c15, c15, #2    // register omitted: expected output prints xzr
+// CHECK: sys     #7, c5, c9, #7, x5      // encoding: [0xe5,0x59,0x0f,0xd5]
+// CHECK: sys     #0, c15, c15, #2, xzr   // encoding: [0x5f,0xff,0x08,0xd5]
+
+        sysl x9, #7, c5, c9, #7 // register operand comes first here, last in "sys"
+        sysl x1, #0, c15, c15, #2
+// CHECK: sysl    x9, #7, c5, c9, #7      // encoding: [0xe9,0x59,0x2f,0xd5]
+// CHECK: sysl    x1, #0, c15, c15, #2    // encoding: [0x41,0xff,0x28,0xd5]
+
+        ic ialluis              // no register operand; first expected byte is 0x1f
+        ic iallu                // no register operand; first expected byte is 0x1f
+        ic ivau, x9             // only operation in this group given a register
+// CHECK:         ic      ialluis                 // encoding: [0x1f,0x71,0x08,0xd5]
+// CHECK:         ic      iallu                   // encoding: [0x1f,0x75,0x08,0xd5]
+// CHECK:         ic      ivau, x9                // encoding: [0x29,0x75,0x0b,0xd5]
+
+        dc zva, x12             // every operation in this group is given a register operand
+        dc ivac, xzr            // xzr as the register: first expected byte is 0x3f
+        dc isw, x2
+        dc cvac, x9
+        dc csw, x10
+        dc cvau, x0
+        dc civac, x3
+        dc cisw, x30
+// CHECK:         dc      zva, x12                // encoding: [0x2c,0x74,0x0b,0xd5]
+// CHECK:         dc      ivac, xzr               // encoding: [0x3f,0x76,0x08,0xd5]
+// CHECK:         dc      isw, x2                 // encoding: [0x42,0x76,0x08,0xd5]
+// CHECK:         dc      cvac, x9                // encoding: [0x29,0x7a,0x0b,0xd5]
+// CHECK:         dc      csw, x10                // encoding: [0x4a,0x7a,0x08,0xd5]
+// CHECK:         dc      cvau, x0                // encoding: [0x20,0x7b,0x0b,0xd5]
+// CHECK:         dc      civac, x3               // encoding: [0x23,0x7e,0x0b,0xd5]
+// CHECK:         dc      cisw, x30               // encoding: [0x5e,0x7e,0x08,0xd5]
+
+        at S1E1R, x19           // upper-case operation names; expected output is lower case
+        at S1E2R, x19
+        at S1E3R, x19
+        at S1E1W, x19
+        at S1E2W, x19
+        at S1E3W, x19
+        at S1E0R, x19
+        at S1E0W, x19
+        at S12E1R, x20          // S12* forms use x20, the S1* forms above use x19
+        at S12E1W, x20
+        at S12E0R, x20
+        at S12E0W, x20
+// CHECK: at      s1e1r, x19              // encoding: [0x13,0x78,0x08,0xd5]
+// CHECK: at      s1e2r, x19              // encoding: [0x13,0x78,0x0c,0xd5]
+// CHECK: at      s1e3r, x19              // encoding: [0x13,0x78,0x0e,0xd5]
+// CHECK: at      s1e1w, x19              // encoding: [0x33,0x78,0x08,0xd5]
+// CHECK: at      s1e2w, x19              // encoding: [0x33,0x78,0x0c,0xd5]
+// CHECK: at      s1e3w, x19              // encoding: [0x33,0x78,0x0e,0xd5]
+// CHECK: at      s1e0r, x19              // encoding: [0x53,0x78,0x08,0xd5]
+// CHECK: at      s1e0w, x19              // encoding: [0x73,0x78,0x08,0xd5]
+// CHECK: at      s12e1r, x20             // encoding: [0x94,0x78,0x0c,0xd5]
+// CHECK: at      s12e1w, x20             // encoding: [0xb4,0x78,0x0c,0xd5]
+// CHECK: at      s12e0r, x20             // encoding: [0xd4,0x78,0x0c,0xd5]
+// CHECK: at      s12e0w, x20             // encoding: [0xf4,0x78,0x0c,0xd5]
+
+        tlbi IPAS2E1IS, x4      // upper-case operation names; expected output is lower case
+        tlbi IPAS2LE1IS, x9
+        tlbi VMALLE1IS          // no register operand: first expected byte is 0x1f
+        tlbi ALLE2IS            // no register operand: first expected byte is 0x1f
+        tlbi ALLE3IS            // no register operand: first expected byte is 0x1f
+        tlbi VAE1IS, x1
+        tlbi VAE2IS, x2
+        tlbi VAE3IS, x3
+        tlbi ASIDE1IS, x5
+        tlbi VAAE1IS, x9
+        tlbi ALLE1IS            // no register operand: first expected byte is 0x9f
+        tlbi VALE1IS, x10
+        tlbi VALE2IS, x11
+        tlbi VALE3IS, x13
+        tlbi VMALLS12E1IS       // no register operand: first expected byte is 0xdf
+        tlbi VAALE1IS, x14
+        tlbi IPAS2E1, x15       // from here on: same operations without the IS suffix
+        tlbi IPAS2LE1, x16
+        tlbi VMALLE1
+        tlbi ALLE2
+        tlbi ALLE3
+        tlbi VAE1, x17
+        tlbi VAE2, x18
+        tlbi VAE3, x19
+        tlbi ASIDE1, x20
+        tlbi VAAE1, x21
+        tlbi ALLE1
+        tlbi VALE1, x22
+        tlbi VALE2, x23
+        tlbi VALE3, x24
+        tlbi VMALLS12E1
+        tlbi VAALE1, x25
+// CHECK: tlbi    ipas2e1is, x4           // encoding: [0x24,0x80,0x0c,0xd5]
+// CHECK: tlbi    ipas2le1is, x9          // encoding: [0xa9,0x80,0x0c,0xd5]
+// CHECK: tlbi    vmalle1is               // encoding: [0x1f,0x83,0x08,0xd5]
+// CHECK: tlbi    alle2is                 // encoding: [0x1f,0x83,0x0c,0xd5]
+// CHECK: tlbi    alle3is                 // encoding: [0x1f,0x83,0x0e,0xd5]
+// CHECK: tlbi    vae1is, x1              // encoding: [0x21,0x83,0x08,0xd5]
+// CHECK: tlbi    vae2is, x2              // encoding: [0x22,0x83,0x0c,0xd5]
+// CHECK: tlbi    vae3is, x3              // encoding: [0x23,0x83,0x0e,0xd5]
+// CHECK: tlbi    aside1is, x5            // encoding: [0x45,0x83,0x08,0xd5]
+// CHECK: tlbi    vaae1is, x9             // encoding: [0x69,0x83,0x08,0xd5]
+// CHECK: tlbi    alle1is                 // encoding: [0x9f,0x83,0x0c,0xd5]
+// CHECK: tlbi    vale1is, x10            // encoding: [0xaa,0x83,0x08,0xd5]
+// CHECK: tlbi    vale2is, x11            // encoding: [0xab,0x83,0x0c,0xd5]
+// CHECK: tlbi    vale3is, x13            // encoding: [0xad,0x83,0x0e,0xd5]
+// CHECK: tlbi    vmalls12e1is            // encoding: [0xdf,0x83,0x0c,0xd5]
+// CHECK: tlbi    vaale1is, x14           // encoding: [0xee,0x83,0x08,0xd5]
+// CHECK: tlbi    ipas2e1, x15            // encoding: [0x2f,0x84,0x0c,0xd5]
+// CHECK: tlbi    ipas2le1, x16           // encoding: [0xb0,0x84,0x0c,0xd5]
+// CHECK: tlbi    vmalle1                 // encoding: [0x1f,0x87,0x08,0xd5]
+// CHECK: tlbi    alle2                   // encoding: [0x1f,0x87,0x0c,0xd5]
+// CHECK: tlbi    alle3                   // encoding: [0x1f,0x87,0x0e,0xd5]
+// CHECK: tlbi    vae1, x17               // encoding: [0x31,0x87,0x08,0xd5]
+// CHECK: tlbi    vae2, x18               // encoding: [0x32,0x87,0x0c,0xd5]
+// CHECK: tlbi    vae3, x19               // encoding: [0x33,0x87,0x0e,0xd5]
+// CHECK: tlbi    aside1, x20             // encoding: [0x54,0x87,0x08,0xd5]
+// CHECK: tlbi    vaae1, x21              // encoding: [0x75,0x87,0x08,0xd5]
+// CHECK: tlbi    alle1                   // encoding: [0x9f,0x87,0x0c,0xd5]
+// CHECK: tlbi    vale1, x22              // encoding: [0xb6,0x87,0x08,0xd5]
+// CHECK: tlbi    vale2, x23              // encoding: [0xb7,0x87,0x0c,0xd5]
+// CHECK: tlbi    vale3, x24              // encoding: [0xb8,0x87,0x0e,0xd5]
+// CHECK: tlbi    vmalls12e1              // encoding: [0xdf,0x87,0x0c,0xd5]
+// CHECK: tlbi    vaale1, x25             // encoding: [0xf9,0x87,0x08,0xd5]
+
+	msr TEECR32_EL1, x12    // upper-case register names in this list; expected output is lower case
+	msr OSDTRRX_EL1, x12
+	msr MDCCINT_EL1, x12    // MDCCSR_EL0 precedes this register in the mrs list but is not written here
+	msr MDSCR_EL1, x12
+	msr OSDTRTX_EL1, x12
+	msr DBGDTR_EL0, x12
+	msr DBGDTRTX_EL0, x12   // TX name on the write side; the mrs list has DBGDTRRX_EL0 in this position
+	msr OSECCR_EL1, x12
+	msr DBGVCR32_EL2, x12
+	msr DBGBVR0_EL1, x12    // DBGBVR0..15: expected bytes [0x8c,n,0x10,0xd5], n = register index
+	msr DBGBVR1_EL1, x12
+	msr DBGBVR2_EL1, x12
+	msr DBGBVR3_EL1, x12
+	msr DBGBVR4_EL1, x12
+	msr DBGBVR5_EL1, x12
+	msr DBGBVR6_EL1, x12
+	msr DBGBVR7_EL1, x12
+	msr DBGBVR8_EL1, x12
+	msr DBGBVR9_EL1, x12
+	msr DBGBVR10_EL1, x12
+	msr DBGBVR11_EL1, x12
+	msr DBGBVR12_EL1, x12
+	msr DBGBVR13_EL1, x12
+	msr DBGBVR14_EL1, x12
+	msr DBGBVR15_EL1, x12
+	msr DBGBCR0_EL1, x12    // DBGBCR0..15: same pattern, first expected byte 0xac
+	msr DBGBCR1_EL1, x12
+	msr DBGBCR2_EL1, x12
+	msr DBGBCR3_EL1, x12
+	msr DBGBCR4_EL1, x12
+	msr DBGBCR5_EL1, x12
+	msr DBGBCR6_EL1, x12
+	msr DBGBCR7_EL1, x12
+	msr DBGBCR8_EL1, x12
+	msr DBGBCR9_EL1, x12
+	msr DBGBCR10_EL1, x12
+	msr DBGBCR11_EL1, x12
+	msr DBGBCR12_EL1, x12
+	msr DBGBCR13_EL1, x12
+	msr DBGBCR14_EL1, x12
+	msr DBGBCR15_EL1, x12
+	msr DBGWVR0_EL1, x12    // DBGWVR0..15: same pattern, first expected byte 0xcc
+	msr DBGWVR1_EL1, x12
+	msr DBGWVR2_EL1, x12
+	msr DBGWVR3_EL1, x12
+	msr DBGWVR4_EL1, x12
+	msr DBGWVR5_EL1, x12
+	msr DBGWVR6_EL1, x12
+	msr DBGWVR7_EL1, x12
+	msr DBGWVR8_EL1, x12
+	msr DBGWVR9_EL1, x12
+	msr DBGWVR10_EL1, x12
+	msr DBGWVR11_EL1, x12
+	msr DBGWVR12_EL1, x12
+	msr DBGWVR13_EL1, x12
+	msr DBGWVR14_EL1, x12
+	msr DBGWVR15_EL1, x12
+	msr DBGWCR0_EL1, x12    // DBGWCR0..15: same pattern, first expected byte 0xec
+	msr DBGWCR1_EL1, x12
+	msr DBGWCR2_EL1, x12
+	msr DBGWCR3_EL1, x12
+	msr DBGWCR4_EL1, x12
+	msr DBGWCR5_EL1, x12
+	msr DBGWCR6_EL1, x12
+	msr DBGWCR7_EL1, x12
+	msr DBGWCR8_EL1, x12
+	msr DBGWCR9_EL1, x12
+	msr DBGWCR10_EL1, x12
+	msr DBGWCR11_EL1, x12
+	msr DBGWCR12_EL1, x12
+	msr DBGWCR13_EL1, x12
+	msr DBGWCR14_EL1, x12
+	msr DBGWCR15_EL1, x12
+	msr TEEHBR32_EL1, x12
+	msr OSLAR_EL1, x12      // OSLAR/OSDLR/DBGPRCR share first expected byte 0x8c; second byte is 0x10/0x13/0x14
+	msr OSDLR_EL1, x12
+	msr DBGPRCR_EL1, x12
+	msr DBGCLAIMSET_EL1, x12        // SET and CLR differ only in the second expected byte (0x78 vs 0x79)
+	msr DBGCLAIMCLR_EL1, x12
+	msr CSSELR_EL1, x12
+	msr VPIDR_EL2, x12
+	msr VMPIDR_EL2, x12
+	msr SCTLR_EL1, x12      // _EL1/_EL2/_EL3 variants differ only in the third expected byte (0x18/0x1c/0x1e)
+	msr SCTLR_EL2, x12
+	msr SCTLR_EL3, x12
+	msr ACTLR_EL1, x12
+	msr ACTLR_EL2, x12
+	msr ACTLR_EL3, x12
+	msr CPACR_EL1, x12
+	msr HCR_EL2, x12
+	msr SCR_EL3, x12
+	msr MDCR_EL2, x12
+	msr SDER32_EL3, x12
+	msr CPTR_EL2, x12
+	msr CPTR_EL3, x12
+	msr HSTR_EL2, x12
+	msr HACR_EL2, x12
+	msr MDCR_EL3, x12
+	msr TTBR0_EL1, x12
+	msr TTBR0_EL2, x12
+	msr TTBR0_EL3, x12
+	msr TTBR1_EL1, x12
+	msr TCR_EL1, x12
+	msr TCR_EL2, x12
+	msr TCR_EL3, x12
+	msr VTTBR_EL2, x12
+	msr VTCR_EL2, x12
+	msr DACR32_EL2, x12
+	msr SPSR_EL1, x12
+	msr SPSR_EL2, x12
+	msr SPSR_EL3, x12
+	msr ELR_EL1, x12
+	msr ELR_EL2, x12
+	msr ELR_EL3, x12
+	msr SP_EL0, x12         // SP_EL0/1/2 expect third byte 0x18/0x1c/0x1e, the values other registers use for _EL1/2/3
+	msr SP_EL1, x12
+	msr SP_EL2, x12
+	msr SPSel, x12          // register form; "msr spsel, #0" earlier in the file is the immediate form
+	msr NZCV, x12
+	msr DAIF, x12
+	msr CurrentEL, x12      // NOTE(review): CurrentEL is normally read-only; confirm accepting a write is intended
+	msr SPSR_irq, x12       // mixed-case suffix; expected output is all lower case
+	msr SPSR_abt, x12
+	msr SPSR_und, x12
+	msr SPSR_fiq, x12
+	msr FPCR, x12
+	msr FPSR, x12
+	msr DSPSR_EL0, x12
+	msr DLR_EL0, x12
+	msr IFSR32_EL2, x12
+	msr AFSR0_EL1, x12
+	msr AFSR0_EL2, x12
+	msr AFSR0_EL3, x12
+	msr AFSR1_EL1, x12
+	msr AFSR1_EL2, x12
+	msr AFSR1_EL3, x12
+	msr ESR_EL1, x12
+	msr ESR_EL2, x12
+	msr ESR_EL3, x12
+	msr FPEXC32_EL2, x12
+	msr FAR_EL1, x12
+	msr FAR_EL2, x12
+	msr FAR_EL3, x12
+	msr HPFAR_EL2, x12
+	msr PAR_EL1, x12
+	msr PMCR_EL0, x12       // _EL0 registers in this group expect third byte 0x1b
+	msr PMCNTENSET_EL0, x12
+	msr PMCNTENCLR_EL0, x12
+	msr PMOVSCLR_EL0, x12
+	msr PMSELR_EL0, x12
+	msr PMCCNTR_EL0, x12
+	msr PMXEVTYPER_EL0, x12
+	msr PMXEVCNTR_EL0, x12
+	msr PMUSERENR_EL0, x12
+	msr PMINTENSET_EL1, x12 // _EL1 register among _EL0 ones: expected third byte is 0x18
+	msr PMINTENCLR_EL1, x12 // likewise 0x18
+	msr PMOVSSET_EL0, x12
+	msr MAIR_EL1, x12       // _EL1/_EL2/_EL3 triples: expected third byte 0x18/0x1c/0x1e
+	msr MAIR_EL2, x12
+	msr MAIR_EL3, x12
+	msr AMAIR_EL1, x12
+	msr AMAIR_EL2, x12
+	msr AMAIR_EL3, x12
+	msr VBAR_EL1, x12
+	msr VBAR_EL2, x12
+	msr VBAR_EL3, x12
+	msr RMR_EL1, x12
+	msr RMR_EL2, x12
+	msr RMR_EL3, x12
+	msr CONTEXTIDR_EL1, x12
+	msr TPIDR_EL0, x12
+	msr TPIDR_EL2, x12
+	msr TPIDR_EL3, x12
+	msr TPIDRRO_EL0, x12
+	msr TPIDR_EL1, x12      // listed after EL2/EL3; the expected output keeps this source order
+	msr CNTFRQ_EL0, x12
+	msr CNTVOFF_EL2, x12
+	msr CNTKCTL_EL1, x12    // expected third byte 0x18, as for other _EL1 registers
+	msr CNTHCTL_EL2, x12
+	msr CNTP_TVAL_EL0, x12
+	msr CNTHP_TVAL_EL2, x12
+	msr CNTPS_TVAL_EL1, x12 // CNTPS_* carry an _EL1 suffix but expect third byte 0x1f, not 0x18
+	msr CNTP_CTL_EL0, x12
+	msr CNTHP_CTL_EL2, x12
+	msr CNTPS_CTL_EL1, x12
+	msr CNTP_CVAL_EL0, x12
+	msr CNTHP_CVAL_EL2, x12
+	msr CNTPS_CVAL_EL1, x12
+	msr CNTV_TVAL_EL0, x12
+	msr CNTV_CTL_EL0, x12
+	msr CNTV_CVAL_EL0, x12
+	msr PMEVCNTR0_EL0, x12  // PMEVCNTRn is exercised for n = 0..30
+	msr PMEVCNTR1_EL0, x12
+	msr PMEVCNTR2_EL0, x12
+	msr PMEVCNTR3_EL0, x12
+	msr PMEVCNTR4_EL0, x12
+	msr PMEVCNTR5_EL0, x12
+	msr PMEVCNTR6_EL0, x12
+	msr PMEVCNTR7_EL0, x12
+	msr PMEVCNTR8_EL0, x12
+	msr PMEVCNTR9_EL0, x12
+	msr PMEVCNTR10_EL0, x12
+	msr PMEVCNTR11_EL0, x12
+	msr PMEVCNTR12_EL0, x12
+	msr PMEVCNTR13_EL0, x12
+	msr PMEVCNTR14_EL0, x12
+	msr PMEVCNTR15_EL0, x12
+	msr PMEVCNTR16_EL0, x12
+	msr PMEVCNTR17_EL0, x12
+	msr PMEVCNTR18_EL0, x12
+	msr PMEVCNTR19_EL0, x12
+	msr PMEVCNTR20_EL0, x12
+	msr PMEVCNTR21_EL0, x12
+	msr PMEVCNTR22_EL0, x12
+	msr PMEVCNTR23_EL0, x12
+	msr PMEVCNTR24_EL0, x12
+	msr PMEVCNTR25_EL0, x12
+	msr PMEVCNTR26_EL0, x12
+	msr PMEVCNTR27_EL0, x12
+	msr PMEVCNTR28_EL0, x12
+	msr PMEVCNTR29_EL0, x12
+	msr PMEVCNTR30_EL0, x12
+	msr PMCCFILTR_EL0, x12  // expected [0xec,0xef,...]: the slot right after PMEVTYPER30 ([0xcc,0xef,...])
+	msr PMEVTYPER0_EL0, x12 // PMEVTYPERn is exercised for n = 0..30
+	msr PMEVTYPER1_EL0, x12
+	msr PMEVTYPER2_EL0, x12
+	msr PMEVTYPER3_EL0, x12
+	msr PMEVTYPER4_EL0, x12
+	msr PMEVTYPER5_EL0, x12
+	msr PMEVTYPER6_EL0, x12
+	msr PMEVTYPER7_EL0, x12
+	msr PMEVTYPER8_EL0, x12
+	msr PMEVTYPER9_EL0, x12
+	msr PMEVTYPER10_EL0, x12
+	msr PMEVTYPER11_EL0, x12
+	msr PMEVTYPER12_EL0, x12
+	msr PMEVTYPER13_EL0, x12
+	msr PMEVTYPER14_EL0, x12
+	msr PMEVTYPER15_EL0, x12
+	msr PMEVTYPER16_EL0, x12
+	msr PMEVTYPER17_EL0, x12
+	msr PMEVTYPER18_EL0, x12
+	msr PMEVTYPER19_EL0, x12
+	msr PMEVTYPER20_EL0, x12
+	msr PMEVTYPER21_EL0, x12
+	msr PMEVTYPER22_EL0, x12
+	msr PMEVTYPER23_EL0, x12
+	msr PMEVTYPER24_EL0, x12
+	msr PMEVTYPER25_EL0, x12
+	msr PMEVTYPER26_EL0, x12
+	msr PMEVTYPER27_EL0, x12
+	msr PMEVTYPER28_EL0, x12
+	msr PMEVTYPER29_EL0, x12
+	msr PMEVTYPER30_EL0, x12
+// CHECK: msr      teecr32_el1, x12           // encoding: [0x0c,0x00,0x12,0xd5]
+// CHECK: msr      osdtrrx_el1, x12           // encoding: [0x4c,0x00,0x10,0xd5]
+// CHECK: msr      mdccint_el1, x12           // encoding: [0x0c,0x02,0x10,0xd5]
+// CHECK: msr      mdscr_el1, x12             // encoding: [0x4c,0x02,0x10,0xd5]
+// CHECK: msr      osdtrtx_el1, x12           // encoding: [0x4c,0x03,0x10,0xd5]
+// CHECK: msr      dbgdtr_el0, x12            // encoding: [0x0c,0x04,0x13,0xd5]
+// CHECK: msr      dbgdtrtx_el0, x12          // encoding: [0x0c,0x05,0x13,0xd5]
+// CHECK: msr      oseccr_el1, x12            // encoding: [0x4c,0x06,0x10,0xd5]
+// CHECK: msr      dbgvcr32_el2, x12          // encoding: [0x0c,0x07,0x14,0xd5]
+// CHECK: msr      dbgbvr0_el1, x12           // encoding: [0x8c,0x00,0x10,0xd5]
+// CHECK: msr      dbgbvr1_el1, x12           // encoding: [0x8c,0x01,0x10,0xd5]
+// CHECK: msr      dbgbvr2_el1, x12           // encoding: [0x8c,0x02,0x10,0xd5]
+// CHECK: msr      dbgbvr3_el1, x12           // encoding: [0x8c,0x03,0x10,0xd5]
+// CHECK: msr      dbgbvr4_el1, x12           // encoding: [0x8c,0x04,0x10,0xd5]
+// CHECK: msr      dbgbvr5_el1, x12           // encoding: [0x8c,0x05,0x10,0xd5]
+// CHECK: msr      dbgbvr6_el1, x12           // encoding: [0x8c,0x06,0x10,0xd5]
+// CHECK: msr      dbgbvr7_el1, x12           // encoding: [0x8c,0x07,0x10,0xd5]
+// CHECK: msr      dbgbvr8_el1, x12           // encoding: [0x8c,0x08,0x10,0xd5]
+// CHECK: msr      dbgbvr9_el1, x12           // encoding: [0x8c,0x09,0x10,0xd5]
+// CHECK: msr      dbgbvr10_el1, x12          // encoding: [0x8c,0x0a,0x10,0xd5]
+// CHECK: msr      dbgbvr11_el1, x12          // encoding: [0x8c,0x0b,0x10,0xd5]
+// CHECK: msr      dbgbvr12_el1, x12          // encoding: [0x8c,0x0c,0x10,0xd5]
+// CHECK: msr      dbgbvr13_el1, x12          // encoding: [0x8c,0x0d,0x10,0xd5]
+// CHECK: msr      dbgbvr14_el1, x12          // encoding: [0x8c,0x0e,0x10,0xd5]
+// CHECK: msr      dbgbvr15_el1, x12          // encoding: [0x8c,0x0f,0x10,0xd5]
+// CHECK: msr      dbgbcr0_el1, x12           // encoding: [0xac,0x00,0x10,0xd5]
+// CHECK: msr      dbgbcr1_el1, x12           // encoding: [0xac,0x01,0x10,0xd5]
+// CHECK: msr      dbgbcr2_el1, x12           // encoding: [0xac,0x02,0x10,0xd5]
+// CHECK: msr      dbgbcr3_el1, x12           // encoding: [0xac,0x03,0x10,0xd5]
+// CHECK: msr      dbgbcr4_el1, x12           // encoding: [0xac,0x04,0x10,0xd5]
+// CHECK: msr      dbgbcr5_el1, x12           // encoding: [0xac,0x05,0x10,0xd5]
+// CHECK: msr      dbgbcr6_el1, x12           // encoding: [0xac,0x06,0x10,0xd5]
+// CHECK: msr      dbgbcr7_el1, x12           // encoding: [0xac,0x07,0x10,0xd5]
+// CHECK: msr      dbgbcr8_el1, x12           // encoding: [0xac,0x08,0x10,0xd5]
+// CHECK: msr      dbgbcr9_el1, x12           // encoding: [0xac,0x09,0x10,0xd5]
+// CHECK: msr      dbgbcr10_el1, x12          // encoding: [0xac,0x0a,0x10,0xd5]
+// CHECK: msr      dbgbcr11_el1, x12          // encoding: [0xac,0x0b,0x10,0xd5]
+// CHECK: msr      dbgbcr12_el1, x12          // encoding: [0xac,0x0c,0x10,0xd5]
+// CHECK: msr      dbgbcr13_el1, x12          // encoding: [0xac,0x0d,0x10,0xd5]
+// CHECK: msr      dbgbcr14_el1, x12          // encoding: [0xac,0x0e,0x10,0xd5]
+// CHECK: msr      dbgbcr15_el1, x12          // encoding: [0xac,0x0f,0x10,0xd5]
+// CHECK: msr      dbgwvr0_el1, x12           // encoding: [0xcc,0x00,0x10,0xd5]
+// CHECK: msr      dbgwvr1_el1, x12           // encoding: [0xcc,0x01,0x10,0xd5]
+// CHECK: msr      dbgwvr2_el1, x12           // encoding: [0xcc,0x02,0x10,0xd5]
+// CHECK: msr      dbgwvr3_el1, x12           // encoding: [0xcc,0x03,0x10,0xd5]
+// CHECK: msr      dbgwvr4_el1, x12           // encoding: [0xcc,0x04,0x10,0xd5]
+// CHECK: msr      dbgwvr5_el1, x12           // encoding: [0xcc,0x05,0x10,0xd5]
+// CHECK: msr      dbgwvr6_el1, x12           // encoding: [0xcc,0x06,0x10,0xd5]
+// CHECK: msr      dbgwvr7_el1, x12           // encoding: [0xcc,0x07,0x10,0xd5]
+// CHECK: msr      dbgwvr8_el1, x12           // encoding: [0xcc,0x08,0x10,0xd5]
+// CHECK: msr      dbgwvr9_el1, x12           // encoding: [0xcc,0x09,0x10,0xd5]
+// CHECK: msr      dbgwvr10_el1, x12          // encoding: [0xcc,0x0a,0x10,0xd5]
+// CHECK: msr      dbgwvr11_el1, x12          // encoding: [0xcc,0x0b,0x10,0xd5]
+// CHECK: msr      dbgwvr12_el1, x12          // encoding: [0xcc,0x0c,0x10,0xd5]
+// CHECK: msr      dbgwvr13_el1, x12          // encoding: [0xcc,0x0d,0x10,0xd5]
+// CHECK: msr      dbgwvr14_el1, x12          // encoding: [0xcc,0x0e,0x10,0xd5]
+// CHECK: msr      dbgwvr15_el1, x12          // encoding: [0xcc,0x0f,0x10,0xd5]
+// CHECK: msr      dbgwcr0_el1, x12           // encoding: [0xec,0x00,0x10,0xd5]
+// CHECK: msr      dbgwcr1_el1, x12           // encoding: [0xec,0x01,0x10,0xd5]
+// CHECK: msr      dbgwcr2_el1, x12           // encoding: [0xec,0x02,0x10,0xd5]
+// CHECK: msr      dbgwcr3_el1, x12           // encoding: [0xec,0x03,0x10,0xd5]
+// CHECK: msr      dbgwcr4_el1, x12           // encoding: [0xec,0x04,0x10,0xd5]
+// CHECK: msr      dbgwcr5_el1, x12           // encoding: [0xec,0x05,0x10,0xd5]
+// CHECK: msr      dbgwcr6_el1, x12           // encoding: [0xec,0x06,0x10,0xd5]
+// CHECK: msr      dbgwcr7_el1, x12           // encoding: [0xec,0x07,0x10,0xd5]
+// CHECK: msr      dbgwcr8_el1, x12           // encoding: [0xec,0x08,0x10,0xd5]
+// CHECK: msr      dbgwcr9_el1, x12           // encoding: [0xec,0x09,0x10,0xd5]
+// CHECK: msr      dbgwcr10_el1, x12          // encoding: [0xec,0x0a,0x10,0xd5]
+// CHECK: msr      dbgwcr11_el1, x12          // encoding: [0xec,0x0b,0x10,0xd5]
+// CHECK: msr      dbgwcr12_el1, x12          // encoding: [0xec,0x0c,0x10,0xd5]
+// CHECK: msr      dbgwcr13_el1, x12          // encoding: [0xec,0x0d,0x10,0xd5]
+// CHECK: msr      dbgwcr14_el1, x12          // encoding: [0xec,0x0e,0x10,0xd5]
+// CHECK: msr      dbgwcr15_el1, x12          // encoding: [0xec,0x0f,0x10,0xd5]
+// CHECK: msr      teehbr32_el1, x12          // encoding: [0x0c,0x10,0x12,0xd5]
+// CHECK: msr      oslar_el1, x12             // encoding: [0x8c,0x10,0x10,0xd5]
+// CHECK: msr      osdlr_el1, x12             // encoding: [0x8c,0x13,0x10,0xd5]
+// CHECK: msr      dbgprcr_el1, x12           // encoding: [0x8c,0x14,0x10,0xd5]
+// CHECK: msr      dbgclaimset_el1, x12       // encoding: [0xcc,0x78,0x10,0xd5]
+// CHECK: msr      dbgclaimclr_el1, x12       // encoding: [0xcc,0x79,0x10,0xd5]
+// CHECK: msr      csselr_el1, x12            // encoding: [0x0c,0x00,0x1a,0xd5]
+// CHECK: msr      vpidr_el2, x12             // encoding: [0x0c,0x00,0x1c,0xd5]
+// CHECK: msr      vmpidr_el2, x12            // encoding: [0xac,0x00,0x1c,0xd5]
+// CHECK: msr      sctlr_el1, x12             // encoding: [0x0c,0x10,0x18,0xd5]
+// CHECK: msr      sctlr_el2, x12             // encoding: [0x0c,0x10,0x1c,0xd5]
+// CHECK: msr      sctlr_el3, x12             // encoding: [0x0c,0x10,0x1e,0xd5]
+// CHECK: msr      actlr_el1, x12             // encoding: [0x2c,0x10,0x18,0xd5]
+// CHECK: msr      actlr_el2, x12             // encoding: [0x2c,0x10,0x1c,0xd5]
+// CHECK: msr      actlr_el3, x12             // encoding: [0x2c,0x10,0x1e,0xd5]
+// CHECK: msr      cpacr_el1, x12             // encoding: [0x4c,0x10,0x18,0xd5]
+// CHECK: msr      hcr_el2, x12               // encoding: [0x0c,0x11,0x1c,0xd5]
+// CHECK: msr      scr_el3, x12               // encoding: [0x0c,0x11,0x1e,0xd5]
+// CHECK: msr      mdcr_el2, x12              // encoding: [0x2c,0x11,0x1c,0xd5]
+// CHECK: msr      sder32_el3, x12            // encoding: [0x2c,0x11,0x1e,0xd5]
+// CHECK: msr      cptr_el2, x12              // encoding: [0x4c,0x11,0x1c,0xd5]
+// CHECK: msr      cptr_el3, x12              // encoding: [0x4c,0x11,0x1e,0xd5]
+// CHECK: msr      hstr_el2, x12              // encoding: [0x6c,0x11,0x1c,0xd5]
+// CHECK: msr      hacr_el2, x12              // encoding: [0xec,0x11,0x1c,0xd5]
+// CHECK: msr      mdcr_el3, x12              // encoding: [0x2c,0x13,0x1e,0xd5]
+// CHECK: msr      ttbr0_el1, x12             // encoding: [0x0c,0x20,0x18,0xd5]
+// CHECK: msr      ttbr0_el2, x12             // encoding: [0x0c,0x20,0x1c,0xd5]
+// CHECK: msr      ttbr0_el3, x12             // encoding: [0x0c,0x20,0x1e,0xd5]
+// CHECK: msr      ttbr1_el1, x12             // encoding: [0x2c,0x20,0x18,0xd5]
+// CHECK: msr      tcr_el1, x12               // encoding: [0x4c,0x20,0x18,0xd5]
+// CHECK: msr      tcr_el2, x12               // encoding: [0x4c,0x20,0x1c,0xd5]
+// CHECK: msr      tcr_el3, x12               // encoding: [0x4c,0x20,0x1e,0xd5]
+// CHECK: msr      vttbr_el2, x12             // encoding: [0x0c,0x21,0x1c,0xd5]
+// CHECK: msr      vtcr_el2, x12              // encoding: [0x4c,0x21,0x1c,0xd5]
+// CHECK: msr      dacr32_el2, x12            // encoding: [0x0c,0x30,0x1c,0xd5]
+// CHECK: msr      spsr_el1, x12              // encoding: [0x0c,0x40,0x18,0xd5]
+// CHECK: msr      spsr_el2, x12              // encoding: [0x0c,0x40,0x1c,0xd5]
+// CHECK: msr      spsr_el3, x12              // encoding: [0x0c,0x40,0x1e,0xd5]
+// CHECK: msr      elr_el1, x12               // encoding: [0x2c,0x40,0x18,0xd5]
+// CHECK: msr      elr_el2, x12               // encoding: [0x2c,0x40,0x1c,0xd5]
+// CHECK: msr      elr_el3, x12               // encoding: [0x2c,0x40,0x1e,0xd5]
+// CHECK: msr      sp_el0, x12                // encoding: [0x0c,0x41,0x18,0xd5]
+// CHECK: msr      sp_el1, x12                // encoding: [0x0c,0x41,0x1c,0xd5]
+// CHECK: msr      sp_el2, x12                // encoding: [0x0c,0x41,0x1e,0xd5]
+// CHECK: msr      spsel, x12                 // encoding: [0x0c,0x42,0x18,0xd5]
+// CHECK: msr      nzcv, x12                  // encoding: [0x0c,0x42,0x1b,0xd5]
+// CHECK: msr      daif, x12                  // encoding: [0x2c,0x42,0x1b,0xd5]
+// CHECK: msr      currentel, x12             // encoding: [0x4c,0x42,0x18,0xd5]
+// CHECK: msr      spsr_irq, x12              // encoding: [0x0c,0x43,0x1c,0xd5]
+// CHECK: msr      spsr_abt, x12              // encoding: [0x2c,0x43,0x1c,0xd5]
+// CHECK: msr      spsr_und, x12              // encoding: [0x4c,0x43,0x1c,0xd5]
+// CHECK: msr      spsr_fiq, x12              // encoding: [0x6c,0x43,0x1c,0xd5]
+// CHECK: msr      fpcr, x12                  // encoding: [0x0c,0x44,0x1b,0xd5]
+// CHECK: msr      fpsr, x12                  // encoding: [0x2c,0x44,0x1b,0xd5]
+// CHECK: msr      dspsr_el0, x12             // encoding: [0x0c,0x45,0x1b,0xd5]
+// CHECK: msr      dlr_el0, x12               // encoding: [0x2c,0x45,0x1b,0xd5]
+// CHECK: msr      ifsr32_el2, x12            // encoding: [0x2c,0x50,0x1c,0xd5]
+// CHECK: msr      afsr0_el1, x12             // encoding: [0x0c,0x51,0x18,0xd5]
+// CHECK: msr      afsr0_el2, x12             // encoding: [0x0c,0x51,0x1c,0xd5]
+// CHECK: msr      afsr0_el3, x12             // encoding: [0x0c,0x51,0x1e,0xd5]
+// CHECK: msr      afsr1_el1, x12             // encoding: [0x2c,0x51,0x18,0xd5]
+// CHECK: msr      afsr1_el2, x12             // encoding: [0x2c,0x51,0x1c,0xd5]
+// CHECK: msr      afsr1_el3, x12             // encoding: [0x2c,0x51,0x1e,0xd5]
+// CHECK: msr      esr_el1, x12               // encoding: [0x0c,0x52,0x18,0xd5]
+// CHECK: msr      esr_el2, x12               // encoding: [0x0c,0x52,0x1c,0xd5]
+// CHECK: msr      esr_el3, x12               // encoding: [0x0c,0x52,0x1e,0xd5]
+// CHECK: msr      fpexc32_el2, x12           // encoding: [0x0c,0x53,0x1c,0xd5]
+// CHECK: msr      far_el1, x12               // encoding: [0x0c,0x60,0x18,0xd5]
+// CHECK: msr      far_el2, x12               // encoding: [0x0c,0x60,0x1c,0xd5]
+// CHECK: msr      far_el3, x12               // encoding: [0x0c,0x60,0x1e,0xd5]
+// CHECK: msr      hpfar_el2, x12             // encoding: [0x8c,0x60,0x1c,0xd5]
+// CHECK: msr      par_el1, x12               // encoding: [0x0c,0x74,0x18,0xd5]
+// CHECK: msr      pmcr_el0, x12              // encoding: [0x0c,0x9c,0x1b,0xd5]
+// CHECK: msr      pmcntenset_el0, x12        // encoding: [0x2c,0x9c,0x1b,0xd5]
+// CHECK: msr      pmcntenclr_el0, x12        // encoding: [0x4c,0x9c,0x1b,0xd5]
+// CHECK: msr      pmovsclr_el0, x12          // encoding: [0x6c,0x9c,0x1b,0xd5]
+// CHECK: msr      pmselr_el0, x12            // encoding: [0xac,0x9c,0x1b,0xd5]
+// CHECK: msr      pmccntr_el0, x12           // encoding: [0x0c,0x9d,0x1b,0xd5]
+// CHECK: msr      pmxevtyper_el0, x12        // encoding: [0x2c,0x9d,0x1b,0xd5]
+// CHECK: msr      pmxevcntr_el0, x12         // encoding: [0x4c,0x9d,0x1b,0xd5]
+// CHECK: msr      pmuserenr_el0, x12         // encoding: [0x0c,0x9e,0x1b,0xd5]
+// CHECK: msr      pmintenset_el1, x12        // encoding: [0x2c,0x9e,0x18,0xd5]
+// CHECK: msr      pmintenclr_el1, x12        // encoding: [0x4c,0x9e,0x18,0xd5]
+// CHECK: msr      pmovsset_el0, x12          // encoding: [0x6c,0x9e,0x1b,0xd5]
+// CHECK: msr      mair_el1, x12              // encoding: [0x0c,0xa2,0x18,0xd5]
+// CHECK: msr      mair_el2, x12              // encoding: [0x0c,0xa2,0x1c,0xd5]
+// CHECK: msr      mair_el3, x12              // encoding: [0x0c,0xa2,0x1e,0xd5]
+// CHECK: msr      amair_el1, x12             // encoding: [0x0c,0xa3,0x18,0xd5]
+// CHECK: msr      amair_el2, x12             // encoding: [0x0c,0xa3,0x1c,0xd5]
+// CHECK: msr      amair_el3, x12             // encoding: [0x0c,0xa3,0x1e,0xd5]
+// CHECK: msr      vbar_el1, x12              // encoding: [0x0c,0xc0,0x18,0xd5]
+// CHECK: msr      vbar_el2, x12              // encoding: [0x0c,0xc0,0x1c,0xd5]
+// CHECK: msr      vbar_el3, x12              // encoding: [0x0c,0xc0,0x1e,0xd5]
+// CHECK: msr      rmr_el1, x12               // encoding: [0x4c,0xc0,0x18,0xd5]
+// CHECK: msr      rmr_el2, x12               // encoding: [0x4c,0xc0,0x1c,0xd5]
+// CHECK: msr      rmr_el3, x12               // encoding: [0x4c,0xc0,0x1e,0xd5]
+// CHECK: msr      contextidr_el1, x12        // encoding: [0x2c,0xd0,0x18,0xd5]
+// CHECK: msr      tpidr_el0, x12             // encoding: [0x4c,0xd0,0x1b,0xd5]
+// CHECK: msr      tpidr_el2, x12             // encoding: [0x4c,0xd0,0x1c,0xd5]
+// CHECK: msr      tpidr_el3, x12             // encoding: [0x4c,0xd0,0x1e,0xd5]
+// CHECK: msr      tpidrro_el0, x12           // encoding: [0x6c,0xd0,0x1b,0xd5]
+// CHECK: msr      tpidr_el1, x12             // encoding: [0x8c,0xd0,0x18,0xd5]
+// CHECK: msr      cntfrq_el0, x12            // encoding: [0x0c,0xe0,0x1b,0xd5]
+// CHECK: msr      cntvoff_el2, x12           // encoding: [0x6c,0xe0,0x1c,0xd5]
+// CHECK: msr      cntkctl_el1, x12           // encoding: [0x0c,0xe1,0x18,0xd5]
+// CHECK: msr      cnthctl_el2, x12           // encoding: [0x0c,0xe1,0x1c,0xd5]
+// CHECK: msr      cntp_tval_el0, x12         // encoding: [0x0c,0xe2,0x1b,0xd5]
+// CHECK: msr      cnthp_tval_el2, x12        // encoding: [0x0c,0xe2,0x1c,0xd5]
+// CHECK: msr      cntps_tval_el1, x12        // encoding: [0x0c,0xe2,0x1f,0xd5]
+// CHECK: msr      cntp_ctl_el0, x12          // encoding: [0x2c,0xe2,0x1b,0xd5]
+// CHECK: msr      cnthp_ctl_el2, x12         // encoding: [0x2c,0xe2,0x1c,0xd5]
+// CHECK: msr      cntps_ctl_el1, x12         // encoding: [0x2c,0xe2,0x1f,0xd5]
+// CHECK: msr      cntp_cval_el0, x12         // encoding: [0x4c,0xe2,0x1b,0xd5]
+// CHECK: msr      cnthp_cval_el2, x12        // encoding: [0x4c,0xe2,0x1c,0xd5]
+// CHECK: msr      cntps_cval_el1, x12        // encoding: [0x4c,0xe2,0x1f,0xd5]
+// CHECK: msr      cntv_tval_el0, x12         // encoding: [0x0c,0xe3,0x1b,0xd5]
+// CHECK: msr      cntv_ctl_el0, x12          // encoding: [0x2c,0xe3,0x1b,0xd5]
+// CHECK: msr      cntv_cval_el0, x12         // encoding: [0x4c,0xe3,0x1b,0xd5]
+// CHECK: msr      pmevcntr0_el0, x12         // encoding: [0x0c,0xe8,0x1b,0xd5]
+// CHECK: msr      pmevcntr1_el0, x12         // encoding: [0x2c,0xe8,0x1b,0xd5]
+// CHECK: msr      pmevcntr2_el0, x12         // encoding: [0x4c,0xe8,0x1b,0xd5]
+// CHECK: msr      pmevcntr3_el0, x12         // encoding: [0x6c,0xe8,0x1b,0xd5]
+// CHECK: msr      pmevcntr4_el0, x12         // encoding: [0x8c,0xe8,0x1b,0xd5]
+// CHECK: msr      pmevcntr5_el0, x12         // encoding: [0xac,0xe8,0x1b,0xd5]
+// CHECK: msr      pmevcntr6_el0, x12         // encoding: [0xcc,0xe8,0x1b,0xd5]
+// CHECK: msr      pmevcntr7_el0, x12         // encoding: [0xec,0xe8,0x1b,0xd5]
+// CHECK: msr      pmevcntr8_el0, x12         // encoding: [0x0c,0xe9,0x1b,0xd5]
+// CHECK: msr      pmevcntr9_el0, x12         // encoding: [0x2c,0xe9,0x1b,0xd5]
+// CHECK: msr      pmevcntr10_el0, x12        // encoding: [0x4c,0xe9,0x1b,0xd5]
+// CHECK: msr      pmevcntr11_el0, x12        // encoding: [0x6c,0xe9,0x1b,0xd5]
+// CHECK: msr      pmevcntr12_el0, x12        // encoding: [0x8c,0xe9,0x1b,0xd5]
+// CHECK: msr      pmevcntr13_el0, x12        // encoding: [0xac,0xe9,0x1b,0xd5]
+// CHECK: msr      pmevcntr14_el0, x12        // encoding: [0xcc,0xe9,0x1b,0xd5]
+// CHECK: msr      pmevcntr15_el0, x12        // encoding: [0xec,0xe9,0x1b,0xd5]
+// CHECK: msr      pmevcntr16_el0, x12        // encoding: [0x0c,0xea,0x1b,0xd5]
+// CHECK: msr      pmevcntr17_el0, x12        // encoding: [0x2c,0xea,0x1b,0xd5]
+// CHECK: msr      pmevcntr18_el0, x12        // encoding: [0x4c,0xea,0x1b,0xd5]
+// CHECK: msr      pmevcntr19_el0, x12        // encoding: [0x6c,0xea,0x1b,0xd5]
+// CHECK: msr      pmevcntr20_el0, x12        // encoding: [0x8c,0xea,0x1b,0xd5]
+// CHECK: msr      pmevcntr21_el0, x12        // encoding: [0xac,0xea,0x1b,0xd5]
+// CHECK: msr      pmevcntr22_el0, x12        // encoding: [0xcc,0xea,0x1b,0xd5]
+// CHECK: msr      pmevcntr23_el0, x12        // encoding: [0xec,0xea,0x1b,0xd5]
+// CHECK: msr      pmevcntr24_el0, x12        // encoding: [0x0c,0xeb,0x1b,0xd5]
+// CHECK: msr      pmevcntr25_el0, x12        // encoding: [0x2c,0xeb,0x1b,0xd5]
+// CHECK: msr      pmevcntr26_el0, x12        // encoding: [0x4c,0xeb,0x1b,0xd5]
+// CHECK: msr      pmevcntr27_el0, x12        // encoding: [0x6c,0xeb,0x1b,0xd5]
+// CHECK: msr      pmevcntr28_el0, x12        // encoding: [0x8c,0xeb,0x1b,0xd5]
+// CHECK: msr      pmevcntr29_el0, x12        // encoding: [0xac,0xeb,0x1b,0xd5]
+// CHECK: msr      pmevcntr30_el0, x12        // encoding: [0xcc,0xeb,0x1b,0xd5]
+// CHECK: msr      pmccfiltr_el0, x12         // encoding: [0xec,0xef,0x1b,0xd5]
+// CHECK: msr      pmevtyper0_el0, x12        // encoding: [0x0c,0xec,0x1b,0xd5]
+// CHECK: msr      pmevtyper1_el0, x12        // encoding: [0x2c,0xec,0x1b,0xd5]
+// CHECK: msr      pmevtyper2_el0, x12        // encoding: [0x4c,0xec,0x1b,0xd5]
+// CHECK: msr      pmevtyper3_el0, x12        // encoding: [0x6c,0xec,0x1b,0xd5]
+// CHECK: msr      pmevtyper4_el0, x12        // encoding: [0x8c,0xec,0x1b,0xd5]
+// CHECK: msr      pmevtyper5_el0, x12        // encoding: [0xac,0xec,0x1b,0xd5]
+// CHECK: msr      pmevtyper6_el0, x12        // encoding: [0xcc,0xec,0x1b,0xd5]
+// CHECK: msr      pmevtyper7_el0, x12        // encoding: [0xec,0xec,0x1b,0xd5]
+// CHECK: msr      pmevtyper8_el0, x12        // encoding: [0x0c,0xed,0x1b,0xd5]
+// CHECK: msr      pmevtyper9_el0, x12        // encoding: [0x2c,0xed,0x1b,0xd5]
+// CHECK: msr      pmevtyper10_el0, x12       // encoding: [0x4c,0xed,0x1b,0xd5]
+// CHECK: msr      pmevtyper11_el0, x12       // encoding: [0x6c,0xed,0x1b,0xd5]
+// CHECK: msr      pmevtyper12_el0, x12       // encoding: [0x8c,0xed,0x1b,0xd5]
+// CHECK: msr      pmevtyper13_el0, x12       // encoding: [0xac,0xed,0x1b,0xd5]
+// CHECK: msr      pmevtyper14_el0, x12       // encoding: [0xcc,0xed,0x1b,0xd5]
+// CHECK: msr      pmevtyper15_el0, x12       // encoding: [0xec,0xed,0x1b,0xd5]
+// CHECK: msr      pmevtyper16_el0, x12       // encoding: [0x0c,0xee,0x1b,0xd5]
+// CHECK: msr      pmevtyper17_el0, x12       // encoding: [0x2c,0xee,0x1b,0xd5]
+// CHECK: msr      pmevtyper18_el0, x12       // encoding: [0x4c,0xee,0x1b,0xd5]
+// CHECK: msr      pmevtyper19_el0, x12       // encoding: [0x6c,0xee,0x1b,0xd5]
+// CHECK: msr      pmevtyper20_el0, x12       // encoding: [0x8c,0xee,0x1b,0xd5]
+// CHECK: msr      pmevtyper21_el0, x12       // encoding: [0xac,0xee,0x1b,0xd5]
+// CHECK: msr      pmevtyper22_el0, x12       // encoding: [0xcc,0xee,0x1b,0xd5]
+// CHECK: msr      pmevtyper23_el0, x12       // encoding: [0xec,0xee,0x1b,0xd5]
+// CHECK: msr      pmevtyper24_el0, x12       // encoding: [0x0c,0xef,0x1b,0xd5]
+// CHECK: msr      pmevtyper25_el0, x12       // encoding: [0x2c,0xef,0x1b,0xd5]
+// CHECK: msr      pmevtyper26_el0, x12       // encoding: [0x4c,0xef,0x1b,0xd5]
+// CHECK: msr      pmevtyper27_el0, x12       // encoding: [0x6c,0xef,0x1b,0xd5]
+// CHECK: msr      pmevtyper28_el0, x12       // encoding: [0x8c,0xef,0x1b,0xd5]
+// CHECK: msr      pmevtyper29_el0, x12       // encoding: [0xac,0xef,0x1b,0xd5]
+// CHECK: msr      pmevtyper30_el0, x12       // encoding: [0xcc,0xef,0x1b,0xd5]
+
+	mrs x9, TEECR32_EL1	// read form: each named system register in this run is read into x9; expected output is listed after the run
+	mrs x9, OSDTRRX_EL1
+	mrs x9, MDCCSR_EL0
+	mrs x9, MDCCINT_EL1
+	mrs x9, MDSCR_EL1
+	mrs x9, OSDTRTX_EL1
+	mrs x9, DBGDTR_EL0
+	mrs x9, DBGDTRRX_EL0
+	mrs x9, OSECCR_EL1
+	mrs x9, DBGVCR32_EL2
+	mrs x9, DBGBVR0_EL1
+	mrs x9, DBGBVR1_EL1
+	mrs x9, DBGBVR2_EL1
+	mrs x9, DBGBVR3_EL1
+	mrs x9, DBGBVR4_EL1
+	mrs x9, DBGBVR5_EL1
+	mrs x9, DBGBVR6_EL1
+	mrs x9, DBGBVR7_EL1
+	mrs x9, DBGBVR8_EL1
+	mrs x9, DBGBVR9_EL1
+	mrs x9, DBGBVR10_EL1
+	mrs x9, DBGBVR11_EL1
+	mrs x9, DBGBVR12_EL1
+	mrs x9, DBGBVR13_EL1
+	mrs x9, DBGBVR14_EL1
+	mrs x9, DBGBVR15_EL1
+	mrs x9, DBGBCR0_EL1
+	mrs x9, DBGBCR1_EL1
+	mrs x9, DBGBCR2_EL1
+	mrs x9, DBGBCR3_EL1
+	mrs x9, DBGBCR4_EL1
+	mrs x9, DBGBCR5_EL1
+	mrs x9, DBGBCR6_EL1
+	mrs x9, DBGBCR7_EL1
+	mrs x9, DBGBCR8_EL1
+	mrs x9, DBGBCR9_EL1
+	mrs x9, DBGBCR10_EL1
+	mrs x9, DBGBCR11_EL1
+	mrs x9, DBGBCR12_EL1
+	mrs x9, DBGBCR13_EL1
+	mrs x9, DBGBCR14_EL1
+	mrs x9, DBGBCR15_EL1
+	mrs x9, DBGWVR0_EL1
+	mrs x9, DBGWVR1_EL1
+	mrs x9, DBGWVR2_EL1
+	mrs x9, DBGWVR3_EL1
+	mrs x9, DBGWVR4_EL1
+	mrs x9, DBGWVR5_EL1
+	mrs x9, DBGWVR6_EL1
+	mrs x9, DBGWVR7_EL1
+	mrs x9, DBGWVR8_EL1
+	mrs x9, DBGWVR9_EL1
+	mrs x9, DBGWVR10_EL1
+	mrs x9, DBGWVR11_EL1
+	mrs x9, DBGWVR12_EL1
+	mrs x9, DBGWVR13_EL1
+	mrs x9, DBGWVR14_EL1
+	mrs x9, DBGWVR15_EL1
+	mrs x9, DBGWCR0_EL1
+	mrs x9, DBGWCR1_EL1
+	mrs x9, DBGWCR2_EL1
+	mrs x9, DBGWCR3_EL1
+	mrs x9, DBGWCR4_EL1
+	mrs x9, DBGWCR5_EL1
+	mrs x9, DBGWCR6_EL1
+	mrs x9, DBGWCR7_EL1
+	mrs x9, DBGWCR8_EL1
+	mrs x9, DBGWCR9_EL1
+	mrs x9, DBGWCR10_EL1
+	mrs x9, DBGWCR11_EL1
+	mrs x9, DBGWCR12_EL1
+	mrs x9, DBGWCR13_EL1
+	mrs x9, DBGWCR14_EL1
+	mrs x9, DBGWCR15_EL1
+	mrs x9, MDRAR_EL1
+	mrs x9, TEEHBR32_EL1
+	mrs x9, OSLSR_EL1
+	mrs x9, OSDLR_EL1
+	mrs x9, DBGPRCR_EL1
+	mrs x9, DBGCLAIMSET_EL1
+	mrs x9, DBGCLAIMCLR_EL1
+	mrs x9, DBGAUTHSTATUS_EL1
+	mrs x9, MIDR_EL1
+	mrs x9, CCSIDR_EL1
+	mrs x9, CSSELR_EL1
+	mrs x9, VPIDR_EL2
+	mrs x9, CLIDR_EL1
+	mrs x9, CTR_EL0
+	mrs x9, MPIDR_EL1
+	mrs x9, VMPIDR_EL2
+	mrs x9, REVIDR_EL1
+	mrs x9, AIDR_EL1
+	mrs x9, DCZID_EL0
+	mrs x9, ID_PFR0_EL1
+	mrs x9, ID_PFR1_EL1
+	mrs x9, ID_DFR0_EL1
+	mrs x9, ID_AFR0_EL1
+	mrs x9, ID_MMFR0_EL1
+	mrs x9, ID_MMFR1_EL1
+	mrs x9, ID_MMFR2_EL1
+	mrs x9, ID_MMFR3_EL1
+	mrs x9, ID_ISAR0_EL1
+	mrs x9, ID_ISAR1_EL1
+	mrs x9, ID_ISAR2_EL1
+	mrs x9, ID_ISAR3_EL1
+	mrs x9, ID_ISAR4_EL1
+	mrs x9, ID_ISAR5_EL1
+	mrs x9, MVFR0_EL1
+	mrs x9, MVFR1_EL1
+	mrs x9, MVFR2_EL1
+	mrs x9, ID_AA64PFR0_EL1
+	mrs x9, ID_AA64PFR1_EL1
+	mrs x9, ID_AA64DFR0_EL1
+	mrs x9, ID_AA64DFR1_EL1
+	mrs x9, ID_AA64AFR0_EL1
+	mrs x9, ID_AA64AFR1_EL1
+	mrs x9, ID_AA64ISAR0_EL1
+	mrs x9, ID_AA64ISAR1_EL1
+	mrs x9, ID_AA64MMFR0_EL1
+	mrs x9, ID_AA64MMFR1_EL1
+	mrs x9, SCTLR_EL1
+	mrs x9, SCTLR_EL2
+	mrs x9, SCTLR_EL3
+	mrs x9, ACTLR_EL1
+	mrs x9, ACTLR_EL2
+	mrs x9, ACTLR_EL3
+	mrs x9, CPACR_EL1
+	mrs x9, HCR_EL2
+	mrs x9, SCR_EL3
+	mrs x9, MDCR_EL2
+	mrs x9, SDER32_EL3
+	mrs x9, CPTR_EL2
+	mrs x9, CPTR_EL3
+	mrs x9, HSTR_EL2
+	mrs x9, HACR_EL2
+	mrs x9, MDCR_EL3
+	mrs x9, TTBR0_EL1
+	mrs x9, TTBR0_EL2
+	mrs x9, TTBR0_EL3
+	mrs x9, TTBR1_EL1
+	mrs x9, TCR_EL1
+	mrs x9, TCR_EL2
+	mrs x9, TCR_EL3
+	mrs x9, VTTBR_EL2
+	mrs x9, VTCR_EL2
+	mrs x9, DACR32_EL2
+	mrs x9, SPSR_EL1
+	mrs x9, SPSR_EL2
+	mrs x9, SPSR_EL3
+	mrs x9, ELR_EL1
+	mrs x9, ELR_EL2
+	mrs x9, ELR_EL3
+	mrs x9, SP_EL0
+	mrs x9, SP_EL1
+	mrs x9, SP_EL2
+	mrs x9, SPSel	// mixed-case spelling; the expected output prints it as "spsel"
+	mrs x9, NZCV
+	mrs x9, DAIF
+	mrs x9, CurrentEL	// mixed-case spelling; the expected output prints it as "currentel"
+	mrs x9, SPSR_irq
+	mrs x9, SPSR_abt
+	mrs x9, SPSR_und
+	mrs x9, SPSR_fiq
+	mrs x9, FPCR
+	mrs x9, FPSR
+	mrs x9, DSPSR_EL0
+	mrs x9, DLR_EL0
+	mrs x9, IFSR32_EL2
+	mrs x9, AFSR0_EL1
+	mrs x9, AFSR0_EL2
+	mrs x9, AFSR0_EL3
+	mrs x9, AFSR1_EL1
+	mrs x9, AFSR1_EL2
+	mrs x9, AFSR1_EL3
+	mrs x9, ESR_EL1
+	mrs x9, ESR_EL2
+	mrs x9, ESR_EL3
+	mrs x9, FPEXC32_EL2
+	mrs x9, FAR_EL1
+	mrs x9, FAR_EL2
+	mrs x9, FAR_EL3
+	mrs x9, HPFAR_EL2
+	mrs x9, PAR_EL1
+	mrs x9, PMCR_EL0
+	mrs x9, PMCNTENSET_EL0
+	mrs x9, PMCNTENCLR_EL0
+	mrs x9, PMOVSCLR_EL0
+	mrs x9, PMSELR_EL0
+	mrs x9, PMCEID0_EL0
+	mrs x9, PMCEID1_EL0
+	mrs x9, PMCCNTR_EL0
+	mrs x9, PMXEVTYPER_EL0
+	mrs x9, PMXEVCNTR_EL0
+	mrs x9, PMUSERENR_EL0
+	mrs x9, PMINTENSET_EL1
+	mrs x9, PMINTENCLR_EL1
+	mrs x9, PMOVSSET_EL0
+	mrs x9, MAIR_EL1
+	mrs x9, MAIR_EL2
+	mrs x9, MAIR_EL3
+	mrs x9, AMAIR_EL1
+	mrs x9, AMAIR_EL2
+	mrs x9, AMAIR_EL3
+	mrs x9, VBAR_EL1
+	mrs x9, VBAR_EL2
+	mrs x9, VBAR_EL3
+	mrs x9, RVBAR_EL1
+	mrs x9, RVBAR_EL2
+	mrs x9, RVBAR_EL3
+	mrs x9, RMR_EL1
+	mrs x9, RMR_EL2
+	mrs x9, RMR_EL3
+	mrs x9, ISR_EL1
+	mrs x9, CONTEXTIDR_EL1
+	mrs x9, TPIDR_EL0
+	mrs x9, TPIDR_EL2
+	mrs x9, TPIDR_EL3
+	mrs x9, TPIDRRO_EL0
+	mrs x9, TPIDR_EL1
+	mrs x9, CNTFRQ_EL0
+	mrs x9, CNTPCT_EL0
+	mrs x9, CNTVCT_EL0
+	mrs x9, CNTVOFF_EL2
+	mrs x9, CNTKCTL_EL1
+	mrs x9, CNTHCTL_EL2
+	mrs x9, CNTP_TVAL_EL0
+	mrs x9, CNTHP_TVAL_EL2
+	mrs x9, CNTPS_TVAL_EL1
+	mrs x9, CNTP_CTL_EL0
+	mrs x9, CNTHP_CTL_EL2
+	mrs x9, CNTPS_CTL_EL1
+	mrs x9, CNTP_CVAL_EL0
+	mrs x9, CNTHP_CVAL_EL2
+	mrs x9, CNTPS_CVAL_EL1
+	mrs x9, CNTV_TVAL_EL0
+	mrs x9, CNTV_CTL_EL0
+	mrs x9, CNTV_CVAL_EL0
+	mrs x9, PMEVCNTR0_EL0
+	mrs x9, PMEVCNTR1_EL0
+	mrs x9, PMEVCNTR2_EL0
+	mrs x9, PMEVCNTR3_EL0
+	mrs x9, PMEVCNTR4_EL0
+	mrs x9, PMEVCNTR5_EL0
+	mrs x9, PMEVCNTR6_EL0
+	mrs x9, PMEVCNTR7_EL0
+	mrs x9, PMEVCNTR8_EL0
+	mrs x9, PMEVCNTR9_EL0
+	mrs x9, PMEVCNTR10_EL0
+	mrs x9, PMEVCNTR11_EL0
+	mrs x9, PMEVCNTR12_EL0
+	mrs x9, PMEVCNTR13_EL0
+	mrs x9, PMEVCNTR14_EL0
+	mrs x9, PMEVCNTR15_EL0
+	mrs x9, PMEVCNTR16_EL0
+	mrs x9, PMEVCNTR17_EL0
+	mrs x9, PMEVCNTR18_EL0
+	mrs x9, PMEVCNTR19_EL0
+	mrs x9, PMEVCNTR20_EL0
+	mrs x9, PMEVCNTR21_EL0
+	mrs x9, PMEVCNTR22_EL0
+	mrs x9, PMEVCNTR23_EL0
+	mrs x9, PMEVCNTR24_EL0
+	mrs x9, PMEVCNTR25_EL0
+	mrs x9, PMEVCNTR26_EL0
+	mrs x9, PMEVCNTR27_EL0
+	mrs x9, PMEVCNTR28_EL0
+	mrs x9, PMEVCNTR29_EL0
+	mrs x9, PMEVCNTR30_EL0
+	mrs x9, PMCCFILTR_EL0
+	mrs x9, PMEVTYPER0_EL0
+	mrs x9, PMEVTYPER1_EL0
+	mrs x9, PMEVTYPER2_EL0
+	mrs x9, PMEVTYPER3_EL0
+	mrs x9, PMEVTYPER4_EL0
+	mrs x9, PMEVTYPER5_EL0
+	mrs x9, PMEVTYPER6_EL0
+	mrs x9, PMEVTYPER7_EL0
+	mrs x9, PMEVTYPER8_EL0
+	mrs x9, PMEVTYPER9_EL0
+	mrs x9, PMEVTYPER10_EL0
+	mrs x9, PMEVTYPER11_EL0
+	mrs x9, PMEVTYPER12_EL0
+	mrs x9, PMEVTYPER13_EL0
+	mrs x9, PMEVTYPER14_EL0
+	mrs x9, PMEVTYPER15_EL0
+	mrs x9, PMEVTYPER16_EL0
+	mrs x9, PMEVTYPER17_EL0
+	mrs x9, PMEVTYPER18_EL0
+	mrs x9, PMEVTYPER19_EL0
+	mrs x9, PMEVTYPER20_EL0
+	mrs x9, PMEVTYPER21_EL0
+	mrs x9, PMEVTYPER22_EL0
+	mrs x9, PMEVTYPER23_EL0
+	mrs x9, PMEVTYPER24_EL0
+	mrs x9, PMEVTYPER25_EL0
+	mrs x9, PMEVTYPER26_EL0
+	mrs x9, PMEVTYPER27_EL0
+	mrs x9, PMEVTYPER28_EL0
+	mrs x9, PMEVTYPER29_EL0
+	mrs x9, PMEVTYPER30_EL0
+// CHECK: mrs      x9, teecr32_el1            // encoding: [0x09,0x00,0x32,0xd5]
+// CHECK: mrs      x9, osdtrrx_el1            // encoding: [0x49,0x00,0x30,0xd5]
+// CHECK: mrs      x9, mdccsr_el0             // encoding: [0x09,0x01,0x33,0xd5]
+// CHECK: mrs      x9, mdccint_el1            // encoding: [0x09,0x02,0x30,0xd5]
+// CHECK: mrs      x9, mdscr_el1              // encoding: [0x49,0x02,0x30,0xd5]
+// CHECK: mrs      x9, osdtrtx_el1            // encoding: [0x49,0x03,0x30,0xd5]
+// CHECK: mrs      x9, dbgdtr_el0             // encoding: [0x09,0x04,0x33,0xd5]
+// CHECK: mrs      x9, dbgdtrrx_el0           // encoding: [0x09,0x05,0x33,0xd5]
+// CHECK: mrs      x9, oseccr_el1             // encoding: [0x49,0x06,0x30,0xd5]
+// CHECK: mrs      x9, dbgvcr32_el2           // encoding: [0x09,0x07,0x34,0xd5]
+// CHECK: mrs      x9, dbgbvr0_el1            // encoding: [0x89,0x00,0x30,0xd5]
+// CHECK: mrs      x9, dbgbvr1_el1            // encoding: [0x89,0x01,0x30,0xd5]
+// CHECK: mrs      x9, dbgbvr2_el1            // encoding: [0x89,0x02,0x30,0xd5]
+// CHECK: mrs      x9, dbgbvr3_el1            // encoding: [0x89,0x03,0x30,0xd5]
+// CHECK: mrs      x9, dbgbvr4_el1            // encoding: [0x89,0x04,0x30,0xd5]
+// CHECK: mrs      x9, dbgbvr5_el1            // encoding: [0x89,0x05,0x30,0xd5]
+// CHECK: mrs      x9, dbgbvr6_el1            // encoding: [0x89,0x06,0x30,0xd5]
+// CHECK: mrs      x9, dbgbvr7_el1            // encoding: [0x89,0x07,0x30,0xd5]
+// CHECK: mrs      x9, dbgbvr8_el1            // encoding: [0x89,0x08,0x30,0xd5]
+// CHECK: mrs      x9, dbgbvr9_el1            // encoding: [0x89,0x09,0x30,0xd5]
+// CHECK: mrs      x9, dbgbvr10_el1           // encoding: [0x89,0x0a,0x30,0xd5]
+// CHECK: mrs      x9, dbgbvr11_el1           // encoding: [0x89,0x0b,0x30,0xd5]
+// CHECK: mrs      x9, dbgbvr12_el1           // encoding: [0x89,0x0c,0x30,0xd5]
+// CHECK: mrs      x9, dbgbvr13_el1           // encoding: [0x89,0x0d,0x30,0xd5]
+// CHECK: mrs      x9, dbgbvr14_el1           // encoding: [0x89,0x0e,0x30,0xd5]
+// CHECK: mrs      x9, dbgbvr15_el1           // encoding: [0x89,0x0f,0x30,0xd5]
+// CHECK: mrs      x9, dbgbcr0_el1            // encoding: [0xa9,0x00,0x30,0xd5]
+// CHECK: mrs      x9, dbgbcr1_el1            // encoding: [0xa9,0x01,0x30,0xd5]
+// CHECK: mrs      x9, dbgbcr2_el1            // encoding: [0xa9,0x02,0x30,0xd5]
+// CHECK: mrs      x9, dbgbcr3_el1            // encoding: [0xa9,0x03,0x30,0xd5]
+// CHECK: mrs      x9, dbgbcr4_el1            // encoding: [0xa9,0x04,0x30,0xd5]
+// CHECK: mrs      x9, dbgbcr5_el1            // encoding: [0xa9,0x05,0x30,0xd5]
+// CHECK: mrs      x9, dbgbcr6_el1            // encoding: [0xa9,0x06,0x30,0xd5]
+// CHECK: mrs      x9, dbgbcr7_el1            // encoding: [0xa9,0x07,0x30,0xd5]
+// CHECK: mrs      x9, dbgbcr8_el1            // encoding: [0xa9,0x08,0x30,0xd5]
+// CHECK: mrs      x9, dbgbcr9_el1            // encoding: [0xa9,0x09,0x30,0xd5]
+// CHECK: mrs      x9, dbgbcr10_el1           // encoding: [0xa9,0x0a,0x30,0xd5]
+// CHECK: mrs      x9, dbgbcr11_el1           // encoding: [0xa9,0x0b,0x30,0xd5]
+// CHECK: mrs      x9, dbgbcr12_el1           // encoding: [0xa9,0x0c,0x30,0xd5]
+// CHECK: mrs      x9, dbgbcr13_el1           // encoding: [0xa9,0x0d,0x30,0xd5]
+// CHECK: mrs      x9, dbgbcr14_el1           // encoding: [0xa9,0x0e,0x30,0xd5]
+// CHECK: mrs      x9, dbgbcr15_el1           // encoding: [0xa9,0x0f,0x30,0xd5]
+// CHECK: mrs      x9, dbgwvr0_el1            // encoding: [0xc9,0x00,0x30,0xd5]
+// CHECK: mrs      x9, dbgwvr1_el1            // encoding: [0xc9,0x01,0x30,0xd5]
+// CHECK: mrs      x9, dbgwvr2_el1            // encoding: [0xc9,0x02,0x30,0xd5]
+// CHECK: mrs      x9, dbgwvr3_el1            // encoding: [0xc9,0x03,0x30,0xd5]
+// CHECK: mrs      x9, dbgwvr4_el1            // encoding: [0xc9,0x04,0x30,0xd5]
+// CHECK: mrs      x9, dbgwvr5_el1            // encoding: [0xc9,0x05,0x30,0xd5]
+// CHECK: mrs      x9, dbgwvr6_el1            // encoding: [0xc9,0x06,0x30,0xd5]
+// CHECK: mrs      x9, dbgwvr7_el1            // encoding: [0xc9,0x07,0x30,0xd5]
+// CHECK: mrs      x9, dbgwvr8_el1            // encoding: [0xc9,0x08,0x30,0xd5]
+// CHECK: mrs      x9, dbgwvr9_el1            // encoding: [0xc9,0x09,0x30,0xd5]
+// CHECK: mrs      x9, dbgwvr10_el1           // encoding: [0xc9,0x0a,0x30,0xd5]
+// CHECK: mrs      x9, dbgwvr11_el1           // encoding: [0xc9,0x0b,0x30,0xd5]
+// CHECK: mrs      x9, dbgwvr12_el1           // encoding: [0xc9,0x0c,0x30,0xd5]
+// CHECK: mrs      x9, dbgwvr13_el1           // encoding: [0xc9,0x0d,0x30,0xd5]
+// CHECK: mrs      x9, dbgwvr14_el1           // encoding: [0xc9,0x0e,0x30,0xd5]
+// CHECK: mrs      x9, dbgwvr15_el1           // encoding: [0xc9,0x0f,0x30,0xd5]
+// CHECK: mrs      x9, dbgwcr0_el1            // encoding: [0xe9,0x00,0x30,0xd5]
+// CHECK: mrs      x9, dbgwcr1_el1            // encoding: [0xe9,0x01,0x30,0xd5]
+// CHECK: mrs      x9, dbgwcr2_el1            // encoding: [0xe9,0x02,0x30,0xd5]
+// CHECK: mrs      x9, dbgwcr3_el1            // encoding: [0xe9,0x03,0x30,0xd5]
+// CHECK: mrs      x9, dbgwcr4_el1            // encoding: [0xe9,0x04,0x30,0xd5]
+// CHECK: mrs      x9, dbgwcr5_el1            // encoding: [0xe9,0x05,0x30,0xd5]
+// CHECK: mrs      x9, dbgwcr6_el1            // encoding: [0xe9,0x06,0x30,0xd5]
+// CHECK: mrs      x9, dbgwcr7_el1            // encoding: [0xe9,0x07,0x30,0xd5]
+// CHECK: mrs      x9, dbgwcr8_el1            // encoding: [0xe9,0x08,0x30,0xd5]
+// CHECK: mrs      x9, dbgwcr9_el1            // encoding: [0xe9,0x09,0x30,0xd5]
+// CHECK: mrs      x9, dbgwcr10_el1           // encoding: [0xe9,0x0a,0x30,0xd5]
+// CHECK: mrs      x9, dbgwcr11_el1           // encoding: [0xe9,0x0b,0x30,0xd5]
+// CHECK: mrs      x9, dbgwcr12_el1           // encoding: [0xe9,0x0c,0x30,0xd5]
+// CHECK: mrs      x9, dbgwcr13_el1           // encoding: [0xe9,0x0d,0x30,0xd5]
+// CHECK: mrs      x9, dbgwcr14_el1           // encoding: [0xe9,0x0e,0x30,0xd5]
+// CHECK: mrs      x9, dbgwcr15_el1           // encoding: [0xe9,0x0f,0x30,0xd5]
+// CHECK: mrs      x9, mdrar_el1              // encoding: [0x09,0x10,0x30,0xd5]
+// CHECK: mrs      x9, teehbr32_el1           // encoding: [0x09,0x10,0x32,0xd5]
+// CHECK: mrs      x9, oslsr_el1              // encoding: [0x89,0x11,0x30,0xd5]
+// CHECK: mrs      x9, osdlr_el1              // encoding: [0x89,0x13,0x30,0xd5]
+// CHECK: mrs      x9, dbgprcr_el1            // encoding: [0x89,0x14,0x30,0xd5]
+// CHECK: mrs      x9, dbgclaimset_el1        // encoding: [0xc9,0x78,0x30,0xd5]
+// CHECK: mrs      x9, dbgclaimclr_el1        // encoding: [0xc9,0x79,0x30,0xd5]
+// CHECK: mrs      x9, dbgauthstatus_el1      // encoding: [0xc9,0x7e,0x30,0xd5]
+// CHECK: mrs      x9, midr_el1               // encoding: [0x09,0x00,0x38,0xd5]
+// CHECK: mrs      x9, ccsidr_el1             // encoding: [0x09,0x00,0x39,0xd5]
+// CHECK: mrs      x9, csselr_el1             // encoding: [0x09,0x00,0x3a,0xd5]
+// CHECK: mrs      x9, vpidr_el2              // encoding: [0x09,0x00,0x3c,0xd5]
+// CHECK: mrs      x9, clidr_el1              // encoding: [0x29,0x00,0x39,0xd5]
+// CHECK: mrs      x9, ctr_el0                // encoding: [0x29,0x00,0x3b,0xd5]
+// CHECK: mrs      x9, mpidr_el1              // encoding: [0xa9,0x00,0x38,0xd5]
+// CHECK: mrs      x9, vmpidr_el2             // encoding: [0xa9,0x00,0x3c,0xd5]
+// CHECK: mrs      x9, revidr_el1             // encoding: [0xc9,0x00,0x38,0xd5]
+// CHECK: mrs      x9, aidr_el1               // encoding: [0xe9,0x00,0x39,0xd5]
+// CHECK: mrs      x9, dczid_el0              // encoding: [0xe9,0x00,0x3b,0xd5]
+// CHECK: mrs      x9, id_pfr0_el1            // encoding: [0x09,0x01,0x38,0xd5]
+// CHECK: mrs      x9, id_pfr1_el1            // encoding: [0x29,0x01,0x38,0xd5]
+// CHECK: mrs      x9, id_dfr0_el1            // encoding: [0x49,0x01,0x38,0xd5]
+// CHECK: mrs      x9, id_afr0_el1            // encoding: [0x69,0x01,0x38,0xd5]
+// CHECK: mrs      x9, id_mmfr0_el1           // encoding: [0x89,0x01,0x38,0xd5]
+// CHECK: mrs      x9, id_mmfr1_el1           // encoding: [0xa9,0x01,0x38,0xd5]
+// CHECK: mrs      x9, id_mmfr2_el1           // encoding: [0xc9,0x01,0x38,0xd5]
+// CHECK: mrs      x9, id_mmfr3_el1           // encoding: [0xe9,0x01,0x38,0xd5]
+// CHECK: mrs      x9, id_isar0_el1           // encoding: [0x09,0x02,0x38,0xd5]
+// CHECK: mrs      x9, id_isar1_el1           // encoding: [0x29,0x02,0x38,0xd5]
+// CHECK: mrs      x9, id_isar2_el1           // encoding: [0x49,0x02,0x38,0xd5]
+// CHECK: mrs      x9, id_isar3_el1           // encoding: [0x69,0x02,0x38,0xd5]
+// CHECK: mrs      x9, id_isar4_el1           // encoding: [0x89,0x02,0x38,0xd5]
+// CHECK: mrs      x9, id_isar5_el1           // encoding: [0xa9,0x02,0x38,0xd5]
+// CHECK: mrs      x9, mvfr0_el1              // encoding: [0x09,0x03,0x38,0xd5]
+// CHECK: mrs      x9, mvfr1_el1              // encoding: [0x29,0x03,0x38,0xd5]
+// CHECK: mrs      x9, mvfr2_el1              // encoding: [0x49,0x03,0x38,0xd5]
+// CHECK: mrs      x9, id_aa64pfr0_el1        // encoding: [0x09,0x04,0x38,0xd5]
+// CHECK: mrs      x9, id_aa64pfr1_el1        // encoding: [0x29,0x04,0x38,0xd5]
+// CHECK: mrs      x9, id_aa64dfr0_el1        // encoding: [0x09,0x05,0x38,0xd5]
+// CHECK: mrs      x9, id_aa64dfr1_el1        // encoding: [0x29,0x05,0x38,0xd5]
+// CHECK: mrs      x9, id_aa64afr0_el1        // encoding: [0x89,0x05,0x38,0xd5]
+// CHECK: mrs      x9, id_aa64afr1_el1        // encoding: [0xa9,0x05,0x38,0xd5]
+// CHECK: mrs      x9, id_aa64isar0_el1       // encoding: [0x09,0x06,0x38,0xd5]
+// CHECK: mrs      x9, id_aa64isar1_el1       // encoding: [0x29,0x06,0x38,0xd5]
+// CHECK: mrs      x9, id_aa64mmfr0_el1       // encoding: [0x09,0x07,0x38,0xd5]
+// CHECK: mrs      x9, id_aa64mmfr1_el1       // encoding: [0x29,0x07,0x38,0xd5]
+// CHECK: mrs      x9, sctlr_el1              // encoding: [0x09,0x10,0x38,0xd5]
+// CHECK: mrs      x9, sctlr_el2              // encoding: [0x09,0x10,0x3c,0xd5]
+// CHECK: mrs      x9, sctlr_el3              // encoding: [0x09,0x10,0x3e,0xd5]
+// CHECK: mrs      x9, actlr_el1              // encoding: [0x29,0x10,0x38,0xd5]
+// CHECK: mrs      x9, actlr_el2              // encoding: [0x29,0x10,0x3c,0xd5]
+// CHECK: mrs      x9, actlr_el3              // encoding: [0x29,0x10,0x3e,0xd5]
+// CHECK: mrs      x9, cpacr_el1              // encoding: [0x49,0x10,0x38,0xd5]
+// CHECK: mrs      x9, hcr_el2                // encoding: [0x09,0x11,0x3c,0xd5]
+// CHECK: mrs      x9, scr_el3                // encoding: [0x09,0x11,0x3e,0xd5]
+// CHECK: mrs      x9, mdcr_el2               // encoding: [0x29,0x11,0x3c,0xd5]
+// CHECK: mrs      x9, sder32_el3             // encoding: [0x29,0x11,0x3e,0xd5]
+// CHECK: mrs      x9, cptr_el2               // encoding: [0x49,0x11,0x3c,0xd5]
+// CHECK: mrs      x9, cptr_el3               // encoding: [0x49,0x11,0x3e,0xd5]
+// CHECK: mrs      x9, hstr_el2               // encoding: [0x69,0x11,0x3c,0xd5]
+// CHECK: mrs      x9, hacr_el2               // encoding: [0xe9,0x11,0x3c,0xd5]
+// CHECK: mrs      x9, mdcr_el3               // encoding: [0x29,0x13,0x3e,0xd5]
+// CHECK: mrs      x9, ttbr0_el1              // encoding: [0x09,0x20,0x38,0xd5]
+// CHECK: mrs      x9, ttbr0_el2              // encoding: [0x09,0x20,0x3c,0xd5]
+// CHECK: mrs      x9, ttbr0_el3              // encoding: [0x09,0x20,0x3e,0xd5]
+// CHECK: mrs      x9, ttbr1_el1              // encoding: [0x29,0x20,0x38,0xd5]
+// CHECK: mrs      x9, tcr_el1                // encoding: [0x49,0x20,0x38,0xd5]
+// CHECK: mrs      x9, tcr_el2                // encoding: [0x49,0x20,0x3c,0xd5]
+// CHECK: mrs      x9, tcr_el3                // encoding: [0x49,0x20,0x3e,0xd5]
+// CHECK: mrs      x9, vttbr_el2              // encoding: [0x09,0x21,0x3c,0xd5]
+// CHECK: mrs      x9, vtcr_el2               // encoding: [0x49,0x21,0x3c,0xd5]
+// CHECK: mrs      x9, dacr32_el2             // encoding: [0x09,0x30,0x3c,0xd5]
+// CHECK: mrs      x9, spsr_el1               // encoding: [0x09,0x40,0x38,0xd5]
+// CHECK: mrs      x9, spsr_el2               // encoding: [0x09,0x40,0x3c,0xd5]
+// CHECK: mrs      x9, spsr_el3               // encoding: [0x09,0x40,0x3e,0xd5]
+// CHECK: mrs      x9, elr_el1                // encoding: [0x29,0x40,0x38,0xd5]
+// CHECK: mrs      x9, elr_el2                // encoding: [0x29,0x40,0x3c,0xd5]
+// CHECK: mrs      x9, elr_el3                // encoding: [0x29,0x40,0x3e,0xd5]
+// CHECK: mrs      x9, sp_el0                 // encoding: [0x09,0x41,0x38,0xd5]
+// CHECK: mrs      x9, sp_el1                 // encoding: [0x09,0x41,0x3c,0xd5]
+// CHECK: mrs      x9, sp_el2                 // encoding: [0x09,0x41,0x3e,0xd5]
+// CHECK: mrs      x9, spsel                  // encoding: [0x09,0x42,0x38,0xd5]
+// CHECK: mrs      x9, nzcv                   // encoding: [0x09,0x42,0x3b,0xd5]
+// CHECK: mrs      x9, daif                   // encoding: [0x29,0x42,0x3b,0xd5]
+// CHECK: mrs      x9, currentel              // encoding: [0x49,0x42,0x38,0xd5]
+// CHECK: mrs      x9, spsr_irq               // encoding: [0x09,0x43,0x3c,0xd5]
+// CHECK: mrs      x9, spsr_abt               // encoding: [0x29,0x43,0x3c,0xd5]
+// CHECK: mrs      x9, spsr_und               // encoding: [0x49,0x43,0x3c,0xd5]
+// CHECK: mrs      x9, spsr_fiq               // encoding: [0x69,0x43,0x3c,0xd5]
+// CHECK: mrs      x9, fpcr                   // encoding: [0x09,0x44,0x3b,0xd5]
+// CHECK: mrs      x9, fpsr                   // encoding: [0x29,0x44,0x3b,0xd5]
+// CHECK: mrs      x9, dspsr_el0              // encoding: [0x09,0x45,0x3b,0xd5]
+// CHECK: mrs      x9, dlr_el0                // encoding: [0x29,0x45,0x3b,0xd5]
+// CHECK: mrs      x9, ifsr32_el2             // encoding: [0x29,0x50,0x3c,0xd5]
+// CHECK: mrs      x9, afsr0_el1              // encoding: [0x09,0x51,0x38,0xd5]
+// CHECK: mrs      x9, afsr0_el2              // encoding: [0x09,0x51,0x3c,0xd5]
+// CHECK: mrs      x9, afsr0_el3              // encoding: [0x09,0x51,0x3e,0xd5]
+// CHECK: mrs      x9, afsr1_el1              // encoding: [0x29,0x51,0x38,0xd5]
+// CHECK: mrs      x9, afsr1_el2              // encoding: [0x29,0x51,0x3c,0xd5]
+// CHECK: mrs      x9, afsr1_el3              // encoding: [0x29,0x51,0x3e,0xd5]
+// CHECK: mrs      x9, esr_el1                // encoding: [0x09,0x52,0x38,0xd5]
+// CHECK: mrs      x9, esr_el2                // encoding: [0x09,0x52,0x3c,0xd5]
+// CHECK: mrs      x9, esr_el3                // encoding: [0x09,0x52,0x3e,0xd5]
+// CHECK: mrs      x9, fpexc32_el2            // encoding: [0x09,0x53,0x3c,0xd5]
+// CHECK: mrs      x9, far_el1                // encoding: [0x09,0x60,0x38,0xd5]
+// CHECK: mrs      x9, far_el2                // encoding: [0x09,0x60,0x3c,0xd5]
+// CHECK: mrs      x9, far_el3                // encoding: [0x09,0x60,0x3e,0xd5]
+// CHECK: mrs      x9, hpfar_el2              // encoding: [0x89,0x60,0x3c,0xd5]
+// CHECK: mrs      x9, par_el1                // encoding: [0x09,0x74,0x38,0xd5]
+// CHECK: mrs      x9, pmcr_el0               // encoding: [0x09,0x9c,0x3b,0xd5]
+// CHECK: mrs      x9, pmcntenset_el0         // encoding: [0x29,0x9c,0x3b,0xd5]
+// CHECK: mrs      x9, pmcntenclr_el0         // encoding: [0x49,0x9c,0x3b,0xd5]
+// CHECK: mrs      x9, pmovsclr_el0           // encoding: [0x69,0x9c,0x3b,0xd5]
+// CHECK: mrs      x9, pmselr_el0             // encoding: [0xa9,0x9c,0x3b,0xd5]
+// CHECK: mrs      x9, pmceid0_el0            // encoding: [0xc9,0x9c,0x3b,0xd5]
+// CHECK: mrs      x9, pmceid1_el0            // encoding: [0xe9,0x9c,0x3b,0xd5]
+// CHECK: mrs      x9, pmccntr_el0            // encoding: [0x09,0x9d,0x3b,0xd5]
+// CHECK: mrs      x9, pmxevtyper_el0         // encoding: [0x29,0x9d,0x3b,0xd5]
+// CHECK: mrs      x9, pmxevcntr_el0          // encoding: [0x49,0x9d,0x3b,0xd5]
+// CHECK: mrs      x9, pmuserenr_el0          // encoding: [0x09,0x9e,0x3b,0xd5]
+// CHECK: mrs      x9, pmintenset_el1         // encoding: [0x29,0x9e,0x38,0xd5]
+// CHECK: mrs      x9, pmintenclr_el1         // encoding: [0x49,0x9e,0x38,0xd5]
+// CHECK: mrs      x9, pmovsset_el0           // encoding: [0x69,0x9e,0x3b,0xd5]
+// CHECK: mrs      x9, mair_el1               // encoding: [0x09,0xa2,0x38,0xd5]
+// CHECK: mrs      x9, mair_el2               // encoding: [0x09,0xa2,0x3c,0xd5]
+// CHECK: mrs      x9, mair_el3               // encoding: [0x09,0xa2,0x3e,0xd5]
+// CHECK: mrs      x9, amair_el1              // encoding: [0x09,0xa3,0x38,0xd5]
+// CHECK: mrs      x9, amair_el2              // encoding: [0x09,0xa3,0x3c,0xd5]
+// CHECK: mrs      x9, amair_el3              // encoding: [0x09,0xa3,0x3e,0xd5]
+// CHECK: mrs      x9, vbar_el1               // encoding: [0x09,0xc0,0x38,0xd5]
+// CHECK: mrs      x9, vbar_el2               // encoding: [0x09,0xc0,0x3c,0xd5]
+// CHECK: mrs      x9, vbar_el3               // encoding: [0x09,0xc0,0x3e,0xd5]
+// CHECK: mrs      x9, rvbar_el1              // encoding: [0x29,0xc0,0x38,0xd5]
+// CHECK: mrs      x9, rvbar_el2              // encoding: [0x29,0xc0,0x3c,0xd5]
+// CHECK: mrs      x9, rvbar_el3              // encoding: [0x29,0xc0,0x3e,0xd5]
+// CHECK: mrs      x9, rmr_el1                // encoding: [0x49,0xc0,0x38,0xd5]
+// CHECK: mrs      x9, rmr_el2                // encoding: [0x49,0xc0,0x3c,0xd5]
+// CHECK: mrs      x9, rmr_el3                // encoding: [0x49,0xc0,0x3e,0xd5]
+// CHECK: mrs      x9, isr_el1                // encoding: [0x09,0xc1,0x38,0xd5]
+// CHECK: mrs      x9, contextidr_el1         // encoding: [0x29,0xd0,0x38,0xd5]
+// CHECK: mrs      x9, tpidr_el0              // encoding: [0x49,0xd0,0x3b,0xd5]
+// CHECK: mrs      x9, tpidr_el2              // encoding: [0x49,0xd0,0x3c,0xd5]
+// CHECK: mrs      x9, tpidr_el3              // encoding: [0x49,0xd0,0x3e,0xd5]
+// CHECK: mrs      x9, tpidrro_el0            // encoding: [0x69,0xd0,0x3b,0xd5]
+// CHECK: mrs      x9, tpidr_el1              // encoding: [0x89,0xd0,0x38,0xd5]
+// CHECK: mrs      x9, cntfrq_el0             // encoding: [0x09,0xe0,0x3b,0xd5]
+// CHECK: mrs      x9, cntpct_el0             // encoding: [0x29,0xe0,0x3b,0xd5]
+// CHECK: mrs      x9, cntvct_el0             // encoding: [0x49,0xe0,0x3b,0xd5]
+// CHECK: mrs      x9, cntvoff_el2            // encoding: [0x69,0xe0,0x3c,0xd5]
+// CHECK: mrs      x9, cntkctl_el1            // encoding: [0x09,0xe1,0x38,0xd5]
+// CHECK: mrs      x9, cnthctl_el2            // encoding: [0x09,0xe1,0x3c,0xd5]
+// CHECK: mrs      x9, cntp_tval_el0          // encoding: [0x09,0xe2,0x3b,0xd5]
+// CHECK: mrs      x9, cnthp_tval_el2         // encoding: [0x09,0xe2,0x3c,0xd5]
+// CHECK: mrs      x9, cntps_tval_el1         // encoding: [0x09,0xe2,0x3f,0xd5]
+// CHECK: mrs      x9, cntp_ctl_el0           // encoding: [0x29,0xe2,0x3b,0xd5]
+// CHECK: mrs      x9, cnthp_ctl_el2          // encoding: [0x29,0xe2,0x3c,0xd5]
+// CHECK: mrs      x9, cntps_ctl_el1          // encoding: [0x29,0xe2,0x3f,0xd5]
+// CHECK: mrs      x9, cntp_cval_el0          // encoding: [0x49,0xe2,0x3b,0xd5]
+// CHECK: mrs      x9, cnthp_cval_el2         // encoding: [0x49,0xe2,0x3c,0xd5]
+// CHECK: mrs      x9, cntps_cval_el1         // encoding: [0x49,0xe2,0x3f,0xd5]
+// CHECK: mrs      x9, cntv_tval_el0          // encoding: [0x09,0xe3,0x3b,0xd5]
+// CHECK: mrs      x9, cntv_ctl_el0           // encoding: [0x29,0xe3,0x3b,0xd5]
+// CHECK: mrs      x9, cntv_cval_el0          // encoding: [0x49,0xe3,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr0_el0          // encoding: [0x09,0xe8,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr1_el0          // encoding: [0x29,0xe8,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr2_el0          // encoding: [0x49,0xe8,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr3_el0          // encoding: [0x69,0xe8,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr4_el0          // encoding: [0x89,0xe8,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr5_el0          // encoding: [0xa9,0xe8,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr6_el0          // encoding: [0xc9,0xe8,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr7_el0          // encoding: [0xe9,0xe8,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr8_el0          // encoding: [0x09,0xe9,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr9_el0          // encoding: [0x29,0xe9,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr10_el0         // encoding: [0x49,0xe9,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr11_el0         // encoding: [0x69,0xe9,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr12_el0         // encoding: [0x89,0xe9,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr13_el0         // encoding: [0xa9,0xe9,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr14_el0         // encoding: [0xc9,0xe9,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr15_el0         // encoding: [0xe9,0xe9,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr16_el0         // encoding: [0x09,0xea,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr17_el0         // encoding: [0x29,0xea,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr18_el0         // encoding: [0x49,0xea,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr19_el0         // encoding: [0x69,0xea,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr20_el0         // encoding: [0x89,0xea,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr21_el0         // encoding: [0xa9,0xea,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr22_el0         // encoding: [0xc9,0xea,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr23_el0         // encoding: [0xe9,0xea,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr24_el0         // encoding: [0x09,0xeb,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr25_el0         // encoding: [0x29,0xeb,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr26_el0         // encoding: [0x49,0xeb,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr27_el0         // encoding: [0x69,0xeb,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr28_el0         // encoding: [0x89,0xeb,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr29_el0         // encoding: [0xa9,0xeb,0x3b,0xd5]
+// CHECK: mrs      x9, pmevcntr30_el0         // encoding: [0xc9,0xeb,0x3b,0xd5]
+// CHECK: mrs      x9, pmccfiltr_el0          // encoding: [0xe9,0xef,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper0_el0         // encoding: [0x09,0xec,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper1_el0         // encoding: [0x29,0xec,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper2_el0         // encoding: [0x49,0xec,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper3_el0         // encoding: [0x69,0xec,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper4_el0         // encoding: [0x89,0xec,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper5_el0         // encoding: [0xa9,0xec,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper6_el0         // encoding: [0xc9,0xec,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper7_el0         // encoding: [0xe9,0xec,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper8_el0         // encoding: [0x09,0xed,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper9_el0         // encoding: [0x29,0xed,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper10_el0        // encoding: [0x49,0xed,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper11_el0        // encoding: [0x69,0xed,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper12_el0        // encoding: [0x89,0xed,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper13_el0        // encoding: [0xa9,0xed,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper14_el0        // encoding: [0xc9,0xed,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper15_el0        // encoding: [0xe9,0xed,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper16_el0        // encoding: [0x09,0xee,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper17_el0        // encoding: [0x29,0xee,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper18_el0        // encoding: [0x49,0xee,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper19_el0        // encoding: [0x69,0xee,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper20_el0        // encoding: [0x89,0xee,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper21_el0        // encoding: [0xa9,0xee,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper22_el0        // encoding: [0xc9,0xee,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper23_el0        // encoding: [0xe9,0xee,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper24_el0        // encoding: [0x09,0xef,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper25_el0        // encoding: [0x29,0xef,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper26_el0        // encoding: [0x49,0xef,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper27_el0        // encoding: [0x69,0xef,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper28_el0        // encoding: [0x89,0xef,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper29_el0        // encoding: [0xa9,0xef,0x3b,0xd5]
+// CHECK: mrs      x9, pmevtyper30_el0        // encoding: [0xc9,0xef,0x3b,0xd5]
+
+        mrs x12, s3_7_c15_c1_5
+        mrs x13, s3_2_c11_c15_7
+        msr s3_0_c15_c0_0, x12
+        msr s3_7_c11_c13_7, x5
+// CHECK: mrs     x12, s3_7_c15_c1_5      // encoding: [0xac,0xf1,0x3f,0xd5]
+// CHECK: mrs     x13, s3_2_c11_c15_7     // encoding: [0xed,0xbf,0x3a,0xd5]
+// CHECK: msr     s3_0_c15_c0_0, x12      // encoding: [0x0c,0xf0,0x18,0xd5]
+// CHECK: msr     s3_7_c11_c13_7, x5      // encoding: [0xe5,0xbd,0x1f,0xd5]
+
+//------------------------------------------------------------------------------
+// Test and branch (immediate)
+//------------------------------------------------------------------------------
+
+        tbz x5, #0, somewhere
+        tbz xzr, #63, elsewhere
+        tbnz x5, #45, nowhere
+// CHECK: tbz     x5, #0, somewhere       // encoding: [0x05'A',A,A,0x36'A']
+// CHECK:                                 //   fixup A - offset: 0, value: somewhere, kind: fixup_a64_tstbr
+// CHECK: tbz     xzr, #63, elsewhere     // encoding: [0x1f'A',A,0xf8'A',0xb6'A']
+// CHECK:                                 //   fixup A - offset: 0, value: elsewhere, kind: fixup_a64_tstbr
+// CHECK: tbnz    x5, #45, nowhere        // encoding: [0x05'A',A,0x68'A',0xb7'A']
+// CHECK:                                 //   fixup A - offset: 0, value: nowhere, kind: fixup_a64_tstbr
+
+        tbnz w3, #2, there
+        tbnz wzr, #31, nowhere
+        tbz w5, #12, anywhere
+// CHECK: tbnz    w3, #2, there           // encoding: [0x03'A',A,0x10'A',0x37'A']
+// CHECK:                                 //   fixup A - offset: 0, value: there, kind: fixup_a64_tstbr
+// CHECK: tbnz    wzr, #31, nowhere       // encoding: [0x1f'A',A,0xf8'A',0x37'A']
+// CHECK:                                 //   fixup A - offset: 0, value: nowhere, kind: fixup_a64_tstbr
+// CHECK: tbz     w5, #12, anywhere       // encoding: [0x05'A',A,0x60'A',0x36'A']
+// CHECK:                                 //   fixup A - offset: 0, value: anywhere, kind: fixup_a64_tstbr
+
+//------------------------------------------------------------------------------
+// Unconditional branch (immediate)
+//------------------------------------------------------------------------------
+
+        b somewhere
+        bl elsewhere
+// CHECK: b       somewhere               // encoding: [A,A,A,0x14'A']
+// CHECK:                                 //   fixup A - offset: 0, value: somewhere, kind: fixup_a64_uncondbr
+// CHECK: bl      elsewhere               // encoding: [A,A,A,0x94'A']
+// CHECK:                                 //   fixup A - offset: 0, value: elsewhere, kind: fixup_a64_call
+
+        b #4
+        bl #0
+        b #134217724
+        bl #-134217728
+// CHECK: b       #4                      // encoding: [0x01,0x00,0x00,0x14]
+// CHECK: bl      #0                      // encoding: [0x00,0x00,0x00,0x94]
+// CHECK: b       #134217724              // encoding: [0xff,0xff,0xff,0x15]
+// CHECK: bl      #-134217728             // encoding: [0x00,0x00,0x00,0x96]
+
+//------------------------------------------------------------------------------
+// Unconditional branch (register)
+//------------------------------------------------------------------------------
+
+        br x20
+        blr xzr
+        ret x10
+// CHECK: br       x20                        // encoding: [0x80,0x02,0x1f,0xd6]
+// CHECK: blr      xzr                        // encoding: [0xe0,0x03,0x3f,0xd6]
+// CHECK: ret      x10                        // encoding: [0x40,0x01,0x5f,0xd6]
+
+        ret
+        eret
+        drps
+// CHECK: ret                                 // encoding: [0xc0,0x03,0x5f,0xd6]
+// CHECK: eret                                // encoding: [0xe0,0x03,0x9f,0xd6]
+// CHECK: drps                                // encoding: [0xe0,0x03,0xbf,0xd6]
+
diff --git a/test/MC/AArch64/elf-globaladdress.ll b/test/MC/AArch64/elf-globaladdress.ll
new file mode 100644
index 0000000..190439d
--- /dev/null
+++ b/test/MC/AArch64/elf-globaladdress.ll
@@ -0,0 +1,111 @@
+;; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+;; RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+
+; Also take it on a round-trip through llvm-mc to stretch assembly-parsing's legs:
+;; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | \
+;; RUN:     llvm-mc -arch=aarch64 -filetype=obj -o - | \
+;; RUN:     elf-dump | FileCheck -check-prefix=OBJ %s
+
+@var8 = global i8 0
+@var16 = global i16 0
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @loadstore() {  ; loads each global, then writes the value back with a volatile store
+    %val8 = load i8* @var8
+    store volatile i8 %val8, i8* @var8
+
+    %val16 = load i16* @var16
+    store volatile i16 %val16, i16* @var16
+
+    %val32 = load i32* @var32
+    store volatile i32 %val32, i32* @var32
+
+    %val64 = load i64* @var64
+    store volatile i64 %val64, i64* @var64
+
+    ret void
+}
+
+@globaddr = global i64* null
+
+define void @address() {  ; stores the address of @var64 into @globaddr
+    store i64* @var64, i64** @globaddr
+    ret void
+}
+
+; Check we're using EM_AARCH64 (183 == 0xb7)
+; OBJ: 'e_machine', 0x00b7
+
+; OBJ: .rela.text
+
+; var8
+; R_AARCH64_ADR_PREL_PG_HI21 against var8
+; OBJ: 'r_sym', 0x0000000f
+; OBJ-NEXT: 'r_type', 0x00000113
+
+; R_AARCH64_LDST8_ABS_LO12_NC against var8
+; OBJ: 'r_sym', 0x0000000f
+; OBJ-NEXT: 'r_type', 0x00000116
+
+
+; var16
+; R_AARCH64_ADR_PREL_PG_HI21 against var16
+; OBJ: 'r_sym', 0x0000000c
+; OBJ-NEXT: 'r_type', 0x00000113
+
+; R_AARCH64_LDST16_ABS_LO12_NC against var16
+; OBJ: 'r_sym', 0x0000000c
+; OBJ-NEXT: 'r_type', 0x0000011c
+
+
+; var32
+; R_AARCH64_ADR_PREL_PG_HI21 against var32
+; OBJ: 'r_sym', 0x0000000d
+; OBJ-NEXT: 'r_type', 0x00000113
+
+; R_AARCH64_LDST32_ABS_LO12_NC against var32
+; OBJ: 'r_sym', 0x0000000d
+; OBJ-NEXT: 'r_type', 0x0000011d
+
+
+; var64
+; R_AARCH64_ADR_PREL_PG_HI21 against var64
+; OBJ: 'r_sym', 0x0000000e
+; OBJ-NEXT: 'r_type', 0x00000113
+
+; R_AARCH64_LDST64_ABS_LO12_NC against var64
+; OBJ: 'r_sym', 0x0000000e
+; OBJ-NEXT: 'r_type', 0x0000011e
+
+; This is on the store, so not really important, but without this check the
+; next match would land on the store's relocation and fail.
+; R_AARCH64_LDST64_ABS_LO12_NC against var64
+; OBJ: 'r_sym', 0x0000000e
+; OBJ-NEXT: 'r_type', 0x0000011e
+
+
+; Pure address-calculation against var64
+; R_AARCH64_ADR_PREL_PG_HI21 against var64
+; OBJ: 'r_sym', 0x0000000e
+; OBJ-NEXT: 'r_type', 0x00000113
+
+; R_AARCH64_ADD_ABS_LO12_NC against var64
+; OBJ: 'r_sym', 0x0000000e
+; OBJ-NEXT: 'r_type', 0x00000115
+
+
+; Make sure the symbols don't move around, otherwise relocation info
+; will be wrong:
+
+; OBJ: Symbol 12
+; OBJ-NEXT: var16
+
+; OBJ: Symbol 13
+; OBJ-NEXT: var32
+
+; OBJ: Symbol 14
+; OBJ-NEXT: var64
+
+; OBJ: Symbol 15
+; OBJ-NEXT: var8
diff --git a/test/MC/AArch64/elf-objdump.s b/test/MC/AArch64/elf-objdump.s
new file mode 100644
index 0000000..c5aa5b1
--- /dev/null
+++ b/test/MC/AArch64/elf-objdump.s
@@ -0,0 +1,5 @@
+// 64 bit little endian
+// RUN: llvm-mc -filetype=obj -arch=aarch64 -triple aarch64-none-linux-gnu %s -o - | llvm-objdump -d - | FileCheck %s
+
+// We just want to see if llvm-objdump works at all.
+// CHECK: .text
diff --git a/test/MC/AArch64/elf-reloc-addsubimm.s b/test/MC/AArch64/elf-reloc-addsubimm.s
new file mode 100644
index 0000000..7fa6e90
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-addsubimm.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+
+        add x2, x3, #:lo12:some_label
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ-NEXT:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000115
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: some_label
+\ No newline at end of file
diff --git a/test/MC/AArch64/elf-reloc-condbr.s b/test/MC/AArch64/elf-reloc-condbr.s
new file mode 100644
index 0000000..283d3b9
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-condbr.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+
+        b.eq somewhere
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ-NEXT:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000118
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: somewhere
+\ No newline at end of file
diff --git a/test/MC/AArch64/elf-reloc-ldrlit.s b/test/MC/AArch64/elf-reloc-ldrlit.s
new file mode 100644
index 0000000..ce9ff49
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-ldrlit.s
@@ -0,0 +1,28 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+
+        ldr x0, some_label
+        ldr w3, some_label
+        ldrsw x9, some_label
+        prfm pldl3keep, some_label
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000111
+
+// OBJ: 'r_offset', 0x0000000000000004
+// OBJ:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000111
+
+// OBJ: 'r_offset', 0x0000000000000008
+// OBJ:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000111
+
+// OBJ: 'r_offset', 0x000000000000000c
+// OBJ:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000111
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: some_label
+\ No newline at end of file
diff --git a/test/MC/AArch64/elf-reloc-ldstunsimm.s b/test/MC/AArch64/elf-reloc-ldstunsimm.s
new file mode 100644
index 0000000..345fc82
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-ldstunsimm.s
@@ -0,0 +1,34 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+
+        ldrb w0, [sp, #:lo12:some_label]
+        ldrh w0, [sp, #:lo12:some_label]
+        ldr w0, [sp, #:lo12:some_label]
+        ldr x0, [sp, #:lo12:some_label]
+        str q0, [sp, #:lo12:some_label]
+
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ-NEXT:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000116
+
+// OBJ: 'r_offset', 0x0000000000000004
+// OBJ-NEXT:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000011c
+
+// OBJ: 'r_offset', 0x0000000000000008
+// OBJ-NEXT:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000011d
+
+// OBJ: 'r_offset', 0x000000000000000c
+// OBJ-NEXT:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000011e
+
+// OBJ: 'r_offset', 0x0000000000000010
+// OBJ-NEXT:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000012b
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: some_label
diff --git a/test/MC/AArch64/elf-reloc-movw.s b/test/MC/AArch64/elf-reloc-movw.s
new file mode 100644
index 0000000..cb7dc67
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-movw.s
@@ -0,0 +1,98 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+
+        movz x0, #:abs_g0:some_label
+        movk x0, #:abs_g0_nc:some_label
+
+        movz x3, #:abs_g1:some_label
+        movk x5, #:abs_g1_nc:some_label
+
+        movz x3, #:abs_g2:some_label
+        movk x5, #:abs_g2_nc:some_label
+
+        movz x7, #:abs_g3:some_label
+        movk x11, #:abs_g3:some_label
+
+        movz x13, #:abs_g0_s:some_label
+        movn x17, #:abs_g0_s:some_label
+
+        movz x19, #:abs_g1_s:some_label
+        movn x19, #:abs_g1_s:some_label
+
+        movz x19, #:abs_g2_s:some_label
+        movn x19, #:abs_g2_s:some_label
+// OBJ: .rela.text
+
+// :abs_g0: => R_AARCH64_MOVW_UABS_G0
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000107
+
+// :abs_g0_nc: => R_AARCH64_MOVW_UABS_G0_NC
+// OBJ: 'r_offset', 0x0000000000000004
+// OBJ:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000108
+
+// :abs_g1: => R_AARCH64_MOVW_UABS_G1
+// OBJ: 'r_offset', 0x0000000000000008
+// OBJ:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000109
+
+// :abs_g1_nc: => R_AARCH64_MOVW_UABS_G1_NC
+// OBJ: 'r_offset', 0x000000000000000c
+// OBJ:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010a
+
+// :abs_g2: => R_AARCH64_MOVW_UABS_G2
+// OBJ: 'r_offset', 0x0000000000000010
+// OBJ:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010b
+
+// :abs_g2_nc: => R_AARCH64_MOVW_UABS_G2_NC
+// OBJ: 'r_offset', 0x0000000000000014
+// OBJ:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010c
+
+// :abs_g3: => R_AARCH64_MOVW_UABS_G3
+// OBJ: 'r_offset', 0x0000000000000018
+// OBJ:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010d
+
+// :abs_g3: => R_AARCH64_MOVW_UABS_G3
+// OBJ: 'r_offset', 0x000000000000001c
+// OBJ:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010d
+
+// :abs_g0_s: => R_AARCH64_MOVW_SABS_G0
+// OBJ: 'r_offset', 0x0000000000000020
+// OBJ:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010e
+
+// :abs_g0_s: => R_AARCH64_MOVW_SABS_G0
+// OBJ: 'r_offset', 0x0000000000000024
+// OBJ:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010e
+
+// :abs_g1_s: => R_AARCH64_MOVW_SABS_G1
+// OBJ: 'r_offset', 0x0000000000000028
+// OBJ:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010f
+
+// :abs_g1_s: => R_AARCH64_MOVW_SABS_G1
+// OBJ: 'r_offset', 0x000000000000002c
+// OBJ:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010f
+
+// :abs_g2_s: => R_AARCH64_MOVW_SABS_G2
+// OBJ: 'r_offset', 0x0000000000000030
+// OBJ:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000110
+
+// :abs_g2_s: => R_AARCH64_MOVW_SABS_G2
+// OBJ: 'r_offset', 0x0000000000000034
+// OBJ:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000110
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: some_label
diff --git a/test/MC/AArch64/elf-reloc-pcreladdressing.s b/test/MC/AArch64/elf-reloc-pcreladdressing.s
new file mode 100644
index 0000000..39a8ba9
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-pcreladdressing.s
@@ -0,0 +1,29 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+
+        adr x2, some_label
+        adrp x5, some_label
+
+        adrp x5, :got:some_label
+        ldr x0, [x5, #:got_lo12:some_label]
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ-NEXT:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000112
+
+// OBJ: 'r_offset', 0x0000000000000004
+// OBJ-NEXT:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000113
+
+// OBJ: 'r_offset', 0x0000000000000008
+// OBJ-NEXT:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000137
+
+// OBJ: 'r_offset', 0x000000000000000c
+// OBJ-NEXT:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000138
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: some_label
+\ No newline at end of file
diff --git a/test/MC/AArch64/elf-reloc-tstb.s b/test/MC/AArch64/elf-reloc-tstb.s
new file mode 100644
index 0000000..c5e2981
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-tstb.s
@@ -0,0 +1,18 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+
+        tbz x6, #45, somewhere
+        tbnz w3, #15, somewhere
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ-NEXT:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000117
+
+// OBJ: 'r_offset', 0x0000000000000004
+// OBJ-NEXT:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000117
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: somewhere
diff --git a/test/MC/AArch64/elf-reloc-uncondbrimm.s b/test/MC/AArch64/elf-reloc-uncondbrimm.s
new file mode 100644
index 0000000..0e97bc6
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-uncondbrimm.s
@@ -0,0 +1,18 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+
+        b somewhere
+        bl somewhere
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ-NEXT:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000011a
+
+// OBJ: 'r_offset', 0x0000000000000004
+// OBJ-NEXT:  'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000011b
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: somewhere
+\ No newline at end of file
diff --git a/test/MC/AArch64/lit.local.cfg b/test/MC/AArch64/lit.local.cfg
new file mode 100644
index 0000000..cc02173
--- /dev/null
+++ b/test/MC/AArch64/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.s']
+
+targets = set(config.root.targets_to_build.split())
+if not 'AArch64' in targets:
+    config.unsupported = True
+\ No newline at end of file
diff --git a/test/MC/AArch64/mapping-across-sections.s b/test/MC/AArch64/mapping-across-sections.s
new file mode 100644
index 0000000..3d32c1d
--- /dev/null
+++ b/test/MC/AArch64/mapping-across-sections.s
@@ -0,0 +1,28 @@
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s
+
+        .text
+        add w0, w0, w0
+
+// .wibble should *not* inherit .text's mapping symbol. It's a completely different section.
+        .section .wibble
+        add w0, w0, w0
+
+// A section should be able to start with a $d
+        .section .starts_data
+        .word 42
+
+// Changing back to .text should not emit a redundant $x
+        .text
+        add w0, w0, w0
+
+// With all those constraints, we want:
+//   + .text to have $x at 0 and no others
+//   + .wibble to have $x at 0
+//   + .starts_data to have $d at 0
+
+
+// CHECK: 00000000 .starts_data 00000000 $d
+// CHECK-NEXT: 00000000 .text 00000000 $x
+// CHECK-NEXT: 00000000 .wibble 00000000 $x
+// CHECK-NOT: ${{[adtx]}}
+
diff --git a/test/MC/AArch64/mapping-within-section.s b/test/MC/AArch64/mapping-within-section.s
new file mode 100644
index 0000000..c8bd804
--- /dev/null
+++ b/test/MC/AArch64/mapping-within-section.s
@@ -0,0 +1,23 @@
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s
+
+    .text
+// $x at 0x0000
+    add w0, w0, w0
+// $d at 0x0004
+    .ascii "012"
+    .byte 1
+    .hword 2
+    .word 4
+    .xword 8
+    .single 4.0
+    .double 8.0
+    .space 10
+    .zero 3
+    .fill 10, 2, 42
+    .org 100, 12
+// $x at 0x0064
+    add x0, x0, x0
+
+// CHECK: 00000004         .text  00000000 $d
+// CHECK-NEXT: 00000000         .text  00000000 $x
+// CHECK-NEXT: 00000064         .text  00000000 $x
diff --git a/test/MC/AArch64/tls-relocs.s b/test/MC/AArch64/tls-relocs.s
new file mode 100644
index 0000000..690fa8c
--- /dev/null
+++ b/test/MC/AArch64/tls-relocs.s
@@ -0,0 +1,662 @@
+// RUN: llvm-mc -arch=aarch64 -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -arch=aarch64 -filetype=obj < %s -o %t
+// RUN: elf-dump %t | FileCheck --check-prefix=CHECK-ELF %s
+// RUN: llvm-objdump -r %t | FileCheck --check-prefix=CHECK-ELF-NAMES %s
+
+// CHECK-ELF:  .rela.text
+
+        // TLS local-dynamic forms
+        movz x1, #:dtprel_g2:var
+        movn x2, #:dtprel_g2:var
+        movz x3, #:dtprel_g2:var
+        movn x4, #:dtprel_g2:var
+// CHECK: movz    x1, #:dtprel_g2:var     // encoding: [0x01'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
+// CHECK-NEXT: movn    x2, #:dtprel_g2:var     // encoding: [0x02'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
+// CHECK-NEXT: movz    x3, #:dtprel_g2:var     // encoding: [0x03'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
+// CHECK-NEXT: movn    x4, #:dtprel_g2:var     // encoding: [0x04'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
+
+// CHECK-ELF: # Relocation 0
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000000)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM:0x[0-9a-f]+]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000020b)
+// CHECK-ELF: # Relocation 1
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000004)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000020b)
+// CHECK-ELF: # Relocation 2
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000008)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000020b)
+// CHECK-ELF: # Relocation 3
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000000c)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000020b)
+
+// CHECK-ELF-NAMES: 0 R_AARCH64_TLSLD_MOVW_DTPREL_G2
+// CHECK-ELF-NAMES: 4 R_AARCH64_TLSLD_MOVW_DTPREL_G2
+// CHECK-ELF-NAMES: 8 R_AARCH64_TLSLD_MOVW_DTPREL_G2
+// CHECK-ELF-NAMES: 12 R_AARCH64_TLSLD_MOVW_DTPREL_G2
+
+        movz x5, #:dtprel_g1:var
+        movn x6, #:dtprel_g1:var
+        movz w7, #:dtprel_g1:var
+        movn w8, #:dtprel_g1:var
+// CHECK: movz    x5, #:dtprel_g1:var     // encoding: [0x05'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
+// CHECK-NEXT: movn    x6, #:dtprel_g1:var     // encoding: [0x06'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
+// CHECK-NEXT: movz    w7, #:dtprel_g1:var     // encoding: [0x07'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
+// CHECK-NEXT: movn    w8, #:dtprel_g1:var     // encoding: [0x08'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
+
+// CHECK-ELF: # Relocation 4
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000010)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000020c)
+// CHECK-ELF: # Relocation 5
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000014)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000020c)
+// CHECK-ELF: # Relocation 6
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000018)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000020c)
+// CHECK-ELF: # Relocation 7
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000001c)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000020c)
+
+// CHECK-ELF-NAMES: 16 R_AARCH64_TLSLD_MOVW_DTPREL_G1
+// CHECK-ELF-NAMES: 20 R_AARCH64_TLSLD_MOVW_DTPREL_G1
+// CHECK-ELF-NAMES: 24 R_AARCH64_TLSLD_MOVW_DTPREL_G1
+// CHECK-ELF-NAMES: 28 R_AARCH64_TLSLD_MOVW_DTPREL_G1
+
+        movk x9, #:dtprel_g1_nc:var
+        movk w10, #:dtprel_g1_nc:var
+// CHECK: movk    x9, #:dtprel_g1_nc:var  // encoding: [0x09'A',A,0xa0'A',0xf2'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_a64_movw_dtprel_g1_nc
+// CHECK-NEXT: movk    w10, #:dtprel_g1_nc:var // encoding: [0x0a'A',A,0xa0'A',0x72'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_a64_movw_dtprel_g1_nc
+
+// CHECK-ELF: # Relocation 8
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000020)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000020d)
+// CHECK-ELF: # Relocation 9
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000024)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000020d)
+
+// CHECK-ELF-NAMES: 32 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC
+// CHECK-ELF-NAMES: 36 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC
+
+        movz x11, #:dtprel_g0:var
+        movn x12, #:dtprel_g0:var
+        movz w13, #:dtprel_g0:var
+        movn w14, #:dtprel_g0:var
+// CHECK: movz    x11, #:dtprel_g0:var    // encoding: [0x0b'A',A,0x80'A',0x92'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
+// CHECK-NEXT: movn    x12, #:dtprel_g0:var    // encoding: [0x0c'A',A,0x80'A',0x92'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
+// CHECK-NEXT: movz    w13, #:dtprel_g0:var    // encoding: [0x0d'A',A,0x80'A',0x12'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
+// CHECK-NEXT: movn    w14, #:dtprel_g0:var    // encoding: [0x0e'A',A,0x80'A',0x12'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
+
+// CHECK-ELF: # Relocation 10
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000028)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000020e)
+// CHECK-ELF: # Relocation 11
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000002c)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000020e)
+// CHECK-ELF: # Relocation 12
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000030)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000020e)
+// CHECK-ELF: # Relocation 13
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000034)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000020e)
+
+// CHECK-ELF-NAMES: 40 R_AARCH64_TLSLD_MOVW_DTPREL_G0
+// CHECK-ELF-NAMES: 44 R_AARCH64_TLSLD_MOVW_DTPREL_G0
+// CHECK-ELF-NAMES: 48 R_AARCH64_TLSLD_MOVW_DTPREL_G0
+// CHECK-ELF-NAMES: 52 R_AARCH64_TLSLD_MOVW_DTPREL_G0
+
+
+        movk x15, #:dtprel_g0_nc:var
+        movk w16, #:dtprel_g0_nc:var
+// CHECK: movk    x15, #:dtprel_g0_nc:var // encoding: [0x0f'A',A,0x80'A',0xf2'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_a64_movw_dtprel_g0_nc
+// CHECK-NEXT: movk    w16, #:dtprel_g0_nc:var // encoding: [0x10'A',A,0x80'A',0x72'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_a64_movw_dtprel_g0_nc
+
+// CHECK-ELF: # Relocation 14
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000038)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000020f)
+// CHECK-ELF: # Relocation 15
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000003c)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000020f)
+
+// CHECK-ELF-NAMES: 56 R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC
+// CHECK-ELF-NAMES: 60 R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC
+
+        add x17, x18, #:dtprel_hi12:var, lsl #12
+        add w19, w20, #:dtprel_hi12:var, lsl #12
+// CHECK: add     x17, x18, #:dtprel_hi12:var, lsl #12 // encoding: [0x51'A',0x02'A',0x40'A',0x91'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_a64_add_dtprel_hi12
+// CHECK-NEXT: add     w19, w20, #:dtprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_a64_add_dtprel_hi12
+
+// CHECK-ELF: # Relocation 16
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000040)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000210)
+// CHECK-ELF: # Relocation 17
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000044)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000210)
+
+// CHECK-ELF-NAMES: 64 R_AARCH64_TLSLD_ADD_DTPREL_HI12
+// CHECK-ELF-NAMES: 68 R_AARCH64_TLSLD_ADD_DTPREL_HI12
+
+
+        add x21, x22, #:dtprel_lo12:var
+        add w23, w24, #:dtprel_lo12:var
+// CHECK: add     x21, x22, #:dtprel_lo12:var // encoding: [0xd5'A',0x02'A',A,0x91'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_add_dtprel_lo12
+// CHECK-NEXT: add     w23, w24, #:dtprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_add_dtprel_lo12
+
+// CHECK-ELF: # Relocation 18
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000048)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000211)
+// CHECK-ELF: # Relocation 19
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000004c)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000211)
+
+// CHECK-ELF-NAMES: 72 R_AARCH64_TLSLD_ADD_DTPREL_LO12
+// CHECK-ELF-NAMES: 76 R_AARCH64_TLSLD_ADD_DTPREL_LO12
+
+        add x25, x26, #:dtprel_lo12_nc:var
+        add w27, w28, #:dtprel_lo12_nc:var
+// CHECK: add     x25, x26, #:dtprel_lo12_nc:var // encoding: [0x59'A',0x03'A',A,0x91'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_add_dtprel_lo12_nc
+// CHECK-NEXT: add     w27, w28, #:dtprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_add_dtprel_lo12_nc
+
+// CHECK-ELF: # Relocation 20
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000050)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000212)
+// CHECK-ELF: # Relocation 21
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000054)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000212)
+
+// CHECK-ELF-NAMES: 80 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
+// CHECK-ELF-NAMES: 84 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
+
+        ldrb w29, [x30, #:dtprel_lo12:var]
+        ldrsb x29, [x28, #:dtprel_lo12_nc:var]
+// CHECK: ldrb    w29, [x30, #:dtprel_lo12:var] // encoding: [0xdd'A',0x03'A',0x40'A',0x39'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst8_dtprel_lo12
+// CHECK-NEXT: ldrsb   x29, [x28, #:dtprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst8_dtprel_lo12_nc
+
+// CHECK-ELF: # Relocation 22
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000058)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000213)
+// CHECK-ELF: # Relocation 23
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000005c)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000214)
+
+// CHECK-ELF-NAMES: 88 R_AARCH64_TLSLD_LDST8_DTPREL_LO12
+// CHECK-ELF-NAMES: 92 R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC
+
+        strh w27, [x26, #:dtprel_lo12:var]
+        ldrsh x25, [x24, #:dtprel_lo12_nc:var]
+// CHECK: strh    w27, [x26, #:dtprel_lo12:var] // encoding: [0x5b'A',0x03'A',A,0x79'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst16_dtprel_lo12
+// CHECK-NEXT: ldrsh   x25, [x24, #:dtprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst16_dtprel_lo12_nc
+
+// CHECK-ELF: # Relocation 24
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000060)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000215)
+// CHECK-ELF: # Relocation 25
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000064)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000216)
+
+// CHECK-ELF-NAMES: 96 R_AARCH64_TLSLD_LDST16_DTPREL_LO12
+// CHECK-ELF-NAMES: 100 R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC
+
+        ldr w23, [x22, #:dtprel_lo12:var]
+        ldrsw x21, [x20, #:dtprel_lo12_nc:var]
+// CHECK: ldr     w23, [x22, #:dtprel_lo12:var] // encoding: [0xd7'A',0x02'A',0x40'A',0xb9'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst32_dtprel_lo12
+// CHECK-NEXT: ldrsw   x21, [x20, #:dtprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst32_dtprel_lo12_nc
+
+// CHECK-ELF: # Relocation 26
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000068)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000217)
+// CHECK-ELF: # Relocation 27
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000006c)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000218)
+
+// CHECK-ELF-NAMES: 104 R_AARCH64_TLSLD_LDST32_DTPREL_LO12
+// CHECK-ELF-NAMES: 108 R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC
+
+        ldr x19, [x18, #:dtprel_lo12:var]
+        str x17, [x16, #:dtprel_lo12_nc:var]
+// CHECK: ldr     x19, [x18, #:dtprel_lo12:var] // encoding: [0x53'A',0x02'A',0x40'A',0xf9'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst64_dtprel_lo12
+// CHECK-NEXT: str     x17, [x16, #:dtprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst64_dtprel_lo12_nc
+
+
+// CHECK-ELF: # Relocation 28
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000070)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000219)
+// CHECK-ELF: # Relocation 29
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000074)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000021a)
+
+// CHECK-ELF-NAMES: 112 R_AARCH64_TLSLD_LDST64_DTPREL_LO12
+// CHECK-ELF-NAMES: 116 R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC
+
+        // TLS initial-exec forms
+        movz x15, #:gottprel_g1:var
+        movz w14, #:gottprel_g1:var
+// CHECK: movz    x15, #:gottprel_g1:var  // encoding: [0x0f'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_a64_movw_gottprel_g1
+// CHECK-NEXT: movz    w14, #:gottprel_g1:var  // encoding: [0x0e'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_a64_movw_gottprel_g1
+
+// CHECK-ELF: # Relocation 30
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000078)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000021b)
+// CHECK-ELF: # Relocation 31
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000007c)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000021b)
+
+// CHECK-ELF-NAMES: 120 R_AARCH64_TLSIE_MOVW_GOTTPREL_G1
+// CHECK-ELF-NAMES: 124 R_AARCH64_TLSIE_MOVW_GOTTPREL_G1
+
+        movk x13, #:gottprel_g0_nc:var
+        movk w12, #:gottprel_g0_nc:var
+// CHECK: movk    x13, #:gottprel_g0_nc:var // encoding: [0x0d'A',A,0x80'A',0xf2'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_a64_movw_gottprel_g0_nc
+// CHECK-NEXT: movk    w12, #:gottprel_g0_nc:var // encoding: [0x0c'A',A,0x80'A',0x72'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_a64_movw_gottprel_g0_nc
+
+// CHECK-ELF: # Relocation 32
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000080)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000021c)
+// CHECK-ELF: # Relocation 33
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000084)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000021c)
+
+// CHECK-ELF-NAMES: 128 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC
+// CHECK-ELF-NAMES: 132 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC
+
+        adrp x11, :gottprel:var
+        ldr x10, [x0, #:gottprel_lo12:var]
+        ldr x9, :gottprel:var
+// CHECK: adrp    x11, :gottprel:var      // encoding: [0x0b'A',A,A,0x90'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel:var, kind: fixup_a64_adr_gottprel_page
+// CHECK-NEXT: ldr     x10, [x0, #:gottprel_lo12:var] // encoding: [0x0a'A',A,0x40'A',0xf9'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_a64_ld64_gottprel_lo12_nc
+// CHECK-NEXT: ldr     x9, :gottprel:var       // encoding: [0x09'A',A,A,0x58'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel:var, kind: fixup_a64_ld_gottprel_prel19
+
+// CHECK-ELF: # Relocation 34
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000088)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000021d)
+// CHECK-ELF: # Relocation 35
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000008c)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000021e)
+// CHECK-ELF: # Relocation 36
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000090)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000021f)
+
+// CHECK-ELF-NAMES: 136 R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE
+// CHECK-ELF-NAMES: 140 R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC
+// CHECK-ELF-NAMES: 144 R_AARCH64_TLSIE_LD_GOTTPREL_PREL19
+
+        // TLS local-exec forms
+        movz x3, #:tprel_g2:var
+        movn x4, #:tprel_g2:var
+// CHECK: movz    x3, #:tprel_g2:var      // encoding: [0x03'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_a64_movw_tprel_g2
+// CHECK-NEXT: movn    x4, #:tprel_g2:var      // encoding: [0x04'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_a64_movw_tprel_g2
+
+// CHECK-ELF: # Relocation 37
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000094)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000220)
+// CHECK-ELF: # Relocation 38
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000098)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000220)
+
+// CHECK-ELF-NAMES: 148 R_AARCH64_TLSLE_MOVW_TPREL_G2
+// CHECK-ELF-NAMES: 152 R_AARCH64_TLSLE_MOVW_TPREL_G2
+
+        movz x5, #:tprel_g1:var
+        movn x6, #:tprel_g1:var
+        movz w7, #:tprel_g1:var
+        movn w8, #:tprel_g1:var
+// CHECK: movz    x5, #:tprel_g1:var      // encoding: [0x05'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
+// CHECK-NEXT: movn    x6, #:tprel_g1:var      // encoding: [0x06'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
+// CHECK-NEXT: movz    w7, #:tprel_g1:var      // encoding: [0x07'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
+// CHECK-NEXT: movn    w8, #:tprel_g1:var      // encoding: [0x08'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
+
+// CHECK-ELF: # Relocation 39
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000009c)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000221)
+// CHECK-ELF: # Relocation 40
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000a0)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000221)
+// CHECK-ELF: # Relocation 41
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000a4)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000221)
+// CHECK-ELF: # Relocation 42
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000a8)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000221)
+
+// CHECK-ELF-NAMES: 156 R_AARCH64_TLSLE_MOVW_TPREL_G1
+// CHECK-ELF-NAMES: 160 R_AARCH64_TLSLE_MOVW_TPREL_G1
+// CHECK-ELF-NAMES: 164 R_AARCH64_TLSLE_MOVW_TPREL_G1
+// CHECK-ELF-NAMES: 168 R_AARCH64_TLSLE_MOVW_TPREL_G1
+
+        movk x9, #:tprel_g1_nc:var
+        movk w10, #:tprel_g1_nc:var
+// CHECK: movk    x9, #:tprel_g1_nc:var   // encoding: [0x09'A',A,0xa0'A',0xf2'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_a64_movw_tprel_g1_nc
+// CHECK-NEXT: movk    w10, #:tprel_g1_nc:var  // encoding: [0x0a'A',A,0xa0'A',0x72'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_a64_movw_tprel_g1_nc
+
+// CHECK-ELF: # Relocation 43
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000ac)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000222)
+// CHECK-ELF: # Relocation 44
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000b0)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000222)
+
+// CHECK-ELF-NAMES: 172 R_AARCH64_TLSLE_MOVW_TPREL_G1_NC
+// CHECK-ELF-NAMES: 176 R_AARCH64_TLSLE_MOVW_TPREL_G1_NC
+
+        movz x11, #:tprel_g0:var
+        movn x12, #:tprel_g0:var
+        movz w13, #:tprel_g0:var
+        movn w14, #:tprel_g0:var
+// CHECK: movz    x11, #:tprel_g0:var     // encoding: [0x0b'A',A,0x80'A',0x92'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
+// CHECK-NEXT: movn    x12, #:tprel_g0:var     // encoding: [0x0c'A',A,0x80'A',0x92'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
+// CHECK-NEXT: movz    w13, #:tprel_g0:var     // encoding: [0x0d'A',A,0x80'A',0x12'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
+// CHECK-NEXT: movn    w14, #:tprel_g0:var     // encoding: [0x0e'A',A,0x80'A',0x12'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
+
+// CHECK-ELF: # Relocation 45
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000b4)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000223)
+// CHECK-ELF: # Relocation 46
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000b8)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000223)
+// CHECK-ELF: # Relocation 47
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000bc)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000223)
+// CHECK-ELF: # Relocation 48
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000c0)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000223)
+
+// CHECK-ELF-NAMES: 180 R_AARCH64_TLSLE_MOVW_TPREL_G0
+// CHECK-ELF-NAMES: 184 R_AARCH64_TLSLE_MOVW_TPREL_G0
+// CHECK-ELF-NAMES: 188 R_AARCH64_TLSLE_MOVW_TPREL_G0
+// CHECK-ELF-NAMES: 192 R_AARCH64_TLSLE_MOVW_TPREL_G0
+
+        movk x15, #:tprel_g0_nc:var
+        movk w16, #:tprel_g0_nc:var
+// CHECK: movk    x15, #:tprel_g0_nc:var  // encoding: [0x0f'A',A,0x80'A',0xf2'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_a64_movw_tprel_g0_nc
+// CHECK-NEXT: movk    w16, #:tprel_g0_nc:var  // encoding: [0x10'A',A,0x80'A',0x72'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_a64_movw_tprel_g0_nc
+
+// CHECK-ELF: # Relocation 49
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000c4)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000224)
+// CHECK-ELF: # Relocation 50
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000c8)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000224)
+
+// CHECK-ELF-NAMES: 196 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+// CHECK-ELF-NAMES: 200 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+
+        add x17, x18, #:tprel_hi12:var, lsl #12
+        add w19, w20, #:tprel_hi12:var, lsl #12
+// CHECK: add     x17, x18, #:tprel_hi12:var, lsl #12 // encoding: [0x51'A',0x02'A',0x40'A',0x91'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_a64_add_tprel_hi12
+// CHECK-NEXT: add     w19, w20, #:tprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_a64_add_tprel_hi12
+
+// CHECK-ELF: # Relocation 51
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000cc)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000225)
+// CHECK-ELF: # Relocation 52
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000d0)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000225)
+
+// CHECK-ELF-NAMES: 204 R_AARCH64_TLSLE_ADD_TPREL_HI12
+// CHECK-ELF-NAMES: 208 R_AARCH64_TLSLE_ADD_TPREL_HI12
+
+        add x21, x22, #:tprel_lo12:var
+        add w23, w24, #:tprel_lo12:var
+// CHECK: add     x21, x22, #:tprel_lo12:var // encoding: [0xd5'A',0x02'A',A,0x91'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_add_tprel_lo12
+// CHECK-NEXT: add     w23, w24, #:tprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_add_tprel_lo12
+
+// CHECK-ELF: # Relocation 53
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000d4)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000226)
+// CHECK-ELF: # Relocation 54
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000d8)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000226)
+
+// CHECK-ELF-NAMES: 212 R_AARCH64_TLSLE_ADD_TPREL_LO12
+// CHECK-ELF-NAMES: 216 R_AARCH64_TLSLE_ADD_TPREL_LO12
+
+        add x25, x26, #:tprel_lo12_nc:var
+        add w27, w28, #:tprel_lo12_nc:var
+// CHECK: add     x25, x26, #:tprel_lo12_nc:var // encoding: [0x59'A',0x03'A',A,0x91'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_add_tprel_lo12_nc
+// CHECK-NEXT: add     w27, w28, #:tprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_add_tprel_lo12_nc
+
+// CHECK-ELF: # Relocation 55
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000dc)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000227)
+// CHECK-ELF: # Relocation 56
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000e0)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000227)
+
+
+// CHECK-ELF-NAMES: 220 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
+// CHECK-ELF-NAMES: 224 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
+
+        ldrb w29, [x30, #:tprel_lo12:var]
+        ldrsb x29, [x28, #:tprel_lo12_nc:var]
+// CHECK: ldrb    w29, [x30, #:tprel_lo12:var] // encoding: [0xdd'A',0x03'A',0x40'A',0x39'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst8_tprel_lo12
+// CHECK-NEXT: ldrsb   x29, [x28, #:tprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst8_tprel_lo12_nc
+
+// CHECK-ELF: # Relocation 57
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000e4)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000228)
+// CHECK-ELF: # Relocation 58
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000e8)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000229)
+
+// CHECK-ELF-NAMES: 228 R_AARCH64_TLSLE_LDST8_TPREL_LO12
+// CHECK-ELF-NAMES: 232 R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC
+
+        strh w27, [x26, #:tprel_lo12:var]
+        ldrsh x25, [x24, #:tprel_lo12_nc:var]
+// CHECK: strh    w27, [x26, #:tprel_lo12:var] // encoding: [0x5b'A',0x03'A',A,0x79'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst16_tprel_lo12
+// CHECK-NEXT: ldrsh   x25, [x24, #:tprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst16_tprel_lo12_nc
+
+// CHECK-ELF: # Relocation 59
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000ec)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000022a)
+// CHECK-ELF: # Relocation 60
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000f0)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000022b)
+
+// CHECK-ELF-NAMES: 236 R_AARCH64_TLSLE_LDST16_TPREL_LO12
+// CHECK-ELF-NAMES: 240 R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC
+
+        ldr w23, [x22, #:tprel_lo12:var]
+        ldrsw x21, [x20, #:tprel_lo12_nc:var]
+// CHECK: ldr     w23, [x22, #:tprel_lo12:var] // encoding: [0xd7'A',0x02'A',0x40'A',0xb9'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst32_tprel_lo12
+// CHECK-NEXT: ldrsw   x21, [x20, #:tprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst32_tprel_lo12_nc
+
+// CHECK-ELF: # Relocation 61
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000f4)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000022c)
+// CHECK-ELF: # Relocation 62
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000f8)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000022d)
+
+// CHECK-ELF-NAMES: 244 R_AARCH64_TLSLE_LDST32_TPREL_LO12
+// CHECK-ELF-NAMES: 248 R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC
+
+        ldr x19, [x18, #:tprel_lo12:var]
+        str x17, [x16, #:tprel_lo12_nc:var]
+// CHECK: ldr     x19, [x18, #:tprel_lo12:var] // encoding: [0x53'A',0x02'A',0x40'A',0xf9'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst64_tprel_lo12
+// CHECK-NEXT: str     x17, [x16, #:tprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst64_tprel_lo12_nc
+
+// CHECK-ELF: # Relocation 63
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000fc)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000022e)
+// CHECK-ELF: # Relocation 64
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000100)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x0000022f)
+
+// CHECK-ELF-NAMES: 252 R_AARCH64_TLSLE_LDST64_TPREL_LO12
+// CHECK-ELF-NAMES: 256 R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC
+
+        // TLS descriptor forms
+        adrp x8, :tlsdesc:var
+        ldr x7, [x6, :tlsdesc_lo12:var]
+        add x5, x4, #:tlsdesc_lo12:var
+        .tlsdesccall var
+        blr x3
+
+// CHECK: adrp    x8, :tlsdesc:var        // encoding: [0x08'A',A,A,0x90'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_a64_tlsdesc_adr_page
+// CHECK-NEXT: ldr     x7, [x6, #:tlsdesc_lo12:var] // encoding: [0xc7'A',A,0x40'A',0xf9'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_a64_tlsdesc_ld64_lo12_nc
+// CHECK-NEXT: add     x5, x4, #:tlsdesc_lo12:var // encoding: [0x85'A',A,A,0x91'A']
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_a64_tlsdesc_add_lo12_nc
+// CHECK-NEXT: .tlsdesccall var                // encoding: []
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_a64_tlsdesc_call
+// CHECK: blr     x3                      // encoding: [0x60,0x00,0x3f,0xd6]
+
+
+// CHECK-ELF: # Relocation 65
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000104)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000232)
+// CHECK-ELF: # Relocation 66
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000108)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000233)
+// CHECK-ELF: # Relocation 67
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000010c)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000234)
+// CHECK-ELF: # Relocation 68
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000110)
+// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT:  ('r_type', 0x00000239)
+
+// CHECK-ELF-NAMES: 260 R_AARCH64_TLSDESC_ADR_PAGE
+// CHECK-ELF-NAMES: 264 R_AARCH64_TLSDESC_LD64_LO12_NC
+// CHECK-ELF-NAMES: 268 R_AARCH64_TLSDESC_ADD_LO12_NC
+// CHECK-ELF-NAMES: 272 R_AARCH64_TLSDESC_CALL
+
+
+// Make sure symbol 5 has type STT_TLS:
+
+// CHECK-ELF: # Symbol 5
+// CHECK-ELF-NEXT: (('st_name', 0x00000006) # 'var'
+// CHECK-ELF-NEXT:  ('st_bind', 0x1)
+// CHECK-ELF-NEXT:  ('st_type', 0x6)
diff --git a/test/MC/ARM/AlignedBundling/group-bundle-arm.s b/test/MC/ARM/AlignedBundling/group-bundle-arm.s
index 823d9e0..1d67353 100644
--- a/test/MC/ARM/AlignedBundling/group-bundle-arm.s
+++ b/test/MC/ARM/AlignedBundling/group-bundle-arm.s
@@ -5,8 +5,8 @@
 # instructions should not be inserted. However, for bundle-locked groups
 # it can be.
 
-	.syntax unified
-	.text
+  .syntax unified
+  .text
   .bundle_align_mode 4
 
   bx lr
@@ -35,3 +35,14 @@
 # CHECK-NEXT: 2c: nop
 # CHECK-NEXT: 30: bx
 
+  .align 4
+foo:
+  b foo
+  .long 3892240112
+  .long 3892240112
+  .long 3892240112
+  .long 3892240112
+  .long 3892240112
+  .long 3892240112
+# CHECK:  40: b
+
diff --git a/test/MC/ARM/arm_instructions.s b/test/MC/ARM/arm_instructions.s
index ce7e036..a4b6bda 100644
--- a/test/MC/ARM/arm_instructions.s
+++ b/test/MC/ARM/arm_instructions.s
@@ -1,7 +1,14 @@
-@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding %s | FileCheck %s
-
-@ CHECK: trap
-@ CHECK: encoding: [0xfe,0xde,0xff,0xe7]
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding %s \
+@ RUN:  | FileCheck %s -check-prefix=ALL
+@ RUN: llvm-mc -mcpu=cortex-a9-mp -triple armv7-unknown-nacl -show-encoding %s \
+@ RUN:  | FileCheck %s -check-prefix=NACL
+@ RUN: llvm-mc -mcpu=cortex-a8 -mattr=+nacl-trap -triple armv7 -show-encoding %s \
+@ RUN:  | FileCheck %s -check-prefix=NACL
+
+@ ALL: trap
+@ ALL: encoding: [0xfe,0xde,0xff,0xe7]
+@ NACL: trap
+@ NACL: encoding: [0xf0,0xde,0xfe,0xe7]
         trap
 
 @ CHECK: bx	lr
diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s
index 5c2a214..45ea278 100644
--- a/test/MC/ARM/basic-arm-instructions.s
+++ b/test/MC/ARM/basic-arm-instructions.s
@@ -2087,6 +2087,49 @@ Lforward:
 @ CHECK: srsia	sp, #5                  @ encoding: [0x05,0x05,0xcd,0xf8]
 @ CHECK: srsia	sp!, #5                 @ encoding: [0x05,0x05,0xed,0xf8]
 
+@ Compatibility aliases.
+        srsda #5
+        srsdb #1
+        srsia #0
+        srsib #15
+
+        srsda #31!
+        srsdb #19!
+        srsia #2!
+        srsib #14!
+
+        srsfa #11
+        srsea #10
+        srsfd #9
+        srsed #5
+
+        srsfa #5!
+        srsea #5!
+        srsfd #5!
+        srsed #5!
+
+        srs #5
+        srs #5!
+
+@ CHECK: srsda	sp, #5                  @ encoding: [0x05,0x05,0x4d,0xf8]
+@ CHECK: srsdb	sp, #1                  @ encoding: [0x01,0x05,0x4d,0xf9]
+@ CHECK: srsia	sp, #0                  @ encoding: [0x00,0x05,0xcd,0xf8]
+@ CHECK: srsib	sp, #15                 @ encoding: [0x0f,0x05,0xcd,0xf9]
+@ CHECK: srsda	sp!, #31                @ encoding: [0x1f,0x05,0x6d,0xf8]
+@ CHECK: srsdb	sp!, #19                @ encoding: [0x13,0x05,0x6d,0xf9]
+@ CHECK: srsia	sp!, #2                 @ encoding: [0x02,0x05,0xed,0xf8]
+@ CHECK: srsib	sp!, #14                @ encoding: [0x0e,0x05,0xed,0xf9]
+@ CHECK: srsda	sp, #11                 @ encoding: [0x0b,0x05,0x4d,0xf8]
+@ CHECK: srsdb	sp, #10                 @ encoding: [0x0a,0x05,0x4d,0xf9]
+@ CHECK: srsia	sp, #9                  @ encoding: [0x09,0x05,0xcd,0xf8]
+@ CHECK: srsib	sp, #5                  @ encoding: [0x05,0x05,0xcd,0xf9]
+@ CHECK: srsda	sp!, #5                 @ encoding: [0x05,0x05,0x6d,0xf8]
+@ CHECK: srsdb	sp!, #5                 @ encoding: [0x05,0x05,0x6d,0xf9]
+@ CHECK: srsia	sp!, #5                 @ encoding: [0x05,0x05,0xed,0xf8]
+@ CHECK: srsib	sp!, #5                 @ encoding: [0x05,0x05,0xed,0xf9]
+@ CHECK: srsia	sp, #5                  @ encoding: [0x05,0x05,0xcd,0xf8]
+@ CHECK: srsia	sp!, #5                 @ encoding: [0x05,0x05,0xed,0xf8]
+
 
 @------------------------------------------------------------------------------
 @ SSAT
diff --git a/test/MC/ARM/basic-thumb2-instructions.s b/test/MC/ARM/basic-thumb2-instructions.s
index d495c91..9278a2a 100644
--- a/test/MC/ARM/basic-thumb2-instructions.s
+++ b/test/MC/ARM/basic-thumb2-instructions.s
@@ -2352,6 +2352,32 @@ _func:
 @ CHECK: srsia	sp, #5                  @ encoding: [0x8d,0xe9,0x05,0xc0]
 @ CHECK: srsia	sp!, #5                 @ encoding: [0xad,0xe9,0x05,0xc0]
 
+        srsdb #1
+        srsia #0
+
+        srsdb #19!
+        srsia #2!
+
+        srsea #10
+        srsfd #9
+
+        srsea #5!
+        srsfd #5!
+
+        srs #5
+        srs #5!
+
+@ CHECK: srsdb	sp, #1                  @ encoding: [0x0d,0xe8,0x01,0xc0]
+@ CHECK: srsia	sp, #0                  @ encoding: [0x8d,0xe9,0x00,0xc0]
+@ CHECK: srsdb	sp!, #19                @ encoding: [0x2d,0xe8,0x13,0xc0]
+@ CHECK: srsia	sp!, #2                 @ encoding: [0xad,0xe9,0x02,0xc0]
+@ CHECK: srsdb	sp, #10                 @ encoding: [0x0d,0xe8,0x0a,0xc0]
+@ CHECK: srsia	sp, #9                  @ encoding: [0x8d,0xe9,0x09,0xc0]
+@ CHECK: srsdb	sp!, #5                 @ encoding: [0x2d,0xe8,0x05,0xc0]
+@ CHECK: srsia	sp!, #5                 @ encoding: [0xad,0xe9,0x05,0xc0]
+@ CHECK: srsia	sp, #5                  @ encoding: [0x8d,0xe9,0x05,0xc0]
+@ CHECK: srsia	sp!, #5                 @ encoding: [0xad,0xe9,0x05,0xc0]
+
 
 @------------------------------------------------------------------------------
 @ SSAT
diff --git a/test/MC/ARM/elf-eflags-eabi-cg.ll b/test/MC/ARM/elf-eflags-eabi-cg.ll
new file mode 100644
index 0000000..2e86a0f
--- /dev/null
+++ b/test/MC/ARM/elf-eflags-eabi-cg.ll
@@ -0,0 +1,13 @@
+; Codegen version to check for ELF header flags.
+;
+; RUN: llc %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic \
+; RUN: -filetype=obj -o - | elf-dump --dump-section-data | \
+; RUN: FileCheck %s
+
+define void @bar() nounwind {
+entry:
+  ret void
+}
+
+; For now the only e_flag set is EF_ARM_EABI_VER5
+;CHECK:    'e_flags', 0x05000000
diff --git a/test/MC/ARM/neon-bitwise-encoding.s b/test/MC/ARM/neon-bitwise-encoding.s
index e8c1dd6..8c72288 100644
--- a/test/MC/ARM/neon-bitwise-encoding.s
+++ b/test/MC/ARM/neon-bitwise-encoding.s
@@ -1,4 +1,5 @@
-@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s \
+@ RUN: | FileCheck %s
 
 	vand	d16, d17, d16
 	vand	q8, q8, q9
@@ -255,6 +256,42 @@
 	veor.f   q8, q2
 	veor.i64 q8, q2
 
+	vclt.s16 q5, #0
+	vclt.s16 d5, #0
+
+	vceq.s16 q5, q3
+	vceq.s16 d5, d3
+
+	vcgt.s16 q5, q3
+	vcgt.s16 d5, d3
+
+	vcge.s16 q5, q3
+	vcge.s16 d5, d3
+
+	vcgt.s16 q5, #0
+	vcgt.s16 d5, #0
+
+	vcge.s16 q5, #0
+	vcge.s16 d5, #0
+
+	vceq.s16 q5, #0
+	vceq.s16 d5, #0
+
+	vcle.s16 q5, #0
+	vcle.s16 d5, #0
+
+	vacge.f32 d5, d30
+	vacge.f32 q5, q3
+
+	vacgt.f32 d5, d30
+	vacgt.f32 q5, q3
+
+@ FIXME: We don't have an alias that reverses the operands
+@  vacle.f32 d5, d30 
+@  vacle.f32 q5, q3 
+@  vaclt.f32 d5, d30
+@  vaclt.f32 q5, q3
+
 @ CHECK: vand	q6, q6, q5              @ encoding: [0x5a,0xc1,0x0c,0xf2]
 @ CHECK: vand	q6, q6, q5              @ encoding: [0x5a,0xc1,0x0c,0xf2]
 @ CHECK: vand	q7, q7, q1              @ encoding: [0x52,0xe1,0x0e,0xf2]
@@ -272,3 +309,32 @@
 @ CHECK: veor	q7, q7, q1              @ encoding: [0x52,0xe1,0x0e,0xf3]
 @ CHECK: veor	q8, q8, q2              @ encoding: [0xd4,0x01,0x40,0xf3]
 @ CHECK: veor	q8, q8, q2              @ encoding: [0xd4,0x01,0x40,0xf3]
+@ CHECK: vclt.s16        q5, q5, #0      @ encoding: [0x4a,0xa2,0xb5,0xf3]
+@ CHECK: vclt.s16        d5, d5, #0      @ encoding: [0x05,0x52,0xb5,0xf3]
+
+@ CHECK: vceq.i16        q5, q5, q3      @ encoding: [0x56,0xa8,0x1a,0xf3]
+@ CHECK: vceq.i16        d5, d5, d3      @ encoding: [0x13,0x58,0x15,0xf3]
+
+@ CHECK: vcgt.s16        q5, q5, q3      @ encoding: [0x46,0xa3,0x1a,0xf2]
+@ CHECK: vcgt.s16        d5, d5, d3      @ encoding: [0x03,0x53,0x15,0xf2]
+
+@ CHECK: vcge.s16        q5, q5, q3      @ encoding: [0x56,0xa3,0x1a,0xf2]
+@ CHECK: vcge.s16        d5, d5, d3      @ encoding: [0x13,0x53,0x15,0xf2]
+
+@ CHECK: vcgt.s16        q5, q5, #0      @ encoding: [0x4a,0xa0,0xb5,0xf3]
+@ CHECK: vcgt.s16        d5, d5, #0      @ encoding: [0x05,0x50,0xb5,0xf3]
+
+@ CHECK: vcge.s16        q5, q5, #0      @ encoding: [0xca,0xa0,0xb5,0xf3]
+@ CHECK: vcge.s16        d5, d5, #0      @ encoding: [0x85,0x50,0xb5,0xf3]
+
+@ CHECK: vceq.i16        q5, q5, #0      @ encoding: [0x4a,0xa1,0xb5,0xf3]
+@ CHECK: vceq.i16        d5, d5, #0      @ encoding: [0x05,0x51,0xb5,0xf3]
+
+@ CHECK: vcle.s16        q5, q5, #0      @ encoding: [0xca,0xa1,0xb5,0xf3]
+@ CHECK: vcle.s16        d5, d5, #0      @ encoding: [0x85,0x51,0xb5,0xf3]
+
+@ CHECK: vacge.f32       d5, d5, d30     @ encoding: [0x3e,0x5e,0x05,0xf3]
+@ CHECK: vacge.f32       q5, q5, q3      @ encoding: [0x56,0xae,0x0a,0xf3]
+
+@ CHECK: vacgt.f32       d5, d5, d30     @ encoding: [0x3e,0x5e,0x25,0xf3]
+@ CHECK: vacgt.f32       q5, q5, q3      @ encoding: [0x56,0xae,0x2a,0xf3]
diff --git a/test/MC/ARM/neon-vld-encoding.s b/test/MC/ARM/neon-vld-encoding.s
index 3cc6bf1..648e917 100644
--- a/test/MC/ARM/neon-vld-encoding.s
+++ b/test/MC/ARM/neon-vld-encoding.s
@@ -1,163 +1,163 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple armv7-apple-darwin -show-encoding < %s | FileCheck %s
 
-	vld1.8	{d16}, [r0, :64]
+	vld1.8	{d16}, [r0:64]
 	vld1.16	{d16}, [r0]
 	vld1.32	{d16}, [r0]
 	vld1.64	{d16}, [r0]
-	vld1.8	{d16, d17}, [r0, :64]
-	vld1.16	{d16, d17}, [r0, :128]
+	vld1.8	{d16, d17}, [r0:64]
+	vld1.16	{d16, d17}, [r0:128]
 	vld1.32	{d16, d17}, [r0]
 	vld1.64	{d16, d17}, [r0]
 	vld1.8 {d1, d2, d3}, [r3]
-	vld1.16 {d4, d5, d6}, [r3, :64]
+	vld1.16 {d4, d5, d6}, [r3:64]
 	vld1.32 {d5, d6, d7}, [r3]
-	vld1.64 {d6, d7, d8}, [r3, :64]
+	vld1.64 {d6, d7, d8}, [r3:64]
 	vld1.8 {d1, d2, d3, d4}, [r3]
-	vld1.16 {d4, d5, d6, d7}, [r3, :64]
+	vld1.16 {d4, d5, d6, d7}, [r3:64]
 	vld1.32 {d5, d6, d7, d8}, [r3]
-	vld1.64 {d6, d7, d8, d9}, [r3, :64]
+	vld1.64 {d6, d7, d8, d9}, [r3:64]
 
-	vld1.8	{d16}, [r0, :64]!
+	vld1.8	{d16}, [r0:64]!
 	vld1.16	{d16}, [r0]!
 	vld1.32	{d16}, [r0]!
 	vld1.64	{d16}, [r0]!
-	vld1.8	{d16, d17}, [r0, :64]!
-	vld1.16	{d16, d17}, [r0, :128]!
+	vld1.8	{d16, d17}, [r0:64]!
+	vld1.16	{d16, d17}, [r0:128]!
 	vld1.32	{d16, d17}, [r0]!
 	vld1.64	{d16, d17}, [r0]!
 
-	vld1.8	{d16}, [r0, :64], r5
+	vld1.8	{d16}, [r0:64], r5
 	vld1.16	{d16}, [r0], r5
 	vld1.32	{d16}, [r0], r5
 	vld1.64	{d16}, [r0], r5
-	vld1.8	{d16, d17}, [r0, :64], r5
-	vld1.16	{d16, d17}, [r0, :128], r5
+	vld1.8	{d16, d17}, [r0:64], r5
+	vld1.16	{d16, d17}, [r0:128], r5
 	vld1.32	{d16, d17}, [r0], r5
 	vld1.64	{d16, d17}, [r0], r5
 
 	vld1.8 {d1, d2, d3}, [r3]!
-	vld1.16 {d4, d5, d6}, [r3, :64]!
+	vld1.16 {d4, d5, d6}, [r3:64]!
 	vld1.32 {d5, d6, d7}, [r3]!
-	vld1.64 {d6, d7, d8}, [r3, :64]!
+	vld1.64 {d6, d7, d8}, [r3:64]!
 
 	vld1.8 {d1, d2, d3}, [r3], r6
-	vld1.16 {d4, d5, d6}, [r3, :64], r6
+	vld1.16 {d4, d5, d6}, [r3:64], r6
 	vld1.32 {d5, d6, d7}, [r3], r6
-	vld1.64 {d6, d7, d8}, [r3, :64], r6
+	vld1.64 {d6, d7, d8}, [r3:64], r6
 
 	vld1.8 {d1, d2, d3, d4}, [r3]!
-	vld1.16 {d4, d5, d6, d7}, [r3, :64]!
+	vld1.16 {d4, d5, d6, d7}, [r3:64]!
 	vld1.32 {d5, d6, d7, d8}, [r3]!
-	vld1.64 {d6, d7, d8, d9}, [r3, :64]!
+	vld1.64 {d6, d7, d8, d9}, [r3:64]!
 
 	vld1.8 {d1, d2, d3, d4}, [r3], r8
-	vld1.16 {d4, d5, d6, d7}, [r3, :64], r8
+	vld1.16 {d4, d5, d6, d7}, [r3:64], r8
 	vld1.32 {d5, d6, d7, d8}, [r3], r8
-	vld1.64 {d6, d7, d8, d9}, [r3, :64], r8
+	vld1.64 {d6, d7, d8, d9}, [r3:64], r8
 
-@ CHECK: vld1.8 {d16}, [r0, :64]        @ encoding: [0x1f,0x07,0x60,0xf4]
+@ CHECK: vld1.8 {d16}, [r0:64]          @ encoding: [0x1f,0x07,0x60,0xf4]
 @ CHECK: vld1.16 {d16}, [r0]            @ encoding: [0x4f,0x07,0x60,0xf4]
 @ CHECK: vld1.32 {d16}, [r0]            @ encoding: [0x8f,0x07,0x60,0xf4]
 @ CHECK: vld1.64 {d16}, [r0]            @ encoding: [0xcf,0x07,0x60,0xf4]
-@ CHECK: vld1.8 {d16, d17}, [r0, :64]   @ encoding: [0x1f,0x0a,0x60,0xf4]
-@ CHECK: vld1.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x0a,0x60,0xf4]
+@ CHECK: vld1.8 {d16, d17}, [r0:64]     @ encoding: [0x1f,0x0a,0x60,0xf4]
+@ CHECK: vld1.16 {d16, d17}, [r0:128]   @ encoding: [0x6f,0x0a,0x60,0xf4]
 @ CHECK: vld1.32 {d16, d17}, [r0]       @ encoding: [0x8f,0x0a,0x60,0xf4]
 @ CHECK: vld1.64 {d16, d17}, [r0]       @ encoding: [0xcf,0x0a,0x60,0xf4]
 @ CHECK: vld1.8 {d1, d2, d3}, [r3]      @ encoding: [0x0f,0x16,0x23,0xf4]
-@ CHECK: vld1.16 {d4, d5, d6}, [r3, :64] @ encoding: [0x5f,0x46,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6}, [r3:64]  @ encoding: [0x5f,0x46,0x23,0xf4]
 @ CHECK: vld1.32 {d5, d6, d7}, [r3]     @ encoding: [0x8f,0x56,0x23,0xf4]
-@ CHECK: vld1.64 {d6, d7, d8}, [r3, :64] @ encoding: [0xdf,0x66,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8}, [r3:64]  @ encoding: [0xdf,0x66,0x23,0xf4]
 @ CHECK: vld1.8 {d1, d2, d3, d4}, [r3]  @ encoding: [0x0f,0x12,0x23,0xf4]
-@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3, :64] @ encoding: [0x5f,0x42,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3:64] @ encoding: [0x5f,0x42,0x23,0xf4]
 @ CHECK: vld1.32 {d5, d6, d7, d8}, [r3]  @ encoding: [0x8f,0x52,0x23,0xf4]
-@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3, :64] @ encoding: [0xdf,0x62,0x23,0xf4]
-@ CHECK: vld1.8	{d16}, [r0, :64]!       @ encoding: [0x1d,0x07,0x60,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3:64] @ encoding: [0xdf,0x62,0x23,0xf4]
+@ CHECK: vld1.8	{d16}, [r0:64]!         @ encoding: [0x1d,0x07,0x60,0xf4]
 
 @ CHECK: vld1.16 {d16}, [r0]!           @ encoding: [0x4d,0x07,0x60,0xf4]
 @ CHECK: vld1.32 {d16}, [r0]!           @ encoding: [0x8d,0x07,0x60,0xf4]
 @ CHECK: vld1.64 {d16}, [r0]!           @ encoding: [0xcd,0x07,0x60,0xf4]
-@ CHECK: vld1.8 {d16, d17}, [r0, :64]!  @ encoding: [0x1d,0x0a,0x60,0xf4]
-@ CHECK: vld1.16 {d16, d17}, [r0, :128]! @ encoding: [0x6d,0x0a,0x60,0xf4]
+@ CHECK: vld1.8 {d16, d17}, [r0:64]!    @ encoding: [0x1d,0x0a,0x60,0xf4]
+@ CHECK: vld1.16 {d16, d17}, [r0:128]!  @ encoding: [0x6d,0x0a,0x60,0xf4]
 @ CHECK: vld1.32 {d16, d17}, [r0]!      @ encoding: [0x8d,0x0a,0x60,0xf4]
 @ CHECK: vld1.64 {d16, d17}, [r0]!      @ encoding: [0xcd,0x0a,0x60,0xf4]
 
-@ CHECK: vld1.8 {d16}, [r0, :64], r5    @ encoding: [0x15,0x07,0x60,0xf4]
+@ CHECK: vld1.8 {d16}, [r0:64], r5      @ encoding: [0x15,0x07,0x60,0xf4]
 @ CHECK: vld1.16 {d16}, [r0], r5        @ encoding: [0x45,0x07,0x60,0xf4]
 @ CHECK: vld1.32 {d16}, [r0], r5        @ encoding: [0x85,0x07,0x60,0xf4]
 @ CHECK: vld1.64 {d16}, [r0], r5        @ encoding: [0xc5,0x07,0x60,0xf4]
-@ CHECK: vld1.8 {d16, d17}, [r0, :64], r5 @ encoding: [0x15,0x0a,0x60,0xf4]
-@ CHECK: vld1.16 {d16, d17}, [r0, :128], r5 @ encoding: [0x65,0x0a,0x60,0xf4]
+@ CHECK: vld1.8 {d16, d17}, [r0:64], r5 @ encoding: [0x15,0x0a,0x60,0xf4]
+@ CHECK: vld1.16 {d16, d17}, [r0:128], r5 @ encoding: [0x65,0x0a,0x60,0xf4]
 @ CHECK: vld1.32 {d16, d17}, [r0], r5   @ encoding: [0x85,0x0a,0x60,0xf4]
 @ CHECK: vld1.64 {d16, d17}, [r0], r5   @ encoding: [0xc5,0x0a,0x60,0xf4]
 
 @ CHECK: vld1.8	{d1, d2, d3}, [r3]!     @ encoding: [0x0d,0x16,0x23,0xf4]
-@ CHECK: vld1.16 {d4, d5, d6}, [r3, :64]! @ encoding: [0x5d,0x46,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6}, [r3:64]! @ encoding: [0x5d,0x46,0x23,0xf4]
 @ CHECK: vld1.32 {d5, d6, d7}, [r3]!     @ encoding: [0x8d,0x56,0x23,0xf4]
-@ CHECK: vld1.64 {d6, d7, d8}, [r3, :64]! @ encoding: [0xdd,0x66,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8}, [r3:64]! @ encoding: [0xdd,0x66,0x23,0xf4]
 
 @ CHECK: vld1.8	{d1, d2, d3}, [r3], r6  @ encoding: [0x06,0x16,0x23,0xf4]
-@ CHECK: vld1.16 {d4, d5, d6}, [r3, :64], r6 @ encoding: [0x56,0x46,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6}, [r3:64], r6 @ encoding: [0x56,0x46,0x23,0xf4]
 @ CHECK: vld1.32 {d5, d6, d7}, [r3], r6  @ encoding: [0x86,0x56,0x23,0xf4]
-@ CHECK: vld1.64 {d6, d7, d8}, [r3, :64], r6 @ encoding: [0xd6,0x66,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8}, [r3:64], r6 @ encoding: [0xd6,0x66,0x23,0xf4]
 
 @ CHECK: vld1.8	{d1, d2, d3, d4}, [r3]! @ encoding: [0x0d,0x12,0x23,0xf4]
-@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3, :64]! @ encoding: [0x5d,0x42,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3:64]! @ encoding: [0x5d,0x42,0x23,0xf4]
 @ CHECK: vld1.32 {d5, d6, d7, d8}, [r3]! @ encoding: [0x8d,0x52,0x23,0xf4]
-@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3, :64]! @ encoding: [0xdd,0x62,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3:64]! @ encoding: [0xdd,0x62,0x23,0xf4]
 
 @ CHECK: vld1.8	{d1, d2, d3, d4}, [r3], r8 @ encoding: [0x08,0x12,0x23,0xf4]
-@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3, :64], r8 @ encoding: [0x58,0x42,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3:64], r8 @ encoding: [0x58,0x42,0x23,0xf4]
 @ CHECK: vld1.32 {d5, d6, d7, d8}, [r3], r8 @ encoding: [0x88,0x52,0x23,0xf4]
-@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3, :64], r8 @ encoding: [0xd8,0x62,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3:64], r8 @ encoding: [0xd8,0x62,0x23,0xf4]
 
 
-	vld2.8	{d16, d17}, [r0, :64]
-	vld2.16	{d16, d17}, [r0, :128]
+	vld2.8	{d16, d17}, [r0:64]
+	vld2.16	{d16, d17}, [r0:128]
 	vld2.32	{d16, d17}, [r0]
-	vld2.8	{d16, d17, d18, d19}, [r0, :64]
-	vld2.16	{d16, d17, d18, d19}, [r0, :128]
-	vld2.32	{d16, d17, d18, d19}, [r0, :256]
+	vld2.8	{d16, d17, d18, d19}, [r0:64]
+	vld2.16	{d16, d17, d18, d19}, [r0:128]
+	vld2.32	{d16, d17, d18, d19}, [r0:256]
 
-	vld2.8	{d19, d20}, [r0, :64]!
-	vld2.16	{d16, d17}, [r0, :128]!
+	vld2.8	{d19, d20}, [r0:64]!
+	vld2.16	{d16, d17}, [r0:128]!
 	vld2.32	{q10}, [r0]!
-	vld2.8	{d4-d7}, [r0, :64]!
-	vld2.16	{d1, d2, d3, d4}, [r0, :128]!
-	vld2.32	{q7, q8}, [r0, :256]!
+	vld2.8	{d4-d7}, [r0:64]!
+	vld2.16	{d1, d2, d3, d4}, [r0:128]!
+	vld2.32	{q7, q8}, [r0:256]!
 
-	vld2.8	{d19, d20}, [r0, :64], r6
-	vld2.16	{d16, d17}, [r0, :128], r6
+	vld2.8	{d19, d20}, [r0:64], r6
+	vld2.16	{d16, d17}, [r0:128], r6
 	vld2.32	{q10}, [r0], r6
-	vld2.8	{d4-d7}, [r0, :64], r6
-	vld2.16	{d1, d2, d3, d4}, [r0, :128], r6
-	vld2.32	{q7, q8}, [r0, :256], r6
+	vld2.8	{d4-d7}, [r0:64], r6
+	vld2.16	{d1, d2, d3, d4}, [r0:128], r6
+	vld2.32	{q7, q8}, [r0:256], r6
 
-@ CHECK: vld2.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x08,0x60,0xf4]
-@ CHECK: vld2.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x08,0x60,0xf4]
+@ CHECK: vld2.8	{d16, d17}, [r0:64]   @ encoding: [0x1f,0x08,0x60,0xf4]
+@ CHECK: vld2.16 {d16, d17}, [r0:128] @ encoding: [0x6f,0x08,0x60,0xf4]
 @ CHECK: vld2.32 {d16, d17}, [r0] @ encoding: [0x8f,0x08,0x60,0xf4]
-@ CHECK: vld2.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x60,0xf4]
-@ CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x60,0xf4]
-@ CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x60,0xf4]
+@ CHECK: vld2.8	{d16, d17, d18, d19}, [r0:64] @ encoding: [0x1f,0x03,0x60,0xf4]
+@ CHECK: vld2.16 {d16, d17, d18, d19}, [r0:128] @ encoding: [0x6f,0x03,0x60,0xf4]
+@ CHECK: vld2.32 {d16, d17, d18, d19}, [r0:256] @ encoding: [0xbf,0x03,0x60,0xf4]
 
-@ CHECK: vld2.8	{d19, d20}, [r0, :64]!  @ encoding: [0x1d,0x38,0x60,0xf4]
-@ CHECK: vld2.16 {d16, d17}, [r0, :128]! @ encoding: [0x6d,0x08,0x60,0xf4]
+@ CHECK: vld2.8	{d19, d20}, [r0:64]!  @ encoding: [0x1d,0x38,0x60,0xf4]
+@ CHECK: vld2.16 {d16, d17}, [r0:128]! @ encoding: [0x6d,0x08,0x60,0xf4]
 @ CHECK: vld2.32 {d20, d21}, [r0]!       @ encoding: [0x8d,0x48,0x60,0xf4]
-@ CHECK: vld2.8	{d4, d5, d6, d7}, [r0, :64]! @ encoding: [0x1d,0x43,0x20,0xf4]
-@ CHECK: vld2.16 {d1, d2, d3, d4}, [r0, :128]! @ encoding: [0x6d,0x13,0x20,0xf4]
-@ CHECK: vld2.32 {d14, d15, d16, d17}, [r0, :256]! @ encoding: [0xbd,0xe3,0x20,0xf4]
+@ CHECK: vld2.8	{d4, d5, d6, d7}, [r0:64]! @ encoding: [0x1d,0x43,0x20,0xf4]
+@ CHECK: vld2.16 {d1, d2, d3, d4}, [r0:128]! @ encoding: [0x6d,0x13,0x20,0xf4]
+@ CHECK: vld2.32 {d14, d15, d16, d17}, [r0:256]! @ encoding: [0xbd,0xe3,0x20,0xf4]
 
-@ CHECK: vld2.8	{d19, d20}, [r0, :64], r6 @ encoding: [0x16,0x38,0x60,0xf4]
-@ CHECK: vld2.16 {d16, d17}, [r0, :128], r6 @ encoding: [0x66,0x08,0x60,0xf4]
+@ CHECK: vld2.8	{d19, d20}, [r0:64], r6 @ encoding: [0x16,0x38,0x60,0xf4]
+@ CHECK: vld2.16 {d16, d17}, [r0:128], r6 @ encoding: [0x66,0x08,0x60,0xf4]
 @ CHECK: vld2.32 {d20, d21}, [r0], r6    @ encoding: [0x86,0x48,0x60,0xf4]
-@ CHECK: vld2.8	{d4, d5, d6, d7}, [r0, :64], r6 @ encoding: [0x16,0x43,0x20,0xf4]
-@ CHECK: vld2.16 {d1, d2, d3, d4}, [r0, :128], r6 @ encoding: [0x66,0x13,0x20,0xf4]
-@ CHECK: vld2.32 {d14, d15, d16, d17}, [r0, :256], r6 @ encoding: [0xb6,0xe3,0x20,0xf4]
+@ CHECK: vld2.8	{d4, d5, d6, d7}, [r0:64], r6 @ encoding: [0x16,0x43,0x20,0xf4]
+@ CHECK: vld2.16 {d1, d2, d3, d4}, [r0:128], r6 @ encoding: [0x66,0x13,0x20,0xf4]
+@ CHECK: vld2.32 {d14, d15, d16, d17}, [r0:256], r6 @ encoding: [0xb6,0xe3,0x20,0xf4]
 
 
 	vld3.8 {d16, d17, d18}, [r1]
 	vld3.16 {d6, d7, d8}, [r2]
 	vld3.32 {d1, d2, d3}, [r3]
-	vld3.8 {d16, d18, d20}, [r0, :64]
+	vld3.8 {d16, d18, d20}, [r0:64]
 	vld3.u16 {d27, d29, d31}, [r4]
 	vld3.i32 {d6, d8, d10}, [r5]
 
@@ -171,7 +171,7 @@
 	vld3.p8 {d6, d7, d8}, [r8]!
 	vld3.16 {d9, d10, d11}, [r7]!
 	vld3.f32 {d1, d2, d3}, [r6]!
-	vld3.8 {d16, d18, d20}, [r0, :64]!
+	vld3.8 {d16, d18, d20}, [r0:64]!
 	vld3.p16 {d20, d22, d24}, [r5]!
 	vld3.32 {d5, d7, d9}, [r4]!
 
@@ -179,7 +179,7 @@
 @ CHECK: vld3.8	{d16, d17, d18}, [r1]   @ encoding: [0x0f,0x04,0x61,0xf4]
 @ CHECK: vld3.16	{d6, d7, d8}, [r2]      @ encoding: [0x4f,0x64,0x22,0xf4]
 @ CHECK: vld3.32	{d1, d2, d3}, [r3]      @ encoding: [0x8f,0x14,0x23,0xf4]
-@ CHECK: vld3.8	{d16, d18, d20}, [r0, :64] @ encoding: [0x1f,0x05,0x60,0xf4]
+@ CHECK: vld3.8	{d16, d18, d20}, [r0:64] @ encoding: [0x1f,0x05,0x60,0xf4]
 @ CHECK: vld3.16	{d27, d29, d31}, [r4]   @ encoding: [0x4f,0xb5,0x64,0xf4]
 @ CHECK: vld3.32	{d6, d8, d10}, [r5]     @ encoding: [0x8f,0x65,0x25,0xf4]
 @ CHECK: vld3.8	{d12, d13, d14}, [r6], r1 @ encoding: [0x01,0xc4,0x26,0xf4]
@@ -191,48 +191,48 @@
 @ CHECK: vld3.8	{d6, d7, d8}, [r8]!     @ encoding: [0x0d,0x64,0x28,0xf4]
 @ CHECK: vld3.16	{d9, d10, d11}, [r7]!   @ encoding: [0x4d,0x94,0x27,0xf4]
 @ CHECK: vld3.32	{d1, d2, d3}, [r6]!     @ encoding: [0x8d,0x14,0x26,0xf4]
-@ CHECK: vld3.8	{d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x60,0xf4]
+@ CHECK: vld3.8	{d16, d18, d20}, [r0:64]! @ encoding: [0x1d,0x05,0x60,0xf4]
 @ CHECK: vld3.16	{d20, d22, d24}, [r5]!  @ encoding: [0x4d,0x45,0x65,0xf4]
 @ CHECK: vld3.32	{d5, d7, d9}, [r4]!     @ encoding: [0x8d,0x55,0x24,0xf4]
 
 
-	vld4.8 {d16, d17, d18, d19}, [r1, :64]
-	vld4.16 {d16, d17, d18, d19}, [r2, :128]
-	vld4.32 {d16, d17, d18, d19}, [r3, :256]
-	vld4.8 {d17, d19, d21, d23}, [r5, :256]
+	vld4.8 {d16, d17, d18, d19}, [r1:64]
+	vld4.16 {d16, d17, d18, d19}, [r2:128]
+	vld4.32 {d16, d17, d18, d19}, [r3:256]
+	vld4.8 {d17, d19, d21, d23}, [r5:256]
 	vld4.16 {d17, d19, d21, d23}, [r7]
 	vld4.32 {d16, d18, d20, d22}, [r8]
 
-	vld4.s8 {d16, d17, d18, d19}, [r1, :64]!
-	vld4.s16 {d16, d17, d18, d19}, [r2, :128]!
-	vld4.s32 {d16, d17, d18, d19}, [r3, :256]!
-	vld4.u8 {d17, d19, d21, d23}, [r5, :256]!
+	vld4.s8 {d16, d17, d18, d19}, [r1:64]!
+	vld4.s16 {d16, d17, d18, d19}, [r2:128]!
+	vld4.s32 {d16, d17, d18, d19}, [r3:256]!
+	vld4.u8 {d17, d19, d21, d23}, [r5:256]!
 	vld4.u16 {d17, d19, d21, d23}, [r7]!
 	vld4.u32 {d16, d18, d20, d22}, [r8]!
 
-	vld4.p8 {d16, d17, d18, d19}, [r1, :64], r8
+	vld4.p8 {d16, d17, d18, d19}, [r1:64], r8
 	vld4.p16 {d16, d17, d18, d19}, [r2], r7
-	vld4.f32 {d16, d17, d18, d19}, [r3, :64], r5
-	vld4.i8 {d16, d18, d20, d22}, [r4, :256], r2
+	vld4.f32 {d16, d17, d18, d19}, [r3:64], r5
+	vld4.i8 {d16, d18, d20, d22}, [r4:256], r2
 	vld4.i16 {d16, d18, d20, d22}, [r6], r3
 	vld4.i32 {d17, d19, d21, d23}, [r9], r4
 
-@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1, :64] @ encoding: [0x1f,0x00,0x61,0xf4]
-@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2, :128] @ encoding: [0x6f,0x00,0x62,0xf4]
-@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3, :256] @ encoding: [0xbf,0x00,0x63,0xf4]
-@ CHECK: vld4.8 {d17, d19, d21, d23}, [r5, :256] @ encoding: [0x3f,0x11,0x65,0xf4]
+@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1:64] @ encoding: [0x1f,0x00,0x61,0xf4]
+@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2:128] @ encoding: [0x6f,0x00,0x62,0xf4]
+@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3:256] @ encoding: [0xbf,0x00,0x63,0xf4]
+@ CHECK: vld4.8 {d17, d19, d21, d23}, [r5:256] @ encoding: [0x3f,0x11,0x65,0xf4]
 @ CHECK: vld4.16 {d17, d19, d21, d23}, [r7] @ encoding: [0x4f,0x11,0x67,0xf4]
 @ CHECK: vld4.32 {d16, d18, d20, d22}, [r8] @ encoding: [0x8f,0x01,0x68,0xf4]
-@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1, :64]! @ encoding: [0x1d,0x00,0x61,0xf4]
-@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2, :128]! @ encoding: [0x6d,0x00,0x62,0xf4]
-@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3, :256]! @ encoding: [0xbd,0x00,0x63,0xf4]
-@ CHECK: vld4.8 {d17, d19, d21, d23}, [r5, :256]! @ encoding: [0x3d,0x11,0x65,0xf4]
+@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1:64]! @ encoding: [0x1d,0x00,0x61,0xf4]
+@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2:128]! @ encoding: [0x6d,0x00,0x62,0xf4]
+@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3:256]! @ encoding: [0xbd,0x00,0x63,0xf4]
+@ CHECK: vld4.8 {d17, d19, d21, d23}, [r5:256]! @ encoding: [0x3d,0x11,0x65,0xf4]
 @ CHECK: vld4.16 {d17, d19, d21, d23}, [r7]! @ encoding: [0x4d,0x11,0x67,0xf4]
 @ CHECK: vld4.32 {d16, d18, d20, d22}, [r8]! @ encoding: [0x8d,0x01,0x68,0xf4]
-@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1, :64], r8 @ encoding: [0x18,0x00,0x61,0xf4]
+@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1:64], r8 @ encoding: [0x18,0x00,0x61,0xf4]
 @ CHECK: vld4.16 {d16, d17, d18, d19}, [r2], r7 @ encoding: [0x47,0x00,0x62,0xf4]
-@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3, :64], r5 @ encoding: [0x95,0x00,0x63,0xf4]
-@ CHECK: vld4.8 {d16, d18, d20, d22}, [r4, :256], r2 @ encoding: [0x32,0x01,0x64,0xf4]
+@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3:64], r5 @ encoding: [0x95,0x00,0x63,0xf4]
+@ CHECK: vld4.8 {d16, d18, d20, d22}, [r4:256], r2 @ encoding: [0x32,0x01,0x64,0xf4]
 @ CHECK: vld4.16 {d16, d18, d20, d22}, [r6], r3 @ encoding: [0x43,0x01,0x66,0xf4]
 @ CHECK: vld4.32 {d17, d19, d21, d23}, [r9], r4 @ encoding: [0x84,0x11,0x69,0xf4]
 
@@ -252,28 +252,28 @@
 @ CHECK: vld1.8	{d4[], d5[]}, [r1], r3  @ encoding: [0x23,0x4c,0xa1,0xf4]
 
 	vld1.8	{d16[3]}, [r0]
-	vld1.16	{d16[2]}, [r0, :16]
-	vld1.32	{d16[1]}, [r0, :32]
+	vld1.16	{d16[2]}, [r0:16]
+	vld1.32	{d16[1]}, [r0:32]
         vld1.p8 d12[6], [r2]!
         vld1.i8 d12[6], [r2], r2
         vld1.u16 d12[3], [r2]!
         vld1.16 d12[2], [r2], r2
 
 @ CHECK: vld1.8	{d16[3]}, [r0]          @ encoding: [0x6f,0x00,0xe0,0xf4]
-@ CHECK: vld1.16 {d16[2]}, [r0, :16]    @ encoding: [0x9f,0x04,0xe0,0xf4]
-@ CHECK: vld1.32 {d16[1]}, [r0, :32]    @ encoding: [0xbf,0x08,0xe0,0xf4]
+@ CHECK: vld1.16 {d16[2]}, [r0:16]      @ encoding: [0x9f,0x04,0xe0,0xf4]
+@ CHECK: vld1.32 {d16[1]}, [r0:32]      @ encoding: [0xbf,0x08,0xe0,0xf4]
 @ CHECK: vld1.8	{d12[6]}, [r2]!         @ encoding: [0xcd,0xc0,0xa2,0xf4]
 @ CHECK: vld1.8	{d12[6]}, [r2], r2      @ encoding: [0xc2,0xc0,0xa2,0xf4]
 @ CHECK: vld1.16 {d12[3]}, [r2]!        @ encoding: [0xcd,0xc4,0xa2,0xf4]
 @ CHECK: vld1.16 {d12[2]}, [r2], r2     @ encoding: [0x82,0xc4,0xa2,0xf4]
 
 
-	vld2.8	{d16[1], d17[1]}, [r0, :16]
-	vld2.16	{d16[1], d17[1]}, [r0, :32]
+	vld2.8	{d16[1], d17[1]}, [r0:16]
+	vld2.16	{d16[1], d17[1]}, [r0:32]
 	vld2.32	{d16[1], d17[1]}, [r0]
 	vld2.16	{d17[1], d19[1]}, [r0]
-	vld2.32	{d17[0], d19[0]}, [r0, :64]
-	vld2.32	{d17[0], d19[0]}, [r0, :64]!
+	vld2.32	{d17[0], d19[0]}, [r0:64]
+	vld2.32	{d17[0], d19[0]}, [r0:64]!
         vld2.8 {d2[4], d3[4]}, [r2], r3
         vld2.8 {d2[4], d3[4]}, [r2]!
         vld2.8 {d2[4], d3[4]}, [r2]
@@ -284,12 +284,12 @@
         vld2.32 {d22[ ],d23[ ]}, [r5], r4
         vld2.32 {d22[ ],d24[ ]}, [r6], r4
 
-@ CHECK: vld2.8	{d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xe0,0xf4]
-@ CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xe0,0xf4]
+@ CHECK: vld2.8	{d16[1], d17[1]}, [r0:16] @ encoding: [0x3f,0x01,0xe0,0xf4]
+@ CHECK: vld2.16 {d16[1], d17[1]}, [r0:32] @ encoding: [0x5f,0x05,0xe0,0xf4]
 @ CHECK: vld2.32 {d16[1], d17[1]}, [r0]  @ encoding: [0x8f,0x09,0xe0,0xf4]
 @ CHECK: vld2.16 {d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xe0,0xf4]
-@ CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xe0,0xf4]
-@ CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64]! @ encoding: [0x5d,0x19,0xe0,0xf4]
+@ CHECK: vld2.32 {d17[0], d19[0]}, [r0:64] @ encoding: [0x5f,0x19,0xe0,0xf4]
+@ CHECK: vld2.32 {d17[0], d19[0]}, [r0:64]! @ encoding: [0x5d,0x19,0xe0,0xf4]
 @ CHECK: vld2.8	{d2[4], d3[4]}, [r2], r3 @ encoding: [0x83,0x21,0xa2,0xf4]
 @ CHECK: vld2.8	{d2[4], d3[4]}, [r2]!   @ encoding: [0x8d,0x21,0xa2,0xf4]
 @ CHECK: vld2.8	{d2[4], d3[4]}, [r2]    @ encoding: [0x8f,0x21,0xa2,0xf4]
@@ -383,15 +383,15 @@
 	vld4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7]
 	vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]
 
-	vld4.s8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
-	vld4.s16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]!
-	vld4.s32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]!
+	vld4.s8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32]!
+	vld4.s16 {d16[1], d17[1], d18[1], d19[1]}, [r2:64]!
+	vld4.s32 {d16[1], d17[1], d18[1], d19[1]}, [r3:128]!
 	vld4.u16 {d17[1], d19[1], d21[1], d23[1]}, [r7]!
 	vld4.u32 {d16[1], d18[1], d20[1], d22[1]}, [r8]!
 
-	vld4.p8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8
+	vld4.p8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32], r8
 	vld4.p16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7
-	vld4.f32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5
+	vld4.f32 {d16[1], d17[1], d18[1], d19[1]}, [r3:64], r5
 	vld4.i16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3
 	vld4.i32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4
 
@@ -400,14 +400,14 @@
 @ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] @ encoding: [0x8f,0x0b,0xe3,0xf4]
 @ CHECK: vld4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7] @ encoding: [0x6f,0x17,0xe7,0xf4]
 @ CHECK: vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8] @ encoding: [0xcf,0x0b,0xe8,0xf4]
-@ CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! @ encoding: [0x3d,0x03,0xe1,0xf4]
-@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]! @ encoding: [0x5d,0x07,0xe2,0xf4]
-@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]! @ encoding: [0xad,0x0b,0xe3,0xf4]
+@ CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1:32]! @ encoding: [0x3d,0x03,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2:64]! @ encoding: [0x5d,0x07,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3:128]! @ encoding: [0xad,0x0b,0xe3,0xf4]
 @ CHECK: vld4.16 {d17[1], d18[1], d19[1], d20[1]}, [r7]! @ encoding: [0x6d,0x17,0xe7,0xf4]
 @ CHECK: vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]! @ encoding: [0xcd,0x0b,0xe8,0xf4]
-@ CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8 @ encoding: [0x38,0x03,0xe1,0xf4]
+@ CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1:32], r8 @ encoding: [0x38,0x03,0xe1,0xf4]
 @ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7 @ encoding: [0x47,0x07,0xe2,0xf4]
-@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5 @ encoding: [0x95,0x0b,0xe3,0xf4]
+@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3:64], r5 @ encoding: [0x95,0x0b,0xe3,0xf4]
 @ CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3 @ encoding: [0x63,0x07,0xe6,0xf4]
 @ CHECK: vld4.32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 @ encoding: [0xc4,0x1b,0xe9,0xf4]
 
@@ -490,8 +490,17 @@
 
 
 @ Register lists can use the range syntax, just like VLDM
-	vld1.f64 {d2-d5}, [r2,:128]!
-	vld1.f64 {d2,d3,d4,d5}, [r2,:128]!
+	vld1.f64 {d2-d5}, [r2:128]!
+	vld1.f64 {d2,d3,d4,d5}, [r2:128]!
 
-@ CHECK: vld1.64 {d2, d3, d4, d5}, [r2, :128]! @ encoding: [0xed,0x22,0x22,0xf4]
-@ CHECK: vld1.64 {d2, d3, d4, d5}, [r2, :128]! @ encoding: [0xed,0x22,0x22,0xf4]
+@ CHECK: vld1.64 {d2, d3, d4, d5}, [r2:128]! @ encoding: [0xed,0x22,0x22,0xf4]
+@ CHECK: vld1.64 {d2, d3, d4, d5}, [r2:128]! @ encoding: [0xed,0x22,0x22,0xf4]
+
+
+@ verify that the old incorrect alignment specifier syntax (", :")
+@ still gets accepted.
+        vld2.8	{d16, d17}, [r0, :64]
+        vld2.16	{d16, d17}, [r0, :128]
+
+@ CHECK: vld2.8	{d16, d17}, [r0:64]   @ encoding: [0x1f,0x08,0x60,0xf4]
+@ CHECK: vld2.16 {d16, d17}, [r0:128] @ encoding: [0x6f,0x08,0x60,0xf4]
diff --git a/test/MC/ARM/neon-vst-encoding.s b/test/MC/ARM/neon-vst-encoding.s
index f5feca4..ef9f037 100644
--- a/test/MC/ARM/neon-vst-encoding.s
+++ b/test/MC/ARM/neon-vst-encoding.s
@@ -1,67 +1,67 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple armv7-apple-darwin -show-encoding < %s | FileCheck %s
 
-	vst1.8	{d16}, [r0, :64]
+	vst1.8	{d16}, [r0:64]
 	vst1.16	{d16}, [r0]
 	vst1.32	{d16}, [r0]
 	vst1.64	{d16}, [r0]
-	vst1.8	{d16, d17}, [r0, :64]
-	vst1.16	{d16, d17}, [r0, :128]
+	vst1.8	{d16, d17}, [r0:64]
+	vst1.16	{d16, d17}, [r0:128]
 	vst1.32	{d16, d17}, [r0]
 	vst1.64	{d16, d17}, [r0]
-        vst1.8  {d16, d17, d18}, [r0, :64]
-        vst1.8  {d16, d17, d18}, [r0, :64]!
+        vst1.8  {d16, d17, d18}, [r0:64]
+        vst1.8  {d16, d17, d18}, [r0:64]!
         vst1.8  {d16, d17, d18}, [r0], r3
-        vst1.8  {d16, d17, d18, d19}, [r0, :64]
-        vst1.16  {d16, d17, d18, d19}, [r1, :64]!
+        vst1.8  {d16, d17, d18, d19}, [r0:64]
+        vst1.16  {d16, d17, d18, d19}, [r1:64]!
         vst1.64  {d16, d17, d18, d19}, [r3], r2
 
-@ CHECK: vst1.8	{d16}, [r0, :64]        @ encoding: [0x1f,0x07,0x40,0xf4]
+@ CHECK: vst1.8	{d16}, [r0:64]        @ encoding: [0x1f,0x07,0x40,0xf4]
 @ CHECK: vst1.16 {d16}, [r0]            @ encoding: [0x4f,0x07,0x40,0xf4]
 @ CHECK: vst1.32 {d16}, [r0]            @ encoding: [0x8f,0x07,0x40,0xf4]
 @ CHECK: vst1.64 {d16}, [r0]            @ encoding: [0xcf,0x07,0x40,0xf4]
-@ CHECK: vst1.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x0a,0x40,0xf4]
-@ CHECK: vst1.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x0a,0x40,0xf4]
+@ CHECK: vst1.8	{d16, d17}, [r0:64]   @ encoding: [0x1f,0x0a,0x40,0xf4]
+@ CHECK: vst1.16 {d16, d17}, [r0:128] @ encoding: [0x6f,0x0a,0x40,0xf4]
 @ CHECK: vst1.32 {d16, d17}, [r0]       @ encoding: [0x8f,0x0a,0x40,0xf4]
 @ CHECK: vst1.64 {d16, d17}, [r0]       @ encoding: [0xcf,0x0a,0x40,0xf4]
-@ CHECK: vst1.8	{d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x06,0x40,0xf4]
-@ CHECK: vst1.8	{d16, d17, d18}, [r0, :64]! @ encoding: [0x1d,0x06,0x40,0xf4]
+@ CHECK: vst1.8	{d16, d17, d18}, [r0:64] @ encoding: [0x1f,0x06,0x40,0xf4]
+@ CHECK: vst1.8	{d16, d17, d18}, [r0:64]! @ encoding: [0x1d,0x06,0x40,0xf4]
 @ CHECK: vst1.8	{d16, d17, d18}, [r0], r3 @ encoding: [0x03,0x06,0x40,0xf4]
-@ CHECK: vst1.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x02,0x40,0xf4]
-@ CHECK: vst1.16 {d16, d17, d18, d19}, [r1, :64]! @ encoding: [0x5d,0x02,0x41,0xf4]
+@ CHECK: vst1.8	{d16, d17, d18, d19}, [r0:64] @ encoding: [0x1f,0x02,0x40,0xf4]
+@ CHECK: vst1.16 {d16, d17, d18, d19}, [r1:64]! @ encoding: [0x5d,0x02,0x41,0xf4]
 @ CHECK: vst1.64 {d16, d17, d18, d19}, [r3], r2 @ encoding: [0xc2,0x02,0x43,0xf4]
 
 
-	vst2.8	{d16, d17}, [r0, :64]
-	vst2.16	{d16, d17}, [r0, :128]
+	vst2.8	{d16, d17}, [r0:64]
+	vst2.16	{d16, d17}, [r0:128]
 	vst2.32	{d16, d17}, [r0]
-	vst2.8	{d16, d17, d18, d19}, [r0, :64]
-	vst2.16	{d16, d17, d18, d19}, [r0, :128]
-	vst2.32	{d16, d17, d18, d19}, [r0, :256]
-	vst2.8	{d16, d17}, [r0, :64]!
-	vst2.16	{q15}, [r0, :128]!
+	vst2.8	{d16, d17, d18, d19}, [r0:64]
+	vst2.16	{d16, d17, d18, d19}, [r0:128]
+	vst2.32	{d16, d17, d18, d19}, [r0:256]
+	vst2.8	{d16, d17}, [r0:64]!
+	vst2.16	{q15}, [r0:128]!
 	vst2.32	{d14, d15}, [r0]!
-	vst2.8	{d16, d17, d18, d19}, [r0, :64]!
-	vst2.16	{d18-d21}, [r0, :128]!
-	vst2.32	{q4, q5}, [r0, :256]!
+	vst2.8	{d16, d17, d18, d19}, [r0:64]!
+	vst2.16	{d18-d21}, [r0:128]!
+	vst2.32	{q4, q5}, [r0:256]!
 
-@ CHECK: vst2.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x08,0x40,0xf4]
-@ CHECK: vst2.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x08,0x40,0xf4]
+@ CHECK: vst2.8	{d16, d17}, [r0:64]   @ encoding: [0x1f,0x08,0x40,0xf4]
+@ CHECK: vst2.16 {d16, d17}, [r0:128] @ encoding: [0x6f,0x08,0x40,0xf4]
 @ CHECK: vst2.32 {d16, d17}, [r0]       @ encoding: [0x8f,0x08,0x40,0xf4]
-@ CHECK: vst2.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x40,0xf4]
-@ CHECK: vst2.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x40,0xf4]
-@ CHECK: vst2.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x40,0xf4]
-@ CHECK: vst2.8	{d16, d17}, [r0, :64]!  @ encoding: [0x1d,0x08,0x40,0xf4]
-@ CHECK: vst2.16	{d30, d31}, [r0, :128]! @ encoding: [0x6d,0xe8,0x40,0xf4]
+@ CHECK: vst2.8	{d16, d17, d18, d19}, [r0:64] @ encoding: [0x1f,0x03,0x40,0xf4]
+@ CHECK: vst2.16 {d16, d17, d18, d19}, [r0:128] @ encoding: [0x6f,0x03,0x40,0xf4]
+@ CHECK: vst2.32 {d16, d17, d18, d19}, [r0:256] @ encoding: [0xbf,0x03,0x40,0xf4]
+@ CHECK: vst2.8	{d16, d17}, [r0:64]!  @ encoding: [0x1d,0x08,0x40,0xf4]
+@ CHECK: vst2.16	{d30, d31}, [r0:128]! @ encoding: [0x6d,0xe8,0x40,0xf4]
 @ CHECK: vst2.32	{d14, d15}, [r0]!       @ encoding: [0x8d,0xe8,0x00,0xf4]
-@ CHECK: vst2.8	{d16, d17, d18, d19}, [r0, :64]! @ encoding: [0x1d,0x03,0x40,0xf4]
-@ CHECK: vst2.16	{d18, d19, d20, d21}, [r0, :128]! @ encoding: [0x6d,0x23,0x40,0xf4]
-@ CHECK: vst2.32	{d8, d9, d10, d11}, [r0, :256]! @ encoding: [0xbd,0x83,0x00,0xf4]
+@ CHECK: vst2.8	{d16, d17, d18, d19}, [r0:64]! @ encoding: [0x1d,0x03,0x40,0xf4]
+@ CHECK: vst2.16	{d18, d19, d20, d21}, [r0:128]! @ encoding: [0x6d,0x23,0x40,0xf4]
+@ CHECK: vst2.32	{d8, d9, d10, d11}, [r0:256]! @ encoding: [0xbd,0x83,0x00,0xf4]
 
 
 	vst3.8 {d16, d17, d18}, [r1]
 	vst3.16 {d6, d7, d8}, [r2]
 	vst3.32 {d1, d2, d3}, [r3]
-	vst3.8 {d16, d18, d20}, [r0, :64]
+	vst3.8 {d16, d18, d20}, [r0:64]
 	vst3.u16 {d27, d29, d31}, [r4]
 	vst3.i32 {d6, d8, d10}, [r5]
 
@@ -75,14 +75,14 @@
 	vst3.p8 {d6, d7, d8}, [r8]!
 	vst3.16 {d9, d10, d11}, [r7]!
 	vst3.f32 {d1, d2, d3}, [r6]!
-	vst3.8 {d16, d18, d20}, [r0, :64]!
+	vst3.8 {d16, d18, d20}, [r0:64]!
 	vst3.p16 {d20, d22, d24}, [r5]!
 	vst3.32 {d5, d7, d9}, [r4]!
 
 @ CHECK: vst3.8	{d16, d17, d18}, [r1]   @ encoding: [0x0f,0x04,0x41,0xf4]
 @ CHECK: vst3.16	{d6, d7, d8}, [r2]      @ encoding: [0x4f,0x64,0x02,0xf4]
 @ CHECK: vst3.32	{d1, d2, d3}, [r3]      @ encoding: [0x8f,0x14,0x03,0xf4]
-@ CHECK: vst3.8	{d16, d18, d20}, [r0, :64] @ encoding: [0x1f,0x05,0x40,0xf4]
+@ CHECK: vst3.8	{d16, d18, d20}, [r0:64] @ encoding: [0x1f,0x05,0x40,0xf4]
 @ CHECK: vst3.16	{d27, d29, d31}, [r4]   @ encoding: [0x4f,0xb5,0x44,0xf4]
 @ CHECK: vst3.32	{d6, d8, d10}, [r5]     @ encoding: [0x8f,0x65,0x05,0xf4]
 @ CHECK: vst3.8	{d12, d13, d14}, [r6], r1 @ encoding: [0x01,0xc4,0x06,0xf4]
@@ -94,85 +94,85 @@
 @ CHECK: vst3.8	{d6, d7, d8}, [r8]!     @ encoding: [0x0d,0x64,0x08,0xf4]
 @ CHECK: vst3.16	{d9, d10, d11}, [r7]!   @ encoding: [0x4d,0x94,0x07,0xf4]
 @ CHECK: vst3.32	{d1, d2, d3}, [r6]!     @ encoding: [0x8d,0x14,0x06,0xf4]
-@ CHECK: vst3.8	{d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x40,0xf4]
+@ CHECK: vst3.8	{d16, d18, d20}, [r0:64]! @ encoding: [0x1d,0x05,0x40,0xf4]
 @ CHECK: vst3.16	{d20, d22, d24}, [r5]!  @ encoding: [0x4d,0x45,0x45,0xf4]
 @ CHECK: vst3.32	{d5, d7, d9}, [r4]!     @ encoding: [0x8d,0x55,0x04,0xf4]
 
 
-	vst4.8 {d16, d17, d18, d19}, [r1, :64]
-	vst4.16 {d16, d17, d18, d19}, [r2, :128]
-	vst4.32 {d16, d17, d18, d19}, [r3, :256]
-	vst4.8 {d17, d19, d21, d23}, [r5, :256]
+	vst4.8 {d16, d17, d18, d19}, [r1:64]
+	vst4.16 {d16, d17, d18, d19}, [r2:128]
+	vst4.32 {d16, d17, d18, d19}, [r3:256]
+	vst4.8 {d17, d19, d21, d23}, [r5:256]
 	vst4.16 {d17, d19, d21, d23}, [r7]
 	vst4.32 {d16, d18, d20, d22}, [r8]
 
-	vst4.s8 {d16, d17, d18, d19}, [r1, :64]!
-	vst4.s16 {d16, d17, d18, d19}, [r2, :128]!
-	vst4.s32 {d16, d17, d18, d19}, [r3, :256]!
-	vst4.u8 {d17, d19, d21, d23}, [r5, :256]!
+	vst4.s8 {d16, d17, d18, d19}, [r1:64]!
+	vst4.s16 {d16, d17, d18, d19}, [r2:128]!
+	vst4.s32 {d16, d17, d18, d19}, [r3:256]!
+	vst4.u8 {d17, d19, d21, d23}, [r5:256]!
 	vst4.u16 {d17, d19, d21, d23}, [r7]!
 	vst4.u32 {d16, d18, d20, d22}, [r8]!
 
-	vst4.p8 {d16, d17, d18, d19}, [r1, :64], r8
+	vst4.p8 {d16, d17, d18, d19}, [r1:64], r8
 	vst4.p16 {d16, d17, d18, d19}, [r2], r7
-	vst4.f32 {d16, d17, d18, d19}, [r3, :64], r5
-	vst4.i8 {d16, d18, d20, d22}, [r4, :256], r2
+	vst4.f32 {d16, d17, d18, d19}, [r3:64], r5
+	vst4.i8 {d16, d18, d20, d22}, [r4:256], r2
 	vst4.i16 {d16, d18, d20, d22}, [r6], r3
 	vst4.i32 {d17, d19, d21, d23}, [r9], r4
 
-@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :64] @ encoding: [0x1f,0x00,0x41,0xf4]
-@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2, :128] @ encoding: [0x6f,0x00,0x42,0xf4]
-@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3, :256] @ encoding: [0xbf,0x00,0x43,0xf4]
-@ CHECK: vst4.8 {d17, d19, d21, d23}, [r5, :256] @ encoding: [0x3f,0x11,0x45,0xf4]
+@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1:64] @ encoding: [0x1f,0x00,0x41,0xf4]
+@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2:128] @ encoding: [0x6f,0x00,0x42,0xf4]
+@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3:256] @ encoding: [0xbf,0x00,0x43,0xf4]
+@ CHECK: vst4.8 {d17, d19, d21, d23}, [r5:256] @ encoding: [0x3f,0x11,0x45,0xf4]
 @ CHECK: vst4.16 {d17, d19, d21, d23}, [r7] @ encoding: [0x4f,0x11,0x47,0xf4]
 @ CHECK: vst4.32 {d16, d18, d20, d22}, [r8] @ encoding: [0x8f,0x01,0x48,0xf4]
-@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :64]! @ encoding: [0x1d,0x00,0x41,0xf4]
-@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2, :128]! @ encoding: [0x6d,0x00,0x42,0xf4]
-@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3, :256]! @ encoding: [0xbd,0x00,0x43,0xf4]
-@ CHECK: vst4.8 {d17, d19, d21, d23}, [r5, :256]! @ encoding: [0x3d,0x11,0x45,0xf4]
+@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1:64]! @ encoding: [0x1d,0x00,0x41,0xf4]
+@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2:128]! @ encoding: [0x6d,0x00,0x42,0xf4]
+@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3:256]! @ encoding: [0xbd,0x00,0x43,0xf4]
+@ CHECK: vst4.8 {d17, d19, d21, d23}, [r5:256]! @ encoding: [0x3d,0x11,0x45,0xf4]
 @ CHECK: vst4.16 {d17, d19, d21, d23}, [r7]! @ encoding: [0x4d,0x11,0x47,0xf4]
 @ CHECK: vst4.32 {d16, d18, d20, d22}, [r8]! @ encoding: [0x8d,0x01,0x48,0xf4]
-@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :64], r8 @ encoding: [0x18,0x00,0x41,0xf4]
+@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1:64], r8 @ encoding: [0x18,0x00,0x41,0xf4]
 @ CHECK: vst4.16 {d16, d17, d18, d19}, [r2], r7 @ encoding: [0x47,0x00,0x42,0xf4]
-@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3, :64], r5 @ encoding: [0x95,0x00,0x43,0xf4]
-@ CHECK: vst4.8 {d16, d18, d20, d22}, [r4, :256], r2 @ encoding: [0x32,0x01,0x44,0xf4]
+@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3:64], r5 @ encoding: [0x95,0x00,0x43,0xf4]
+@ CHECK: vst4.8 {d16, d18, d20, d22}, [r4:256], r2 @ encoding: [0x32,0x01,0x44,0xf4]
 @ CHECK: vst4.16 {d16, d18, d20, d22}, [r6], r3 @ encoding: [0x43,0x01,0x46,0xf4]
 @ CHECK: vst4.32 {d17, d19, d21, d23}, [r9], r4 @ encoding: [0x84,0x11,0x49,0xf4]
 
 
-	vst2.8	{d16[1], d17[1]}, [r0, :16]
-	vst2.p16	{d16[1], d17[1]}, [r0, :32]
+	vst2.8	{d16[1], d17[1]}, [r0:16]
+	vst2.p16	{d16[1], d17[1]}, [r0:32]
 	vst2.i32	{d16[1], d17[1]}, [r0]
 	vst2.u16	{d17[1], d19[1]}, [r0]
-	vst2.f32	{d17[0], d19[0]}, [r0, :64]
+	vst2.f32	{d17[0], d19[0]}, [r0:64]
 
         vst2.8 {d2[4], d3[4]}, [r2], r3
         vst2.u8 {d2[4], d3[4]}, [r2]!
         vst2.p8 {d2[4], d3[4]}, [r2]
 
         vst2.16 {d17[1], d19[1]}, [r0]
-        vst2.32 {d17[0], d19[0]}, [r0, :64]
+        vst2.32 {d17[0], d19[0]}, [r0:64]
         vst2.i16 {d7[1], d9[1]}, [r1]!
-        vst2.32 {d6[0], d8[0]}, [r2, :64]!
+        vst2.32 {d6[0], d8[0]}, [r2:64]!
         vst2.16 {d2[1], d4[1]}, [r3], r5
-        vst2.u32 {d5[0], d7[0]}, [r4, :64], r7
+        vst2.u32 {d5[0], d7[0]}, [r4:64], r7
 
-@ CHECK: vst2.8	{d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xc0,0xf4]
-@ CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xc0,0xf4]
+@ CHECK: vst2.8	{d16[1], d17[1]}, [r0:16] @ encoding: [0x3f,0x01,0xc0,0xf4]
+@ CHECK: vst2.16 {d16[1], d17[1]}, [r0:32] @ encoding: [0x5f,0x05,0xc0,0xf4]
 @ CHECK: vst2.32 {d16[1], d17[1]}, [r0]  @ encoding: [0x8f,0x09,0xc0,0xf4]
 @ CHECK: vst2.16 {d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xc0,0xf4]
-@ CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf4]
+@ CHECK: vst2.32 {d17[0], d19[0]}, [r0:64] @ encoding: [0x5f,0x19,0xc0,0xf4]
 
 @ CHECK: vst2.8	{d2[4], d3[4]}, [r2], r3 @ encoding: [0x83,0x21,0x82,0xf4]
 @ CHECK: vst2.8	{d2[4], d3[4]}, [r2]!   @ encoding: [0x8d,0x21,0x82,0xf4]
 @ CHECK: vst2.8	{d2[4], d3[4]}, [r2]    @ encoding: [0x8f,0x21,0x82,0xf4]
 
 @ CHECK: vst2.16 {d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xc0,0xf4]
-@ CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf4]
+@ CHECK: vst2.32 {d17[0], d19[0]}, [r0:64] @ encoding: [0x5f,0x19,0xc0,0xf4]
 @ CHECK: vst2.16 {d7[1], d9[1]}, [r1]!   @ encoding: [0x6d,0x75,0x81,0xf4]
-@ CHECK: vst2.32 {d6[0], d8[0]}, [r2, :64]! @ encoding: [0x5d,0x69,0x82,0xf4]
+@ CHECK: vst2.32 {d6[0], d8[0]}, [r2:64]! @ encoding: [0x5d,0x69,0x82,0xf4]
 @ CHECK: vst2.16 {d2[1], d4[1]}, [r3], r5 @ encoding: [0x65,0x25,0x83,0xf4]
-@ CHECK: vst2.32 {d5[0], d7[0]}, [r4, :64], r7 @ encoding: [0x57,0x59,0x84,0xf4]
+@ CHECK: vst2.32 {d5[0], d7[0]}, [r4:64], r7 @ encoding: [0x57,0x59,0x84,0xf4]
 
 
 	vst3.8 {d16[1], d17[1], d18[1]}, [r1]
@@ -216,15 +216,15 @@
 	vst4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7]
 	vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]
 
-	vst4.s8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
-	vst4.s16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]!
-	vst4.s32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]!
+	vst4.s8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32]!
+	vst4.s16 {d16[1], d17[1], d18[1], d19[1]}, [r2:64]!
+	vst4.s32 {d16[1], d17[1], d18[1], d19[1]}, [r3:128]!
 	vst4.u16 {d17[1], d19[1], d21[1], d23[1]}, [r7]!
 	vst4.u32 {d16[1], d18[1], d20[1], d22[1]}, [r8]!
 
-	vst4.p8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8
+	vst4.p8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32], r8
 	vst4.p16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7
-	vst4.f32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5
+	vst4.f32 {d16[1], d17[1], d18[1], d19[1]}, [r3:64], r5
 	vst4.i16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3
 	vst4.i32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4
 
@@ -233,14 +233,14 @@
 @ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] @ encoding: [0x8f,0x0b,0xc3,0xf4]
 @ CHECK: vst4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7] @ encoding: [0x6f,0x17,0xc7,0xf4]
 @ CHECK: vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8] @ encoding: [0xcf,0x0b,0xc8,0xf4]
-@ CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! @ encoding: [0x3d,0x03,0xc1,0xf4]
-@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]! @ encoding: [0x5d,0x07,0xc2,0xf4]
-@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]! @ encoding: [0xad,0x0b,0xc3,0xf4]
+@ CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1:32]! @ encoding: [0x3d,0x03,0xc1,0xf4]
+@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2:64]! @ encoding: [0x5d,0x07,0xc2,0xf4]
+@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3:128]! @ encoding: [0xad,0x0b,0xc3,0xf4]
 @ CHECK: vst4.16 {d17[1], d18[1], d19[1], d20[1]}, [r7]! @ encoding: [0x6d,0x17,0xc7,0xf4]
 @ CHECK: vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]! @ encoding: [0xcd,0x0b,0xc8,0xf4]
-@ CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8 @ encoding: [0x38,0x03,0xc1,0xf4]
+@ CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1:32], r8 @ encoding: [0x38,0x03,0xc1,0xf4]
 @ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7 @ encoding: [0x47,0x07,0xc2,0xf4]
-@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5 @ encoding: [0x95,0x0b,0xc3,0xf4]
+@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3:64], r5 @ encoding: [0x95,0x0b,0xc3,0xf4]
 @ CHECK: vst4.16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3 @ encoding: [0x63,0x07,0xc6,0xf4]
 @ CHECK: vst4.32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 @ encoding: [0xc4,0x1b,0xc9,0xf4]
 
@@ -269,10 +269,17 @@
         vst2.8 {d8, d10}, [r4]
 @ CHECK: vst2.8	{d8, d10}, [r4]         @ encoding: [0x0f,0x89,0x04,0xf4]
 
-        vst1.32 {d9[1]}, [r3, :32]
-        vst1.32 {d27[1]}, [r9, :32]!
-        vst1.32 {d27[1]}, [r3, :32], r5
-@ CHECK: vst1.32	{d9[1]}, [r3, :32]       @ encoding: [0xbf,0x98,0x83,0xf4]
-@ CHECK: vst1.32	{d27[1]}, [r9, :32]!     @ encoding: [0xbd,0xb8,0xc9,0xf4]
-@ CHECK: vst1.32	{d27[1]}, [r3, :32], r5  @ encoding: [0xb5,0xb8,0xc3,0xf4]
+        vst1.32 {d9[1]}, [r3:32]
+        vst1.32 {d27[1]}, [r9:32]!
+        vst1.32 {d27[1]}, [r3:32], r5
+@ CHECK: vst1.32	{d9[1]}, [r3:32]       @ encoding: [0xbf,0x98,0x83,0xf4]
+@ CHECK: vst1.32	{d27[1]}, [r9:32]!     @ encoding: [0xbd,0xb8,0xc9,0xf4]
+@ CHECK: vst1.32	{d27[1]}, [r3:32], r5  @ encoding: [0xb5,0xb8,0xc3,0xf4]
 
+@ Verify that the old, incorrect alignment specifier syntax (", :") is still
+@ accepted by the parser and is printed back in the new "[r0:64]" form.
+        vst2.8	{d16, d17}, [r0, :64]
+        vst2.16	{d16, d17}, [r0, :128]
+
+@ CHECK: vst2.8	{d16, d17}, [r0:64]   @ encoding: [0x1f,0x08,0x40,0xf4]
+@ CHECK: vst2.16 {d16, d17}, [r0:128] @ encoding: [0x6f,0x08,0x40,0xf4]
+\ No newline at end of file
diff --git a/test/MC/ARM/neont2-vld-encoding.s b/test/MC/ARM/neont2-vld-encoding.s
index 031205a..7db8552 100644
--- a/test/MC/ARM/neont2-vld-encoding.s
+++ b/test/MC/ARM/neont2-vld-encoding.s
@@ -3,46 +3,46 @@
 
 .code 16
 
-@ CHECK: vld1.8	{d16}, [r0, :64]        @ encoding: [0x1f,0x07,0x60,0xf9]
-	vld1.8	{d16}, [r0, :64]
+@ CHECK: vld1.8	{d16}, [r0:64]        @ encoding: [0x1f,0x07,0x60,0xf9]
+	vld1.8	{d16}, [r0:64]
 @ CHECK: vld1.16	{d16}, [r0]             @ encoding: [0x4f,0x07,0x60,0xf9]
   vld1.16	{d16}, [r0]
 @ CHECK: vld1.32	{d16}, [r0]             @ encoding: [0x8f,0x07,0x60,0xf9]
   vld1.32	{d16}, [r0]
 @ CHECK: vld1.64	{d16}, [r0]             @ encoding: [0xcf,0x07,0x60,0xf9]
   vld1.64	{d16}, [r0]
-@ CHECK: vld1.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x0a,0x60,0xf9]
-  vld1.8	{d16, d17}, [r0, :64]
-@ CHECK: vld1.16	{d16, d17}, [r0, :128]  @ encoding: [0x6f,0x0a,0x60,0xf9]
-  vld1.16	{d16, d17}, [r0, :128]
+@ CHECK: vld1.8	{d16, d17}, [r0:64]   @ encoding: [0x1f,0x0a,0x60,0xf9]
+  vld1.8	{d16, d17}, [r0:64]
+@ CHECK: vld1.16	{d16, d17}, [r0:128]  @ encoding: [0x6f,0x0a,0x60,0xf9]
+  vld1.16	{d16, d17}, [r0:128]
 @ CHECK: vld1.32	{d16, d17}, [r0]        @ encoding: [0x8f,0x0a,0x60,0xf9]
   vld1.32	{d16, d17}, [r0]
 @ CHECK: vld1.64	{d16, d17}, [r0]        @ encoding: [0xcf,0x0a,0x60,0xf9]
   vld1.64	{d16, d17}, [r0]
 
-@ CHECK: vld2.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x08,0x60,0xf9]
-  vld2.8	{d16, d17}, [r0, :64]
-@ CHECK: vld2.16	{d16, d17}, [r0, :128]  @ encoding: [0x6f,0x08,0x60,0xf9]
-  vld2.16	{d16, d17}, [r0, :128]
+@ CHECK: vld2.8	{d16, d17}, [r0:64]   @ encoding: [0x1f,0x08,0x60,0xf9]
+  vld2.8	{d16, d17}, [r0:64]
+@ CHECK: vld2.16	{d16, d17}, [r0:128]  @ encoding: [0x6f,0x08,0x60,0xf9]
+  vld2.16	{d16, d17}, [r0:128]
 @ CHECK: vld2.32	{d16, d17}, [r0]        @ encoding: [0x8f,0x08,0x60,0xf9]
   vld2.32	{d16, d17}, [r0]
-@ CHECK: vld2.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x60,0xf9]
-  vld2.8	{d16, d17, d18, d19}, [r0, :64]
-@ CHECK: vld2.16	{d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x60,0xf9]
-  vld2.16	{d16, d17, d18, d19}, [r0, :128]
-@ CHECK: vld2.32	{d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x60,0xf9]
-  vld2.32	{d16, d17, d18, d19}, [r0, :256]
+@ CHECK: vld2.8	{d16, d17, d18, d19}, [r0:64] @ encoding: [0x1f,0x03,0x60,0xf9]
+  vld2.8	{d16, d17, d18, d19}, [r0:64]
+@ CHECK: vld2.16	{d16, d17, d18, d19}, [r0:128] @ encoding: [0x6f,0x03,0x60,0xf9]
+  vld2.16	{d16, d17, d18, d19}, [r0:128]
+@ CHECK: vld2.32	{d16, d17, d18, d19}, [r0:256] @ encoding: [0xbf,0x03,0x60,0xf9]
+  vld2.32	{d16, d17, d18, d19}, [r0:256]
 
-@ CHECK: vld3.8	{d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x04,0x60,0xf9]
-  vld3.8	{d16, d17, d18}, [r0, :64]
+@ CHECK: vld3.8	{d16, d17, d18}, [r0:64] @ encoding: [0x1f,0x04,0x60,0xf9]
+  vld3.8	{d16, d17, d18}, [r0:64]
 @ CHECK: vld3.16	{d16, d17, d18}, [r0]   @ encoding: [0x4f,0x04,0x60,0xf9]
   vld3.16	{d16, d17, d18}, [r0]
 @ CHECK: vld3.32	{d16, d17, d18}, [r0]   @ encoding: [0x8f,0x04,0x60,0xf9]
   vld3.32	{d16, d17, d18}, [r0]
-@ CHECK: vld3.8	{d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x60,0xf9]
-  vld3.8	{d16, d18, d20}, [r0, :64]!
-@ CHECK: vld3.8	{d17, d19, d21}, [r0, :64]! @ encoding: [0x1d,0x15,0x60,0xf9]
-  vld3.8	{d17, d19, d21}, [r0, :64]!
+@ CHECK: vld3.8	{d16, d18, d20}, [r0:64]! @ encoding: [0x1d,0x05,0x60,0xf9]
+  vld3.8	{d16, d18, d20}, [r0:64]!
+@ CHECK: vld3.8	{d17, d19, d21}, [r0:64]! @ encoding: [0x1d,0x15,0x60,0xf9]
+  vld3.8	{d17, d19, d21}, [r0:64]!
 @ CHECK: vld3.16	{d16, d18, d20}, [r0]!  @ encoding: [0x4d,0x05,0x60,0xf9] 
   vld3.16	{d16, d18, d20}, [r0]!
 @ CHECK: vld3.16	{d17, d19, d21}, [r0]!  @ encoding: [0x4d,0x15,0x60,0xf9]
@@ -52,16 +52,16 @@
 @ CHECK: vld3.32	{d17, d19, d21}, [r0]!  @ encoding: [0x8d,0x15,0x60,0xf9]
   vld3.32	{d17, d19, d21}, [r0]!
 
-@ CHECK: vld4.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x00,0x60,0xf9]
-  vld4.8	{d16, d17, d18, d19}, [r0, :64]
-@ CHECK: vld4.16	{d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x00,0x60,0xf9]
-  vld4.16	{d16, d17, d18, d19}, [r0, :128]
-@ CHECK: vld4.32	{d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x00,0x60,0xf9]
-  vld4.32	{d16, d17, d18, d19}, [r0, :256]
-@ CHECK: vld4.8	{d16, d18, d20, d22}, [r0, :256]! @ encoding: [0x3d,0x01,0x60,0xf9]
-  vld4.8	{d16, d18, d20, d22}, [r0, :256]!
-@ CHECK: vld4.8	{d17, d19, d21, d23}, [r0, :256]! @ encoding: [0x3d,0x11,0x60,0xf9]
-  vld4.8	{d17, d19, d21, d23}, [r0, :256]!
+@ CHECK: vld4.8	{d16, d17, d18, d19}, [r0:64] @ encoding: [0x1f,0x00,0x60,0xf9]
+  vld4.8	{d16, d17, d18, d19}, [r0:64]
+@ CHECK: vld4.16	{d16, d17, d18, d19}, [r0:128] @ encoding: [0x6f,0x00,0x60,0xf9]
+  vld4.16	{d16, d17, d18, d19}, [r0:128]
+@ CHECK: vld4.32	{d16, d17, d18, d19}, [r0:256] @ encoding: [0xbf,0x00,0x60,0xf9]
+  vld4.32	{d16, d17, d18, d19}, [r0:256]
+@ CHECK: vld4.8	{d16, d18, d20, d22}, [r0:256]! @ encoding: [0x3d,0x01,0x60,0xf9]
+  vld4.8	{d16, d18, d20, d22}, [r0:256]!
+@ CHECK: vld4.8	{d17, d19, d21, d23}, [r0:256]! @ encoding: [0x3d,0x11,0x60,0xf9]
+  vld4.8	{d17, d19, d21, d23}, [r0:256]!
 @ CHECK: vld4.16	{d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x60,0xf9]
   vld4.16	{d16, d18, d20, d22}, [r0]!
 @ CHECK: vld4.16	{d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x60,0xf9]
@@ -73,21 +73,21 @@
 
 @ CHECK: vld1.8	{d16[3]}, [r0]          @ encoding: [0x6f,0x00,0xe0,0xf9]
   vld1.8	{d16[3]}, [r0]
-@ CHECK: vld1.16	{d16[2]}, [r0, :16]     @ encoding: [0x9f,0x04,0xe0,0xf9]
-  vld1.16	{d16[2]}, [r0, :16]
-@ CHECK: vld1.32	{d16[1]}, [r0, :32]     @ encoding: [0xbf,0x08,0xe0,0xf9]
-  vld1.32	{d16[1]}, [r0, :32]
+@ CHECK: vld1.16	{d16[2]}, [r0:16]     @ encoding: [0x9f,0x04,0xe0,0xf9]
+  vld1.16	{d16[2]}, [r0:16]
+@ CHECK: vld1.32	{d16[1]}, [r0:32]     @ encoding: [0xbf,0x08,0xe0,0xf9]
+  vld1.32	{d16[1]}, [r0:32]
 
-@ CHECK: vld2.8	{d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xe0,0xf9]
-  vld2.8	{d16[1], d17[1]}, [r0, :16]
-@ CHECK: vld2.16	{d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xe0,0xf9]
-  vld2.16	{d16[1], d17[1]}, [r0, :32]
+@ CHECK: vld2.8	{d16[1], d17[1]}, [r0:16] @ encoding: [0x3f,0x01,0xe0,0xf9]
+  vld2.8	{d16[1], d17[1]}, [r0:16]
+@ CHECK: vld2.16	{d16[1], d17[1]}, [r0:32] @ encoding: [0x5f,0x05,0xe0,0xf9]
+  vld2.16	{d16[1], d17[1]}, [r0:32]
 @ CHECK: vld2.32	{d16[1], d17[1]}, [r0]  @ encoding: [0x8f,0x09,0xe0,0xf9]
   vld2.32	{d16[1], d17[1]}, [r0]
 @ CHECK: vld2.16	{d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xe0,0xf9]
   vld2.16	{d17[1], d19[1]}, [r0]
-@ CHECK: vld2.32	{d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xe0,0xf9]
-  vld2.32	{d17[0], d19[0]}, [r0, :64]
+@ CHECK: vld2.32	{d17[0], d19[0]}, [r0:64] @ encoding: [0x5f,0x19,0xe0,0xf9]
+  vld2.32	{d17[0], d19[0]}, [r0:64]
 
 @ CHECK: vld3.8	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xe0,0xf9]
   vld3.8	{d16[1], d17[1], d18[1]}, [r0]
@@ -100,13 +100,13 @@
 @ CHECK: vld3.32	{d17[1], d19[1], d21[1]}, [r0] @ encoding: [0xcf,0x1a,0xe0,0xf9]
   vld3.32	{d17[1], d19[1], d21[1]}, [r0]
 
-@ CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xe0,0xf9]
-  vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+@ CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0:32] @ encoding: [0x3f,0x03,0xe0,0xf9]
+  vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0:32]
 @ CHECK: vld4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xe0,0xf9]
   vld4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0]
-@ CHECK: vld4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xe0,0xf9]
-  vld4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
-@ CHECK: vld4.16	{d16[1], d18[1], d20[1], d22[1]}, [r0, :64] @ encoding: [0x7f,0x07,0xe0,0xf9]
-  vld4.16	{d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
+@ CHECK: vld4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0:128] @ encoding: [0xaf,0x0b,0xe0,0xf9]
+  vld4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0:128]
+@ CHECK: vld4.16	{d16[1], d18[1], d20[1], d22[1]}, [r0:64] @ encoding: [0x7f,0x07,0xe0,0xf9]
+  vld4.16	{d16[1], d18[1], d20[1], d22[1]}, [r0:64]
 @ CHECK: vld4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xe0,0xf9]
   vld4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
diff --git a/test/MC/ARM/neont2-vst-encoding.s b/test/MC/ARM/neont2-vst-encoding.s
index b50d8b6..9adf751 100644
--- a/test/MC/ARM/neont2-vst-encoding.s
+++ b/test/MC/ARM/neont2-vst-encoding.s
@@ -3,46 +3,46 @@
 
 .code 16
 
-@ CHECK: vst1.8	{d16}, [r0, :64]        @ encoding: [0x1f,0x07,0x40,0xf9]
-  vst1.8	{d16}, [r0, :64]
+@ CHECK: vst1.8	{d16}, [r0:64]        @ encoding: [0x1f,0x07,0x40,0xf9]
+  vst1.8	{d16}, [r0:64]
 @ CHECK: vst1.16	{d16}, [r0]             @ encoding: [0x4f,0x07,0x40,0xf9]
   vst1.16	{d16}, [r0]
 @ CHECK: vst1.32	{d16}, [r0]             @ encoding: [0x8f,0x07,0x40,0xf9]
   vst1.32	{d16}, [r0]
 @ CHECK: vst1.64	{d16}, [r0]             @ encoding: [0xcf,0x07,0x40,0xf9]
   vst1.64	{d16}, [r0]
-@ CHECK: vst1.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x0a,0x40,0xf9]
-  vst1.8	{d16, d17}, [r0, :64]
-@ CHECK: vst1.16	{d16, d17}, [r0, :128]  @ encoding: [0x6f,0x0a,0x40,0xf9]
-  vst1.16	{d16, d17}, [r0, :128]
+@ CHECK: vst1.8	{d16, d17}, [r0:64]   @ encoding: [0x1f,0x0a,0x40,0xf9]
+  vst1.8	{d16, d17}, [r0:64]
+@ CHECK: vst1.16	{d16, d17}, [r0:128]  @ encoding: [0x6f,0x0a,0x40,0xf9]
+  vst1.16	{d16, d17}, [r0:128]
 @ CHECK: vst1.32	{d16, d17}, [r0]        @ encoding: [0x8f,0x0a,0x40,0xf9]
   vst1.32	{d16, d17}, [r0]
 @ CHECK: vst1.64	{d16, d17}, [r0]        @ encoding: [0xcf,0x0a,0x40,0xf9]
   vst1.64	{d16, d17}, [r0]
 
-@ CHECK: vst2.8	{d16, d17}, [r0, :64]   @ encoding: [0x1f,0x08,0x40,0xf9]
-  vst2.8	{d16, d17}, [r0, :64]
-@ CHECK: vst2.16	{d16, d17}, [r0, :128]  @ encoding: [0x6f,0x08,0x40,0xf9]
-  vst2.16	{d16, d17}, [r0, :128]
+@ CHECK: vst2.8	{d16, d17}, [r0:64]   @ encoding: [0x1f,0x08,0x40,0xf9]
+  vst2.8	{d16, d17}, [r0:64]
+@ CHECK: vst2.16	{d16, d17}, [r0:128]  @ encoding: [0x6f,0x08,0x40,0xf9]
+  vst2.16	{d16, d17}, [r0:128]
 @ CHECK: vst2.32	{d16, d17}, [r0]        @ encoding: [0x8f,0x08,0x40,0xf9]
   vst2.32	{d16, d17}, [r0]
-@ CHECK: vst2.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x40,0xf9]
-  vst2.8	{d16, d17, d18, d19}, [r0, :64]
-@ CHECK: vst2.16	{d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x40,0xf9]
-  vst2.16	{d16, d17, d18, d19}, [r0, :128]
-@ CHECK: vst2.32	{d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x40,0xf9]
-  vst2.32	{d16, d17, d18, d19}, [r0, :256]
+@ CHECK: vst2.8	{d16, d17, d18, d19}, [r0:64] @ encoding: [0x1f,0x03,0x40,0xf9]
+  vst2.8	{d16, d17, d18, d19}, [r0:64]
+@ CHECK: vst2.16	{d16, d17, d18, d19}, [r0:128] @ encoding: [0x6f,0x03,0x40,0xf9]
+  vst2.16	{d16, d17, d18, d19}, [r0:128]
+@ CHECK: vst2.32	{d16, d17, d18, d19}, [r0:256] @ encoding: [0xbf,0x03,0x40,0xf9]
+  vst2.32	{d16, d17, d18, d19}, [r0:256]
 
-@ CHECK: vst3.8	{d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x04,0x40,0xf9]
-  vst3.8	{d16, d17, d18}, [r0, :64]
+@ CHECK: vst3.8	{d16, d17, d18}, [r0:64] @ encoding: [0x1f,0x04,0x40,0xf9]
+  vst3.8	{d16, d17, d18}, [r0:64]
 @ CHECK: vst3.16	{d16, d17, d18}, [r0]   @ encoding: [0x4f,0x04,0x40,0xf9]
   vst3.16	{d16, d17, d18}, [r0]
 @ CHECK: vst3.32	{d16, d17, d18}, [r0]   @ encoding: [0x8f,0x04,0x40,0xf9]
   vst3.32	{d16, d17, d18}, [r0]
-@ CHECK: vst3.8	{d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x40,0xf9]
-  vst3.8	{d16, d18, d20}, [r0, :64]!
-@ CHECK: vst3.8	{d17, d19, d21}, [r0, :64]! @ encoding: [0x1d,0x15,0x40,0xf9]
-  vst3.8	{d17, d19, d21}, [r0, :64]!
+@ CHECK: vst3.8	{d16, d18, d20}, [r0:64]! @ encoding: [0x1d,0x05,0x40,0xf9]
+  vst3.8	{d16, d18, d20}, [r0:64]!
+@ CHECK: vst3.8	{d17, d19, d21}, [r0:64]! @ encoding: [0x1d,0x15,0x40,0xf9]
+  vst3.8	{d17, d19, d21}, [r0:64]!
 @ CHECK: vst3.16	{d16, d18, d20}, [r0]!  @ encoding: [0x4d,0x05,0x40,0xf9]
   vst3.16	{d16, d18, d20}, [r0]!
 @ CHECK: vst3.16	{d17, d19, d21}, [r0]!  @ encoding: [0x4d,0x15,0x40,0xf9]
@@ -52,14 +52,14 @@
 @ CHECK: vst3.32	{d17, d19, d21}, [r0]!  @ encoding: [0x8d,0x15,0x40,0xf9]
   vst3.32	{d17, d19, d21}, [r0]!
 
-@ CHECK: vst4.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x00,0x40,0xf9]
-  vst4.8	{d16, d17, d18, d19}, [r0, :64]
-@ CHECK: vst4.16	{d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x00,0x40,0xf9]
-  vst4.16	{d16, d17, d18, d19}, [r0, :128]
-@ CHECK: vst4.8	{d16, d18, d20, d22}, [r0, :256]! @ encoding: [0x3d,0x01,0x40,0xf9]
-  vst4.8	{d16, d18, d20, d22}, [r0, :256]!
-@ CHECK: vst4.8	{d17, d19, d21, d23}, [r0, :256]! @ encoding: [0x3d,0x11,0x40,0xf9]
-  vst4.8	{d17, d19, d21, d23}, [r0, :256]!
+@ CHECK: vst4.8	{d16, d17, d18, d19}, [r0:64] @ encoding: [0x1f,0x00,0x40,0xf9]
+  vst4.8	{d16, d17, d18, d19}, [r0:64]
+@ CHECK: vst4.16	{d16, d17, d18, d19}, [r0:128] @ encoding: [0x6f,0x00,0x40,0xf9]
+  vst4.16	{d16, d17, d18, d19}, [r0:128]
+@ CHECK: vst4.8	{d16, d18, d20, d22}, [r0:256]! @ encoding: [0x3d,0x01,0x40,0xf9]
+  vst4.8	{d16, d18, d20, d22}, [r0:256]!
+@ CHECK: vst4.8	{d17, d19, d21, d23}, [r0:256]! @ encoding: [0x3d,0x11,0x40,0xf9]
+  vst4.8	{d17, d19, d21, d23}, [r0:256]!
 @ CHECK: vst4.16	{d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x40,0xf9]
   vst4.16	{d16, d18, d20, d22}, [r0]!
 @ CHECK: vst4.16	{d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x40,0xf9]
@@ -69,16 +69,16 @@
 @ CHECK: vst4.32	{d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x40,0xf9]
   vst4.32	{d17, d19, d21, d23}, [r0]!
 
-@ CHECK: vst2.8	{d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xc0,0xf9]
-  vst2.8	{d16[1], d17[1]}, [r0, :16]
-@ CHECK: vst2.16	{d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xc0,0xf9]
-  vst2.16	{d16[1], d17[1]}, [r0, :32]
+@ CHECK: vst2.8	{d16[1], d17[1]}, [r0:16] @ encoding: [0x3f,0x01,0xc0,0xf9]
+  vst2.8	{d16[1], d17[1]}, [r0:16]
+@ CHECK: vst2.16	{d16[1], d17[1]}, [r0:32] @ encoding: [0x5f,0x05,0xc0,0xf9]
+  vst2.16	{d16[1], d17[1]}, [r0:32]
 @ CHECK: vst2.32	{d16[1], d17[1]}, [r0]  @ encoding: [0x8f,0x09,0xc0,0xf9]
   vst2.32	{d16[1], d17[1]}, [r0]
 @ CHECK: vst2.16	{d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xc0,0xf9]
   vst2.16	{d17[1], d19[1]}, [r0]
-@ CHECK: vst2.32	{d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf9]
-  vst2.32	{d17[0], d19[0]}, [r0, :64]
+@ CHECK: vst2.32	{d17[0], d19[0]}, [r0:64] @ encoding: [0x5f,0x19,0xc0,0xf9]
+  vst2.32	{d17[0], d19[0]}, [r0:64]
 
 @ CHECK: vst3.8	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xc0,0xf9]
   vst3.8	{d16[1], d17[1], d18[1]}, [r0]
@@ -91,14 +91,14 @@
 @ CHECK: vst3.32	{d16[0], d18[0], d20[0]}, [r0] @ encoding: [0x4f,0x0a,0xc0,0xf9]
   vst3.32	{d16[0], d18[0], d20[0]}, [r0]
 
-@ CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xc0,0xf9]
-  vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+@ CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0:32] @ encoding: [0x3f,0x03,0xc0,0xf9]
+  vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0:32]
 @ CHECK: vst4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xc0,0xf9]
   vst4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0]
-@ CHECK: vst4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xc0,0xf9]
-  vst4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
-@ CHECK: vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0, :64] @ encoding: [0xff,0x17,0xc0,0xf9]
-  vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
+@ CHECK: vst4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0:128] @ encoding: [0xaf,0x0b,0xc0,0xf9]
+  vst4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0:128]
+@ CHECK: vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0:64] @ encoding: [0xff,0x17,0xc0,0xf9]
+  vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0:64]
 @ CHECK: vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xc0,0xf9]
   vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
 
diff --git a/test/MC/AsmParser/align_invalid.s b/test/MC/AsmParser/align_invalid.s
new file mode 100644
index 0000000..0d06d94
--- /dev/null
+++ b/test/MC/AsmParser/align_invalid.s
@@ -0,0 +1,10 @@
+# RUN: llvm-mc -triple i386-linux-gnu < %s 2>&1 | FileCheck %s -check-prefix=ELF
+# RUN: llvm-mc -triple i386-apple-darwin < %s 2>&1 | FileCheck %s -check-prefix=DARWIN
+
+.align 3
+# ELF: error: alignment must be a power of 2
+# DARWIN-NOT: error
+
+.align 32
+# ELF-NOT: error
+# DARWIN: error: invalid alignment value
diff --git a/test/MC/AsmParser/directive_values.s b/test/MC/AsmParser/directive_values.s
index 6c79c38..ed932b2 100644
--- a/test/MC/AsmParser/directive_values.s
+++ b/test/MC/AsmParser/directive_values.s
@@ -63,3 +63,9 @@ TEST7:
 # CHECK-NEXT:   .byte   2
 # CHECK-NEXT:   .byte   3
 # CHECK-NEXT:   .byte   4
+
+TEST8:
+        .long 0x200000UL+1
+        .long 0x200000L+1
+# CHECK: .long 2097153
+# CHECK: .long 2097153
diff --git a/test/MC/AsmParser/section_names.s b/test/MC/AsmParser/section_names.s
new file mode 100644
index 0000000..332cdbe
--- /dev/null
+++ b/test/MC/AsmParser/section_names.s
@@ -0,0 +1,62 @@
+# RUN: llvm-mc -triple i386-pc-linux-gnu -filetype=obj -o %t %s
+# RUN: elf-dump --dump-section-data < %t | FileCheck %s
+.section .nobits
+.byte 1
+.section .nobits2
+.byte 1
+.section .nobitsfoo
+.byte 1
+.section .init_array
+.byte 1
+.section .init_array2
+.byte 1
+.section .init_arrayfoo
+.byte 1
+.section .fini_array
+.byte 1
+.section .fini_array2
+.byte 1
+.section .fini_arrayfoo
+.byte 1
+.section .preinit_array
+.byte 1
+.section .preinit_array2
+.byte 1
+.section .preinit_arrayfoo
+.byte 1
+.section .note
+.byte 1
+.section .note2
+.byte 1
+.section .notefoo
+.byte 1
+# CHECK:      (('sh_name', 0x00000{{...}}) # '.nobits'
+# CHECK-NEXT:  ('sh_type', 0x00000001)
+# CHECK:      (('sh_name', 0x00000{{...}}) # '.nobits2'
+# CHECK-NEXT:  ('sh_type', 0x00000001)
+# CHECK:      (('sh_name', 0x00000{{...}}) # '.nobitsfoo'
+# CHECK-NEXT:  ('sh_type', 0x00000001)
+# CHECK:      (('sh_name', 0x00000{{...}}) # '.init_array'
+# CHECK-NEXT:  ('sh_type', 0x0000000e)
+# CHECK:      (('sh_name', 0x00000{{...}}) # '.init_array2'
+# CHECK-NEXT:  ('sh_type', 0x00000001)
+# CHECK:      (('sh_name', 0x00000{{...}}) # '.init_arrayfoo'
+# CHECK-NEXT:  ('sh_type', 0x00000001)
+# CHECK:      (('sh_name', 0x00000{{...}}) # '.fini_array'
+# CHECK-NEXT:  ('sh_type', 0x0000000f)
+# CHECK:      (('sh_name', 0x00000{{...}}) # '.fini_array2'
+# CHECK-NEXT:  ('sh_type', 0x00000001)
+# CHECK:      (('sh_name', 0x00000{{...}}) # '.fini_arrayfoo'
+# CHECK-NEXT:  ('sh_type', 0x00000001)
+# CHECK:      (('sh_name', 0x00000{{...}}) # '.preinit_array'
+# CHECK-NEXT:  ('sh_type', 0x00000010)
+# CHECK:      (('sh_name', 0x00000{{...}}) # '.preinit_array2'
+# CHECK-NEXT:  ('sh_type', 0x00000001)
+# CHECK:      (('sh_name', 0x00000{{...}}) # '.preinit_arrayfoo'
+# CHECK-NEXT:  ('sh_type', 0x00000001)
+# CHECK:      (('sh_name', 0x00000{{...}}) # '.note'
+# CHECK-NEXT:  ('sh_type', 0x00000007)
+# CHECK:      (('sh_name', 0x00000{{...}}) # '.note2'
+# CHECK-NEXT:  ('sh_type', 0x00000007)
+# CHECK:      (('sh_name', 0x00000{{...}}) # '.notefoo'
+# CHECK-NEXT:  ('sh_type', 0x00000007)
diff --git a/test/MC/COFF/symbol-alias.s b/test/MC/COFF/symbol-alias.s
index 03f07b2..4b1772c 100644
--- a/test/MC/COFF/symbol-alias.s
+++ b/test/MC/COFF/symbol-alias.s
@@ -23,8 +23,11 @@ _bar:
 	.long	0                       # 0x0
 
 
+# Order is important here. Assign _bar_alias_alias before _bar_alias.
 	.globl	_foo_alias
 _foo_alias = _foo
+	.globl	_bar_alias_alias
+_bar_alias_alias = _bar_alias
 	.globl	_bar_alias
 _bar_alias = _bar
 
@@ -52,6 +55,14 @@ _bar_alias = _bar
 // CHECK-NEXT: StorageClass       = [[FOO_STORAGE_CLASS]]
 // CHECK-NEXT: NumberOfAuxSymbols = [[FOO_NUMBER_OF_AUX_SYMBOLS]]
 
+// CHECK:      Name               = {{_?}}bar_alias_alias
+// CHECK-NEXT: Value              = [[BAR_VALUE]]
+// CHECK-NEXT: SectionNumber      = [[BAR_SECTION_NUMBER]]
+// CHECK-NEXT: SimpleType         = [[BAR_SIMPLE_TYPE]]
+// CHECK-NEXT: ComplexType        = [[BAR_COMPLEX_TYPE]]
+// CHECK-NEXT: StorageClass       = [[BAR_STORAGE_CLASS]]
+// CHECK-NEXT: NumberOfAuxSymbols = [[BAR_NUMBER_OF_AUX_SYMBOLS]]
+
 // CHECK:      Name               = {{_?}}bar_alias
 // CHECK-NEXT: Value              = [[BAR_VALUE]]
 // CHECK-NEXT: SectionNumber      = [[BAR_SECTION_NUMBER]]
diff --git a/test/MC/Disassembler/AArch64/basic-a64-instructions.txt b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
new file mode 100644
index 0000000..4fa2d50
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
@@ -0,0 +1,4200 @@
+# RUN: llvm-mc -triple=aarch64 -disassemble < %s | FileCheck %s
+
+#------------------------------------------------------------------------------
+# Add/sub (immediate)
+#------------------------------------------------------------------------------
+# CHECK: add      w4, w5, #0
+# CHECK: add      w2, w3, #4095
+# CHECK: add      w30, w29, #1, lsl #12
+# CHECK: add      w13, w5, #4095, lsl #12
+# CHECK: add      x5, x7, #1638
+0xa4 0x0 0x0 0x11
+0x62 0xfc 0x3f 0x11
+0xbe 0x7 0x40 0x11
+0xad 0xfc 0x7f 0x11
+0xe5 0x98 0x19 0x91
+
+# CHECK: add      w20, wsp, #801
+# CHECK: add      wsp, wsp, #1104
+# CHECK: add      wsp, w30, #4084
+0xf4 0x87 0xc 0x11
+0xff 0x43 0x11 0x11
+0xdf 0xd3 0x3f 0x11
+
+# CHECK: add      x0, x24, #291
+# CHECK: add      x3, x24, #4095, lsl #12
+# CHECK: add      x8, sp, #1074
+# CHECK: add      sp, x29, #3816
+0x0 0x8f 0x4 0x91
+0x3 0xff 0x7f 0x91
+0xe8 0xcb 0x10 0x91
+0xbf 0xa3 0x3b 0x91
+
+# CHECK: sub      w0, wsp, #4077
+# CHECK: sub      w4, w20, #546, lsl #12
+# CHECK: sub      sp, sp, #288
+# CHECK: sub      wsp, w19, #16
+0xe0 0xb7 0x3f 0x51
+0x84 0x8a 0x48 0x51
+0xff 0x83 0x4 0xd1
+0x7f 0x42 0x0 0x51
+
+
+# CHECK: adds     w13, w23, #291, lsl #12
+# CHECK: cmn      w2, #4095
+# CHECK: adds     w20, wsp, #0
+# CHECK: cmn      x3, #1, lsl #12
+0xed 0x8e 0x44 0x31
+0x5f 0xfc 0x3f 0x31
+0xf4 0x3 0x0 0x31
+0x7f 0x4 0x40 0xb1
+
+# CHECK: cmp      sp, #20, lsl #12
+# CHECK: cmp      x30, #4095
+# CHECK: subs     x4, sp, #3822
+0xff 0x53 0x40 0xf1
+0xdf 0xff 0x3f 0xf1
+0xe4 0xbb 0x3b 0xf1
+
+# These should really be CMN
+# CHECK: cmn      w3, #291, lsl #12
+# CHECK: cmn      wsp, #1365
+# CHECK: cmn      sp, #1092, lsl #12
+0x7f 0x8c 0x44 0x31
+0xff 0x57 0x15 0x31
+0xff 0x13 0x51 0xb1
+
+# CHECK: mov      sp, x30
+# CHECK: mov      wsp, w20
+# CHECK: mov      x11, sp
+# CHECK: mov      w24, wsp
+0xdf 0x3 0x0 0x91
+0x9f 0x2 0x0 0x11
+0xeb 0x3 0x0 0x91
+0xf8 0x3 0x0 0x11
+
+#------------------------------------------------------------------------------
+# Add-subtract (shifted register)
+#------------------------------------------------------------------------------
+
+# CHECK: add      w3, w5, w7
+# CHECK: add      wzr, w3, w5
+# CHECK: add      w20, wzr, w4
+# CHECK: add      w4, w6, wzr
+# CHECK: add      w11, w13, w15
+# CHECK: add      w9, w3, wzr, lsl #10
+# CHECK: add      w17, w29, w20, lsl #31
+# CHECK: add      w21, w22, w23, lsr #0
+# CHECK: add      w24, w25, w26, lsr #18
+# CHECK: add      w27, w28, w29, lsr #31
+# CHECK: add      w2, w3, w4, asr #0
+# CHECK: add      w5, w6, w7, asr #21
+# CHECK: add      w8, w9, w10, asr #31
+0xa3 0x0 0x7 0xb
+0x7f 0x0 0x5 0xb
+0xf4 0x3 0x4 0xb
+0xc4 0x0 0x1f 0xb
+0xab 0x1 0xf 0xb
+0x69 0x28 0x1f 0xb
+0xb1 0x7f 0x14 0xb
+0xd5 0x2 0x57 0xb
+0x38 0x4b 0x5a 0xb
+0x9b 0x7f 0x5d 0xb
+0x62 0x0 0x84 0xb
+0xc5 0x54 0x87 0xb
+0x28 0x7d 0x8a 0xb
+
+# CHECK: add      x3, x5, x7
+# CHECK: add      xzr, x3, x5
+# CHECK: add      x20, xzr, x4
+# CHECK: add      x4, x6, xzr
+# CHECK: add      x11, x13, x15
+# CHECK: add      x9, x3, xzr, lsl #10
+# CHECK: add      x17, x29, x20, lsl #63
+# CHECK: add      x21, x22, x23, lsr #0
+# CHECK: add      x24, x25, x26, lsr #18
+# CHECK: add      x27, x28, x29, lsr #63
+# CHECK: add      x2, x3, x4, asr #0
+# CHECK: add      x5, x6, x7, asr #21
+# CHECK: add      x8, x9, x10, asr #63
+0xa3 0x0 0x7 0x8b
+0x7f 0x0 0x5 0x8b
+0xf4 0x3 0x4 0x8b
+0xc4 0x0 0x1f 0x8b
+0xab 0x1 0xf 0x8b
+0x69 0x28 0x1f 0x8b
+0xb1 0xff 0x14 0x8b
+0xd5 0x2 0x57 0x8b
+0x38 0x4b 0x5a 0x8b
+0x9b 0xff 0x5d 0x8b
+0x62 0x0 0x84 0x8b
+0xc5 0x54 0x87 0x8b
+0x28 0xfd 0x8a 0x8b
+
+# CHECK: adds     w3, w5, w7
+# CHECK: cmn      w3, w5
+# CHECK: adds     w20, wzr, w4
+# CHECK: adds     w4, w6, wzr
+# CHECK: adds     w11, w13, w15
+# CHECK: adds     w9, w3, wzr, lsl #10
+# CHECK: adds     w17, w29, w20, lsl #31
+# CHECK: adds     w21, w22, w23, lsr #0
+# CHECK: adds     w24, w25, w26, lsr #18
+# CHECK: adds     w27, w28, w29, lsr #31
+# CHECK: adds     w2, w3, w4, asr #0
+# CHECK: adds     w5, w6, w7, asr #21
+# CHECK: adds     w8, w9, w10, asr #31
+0xa3 0x0 0x7 0x2b
+0x7f 0x0 0x5 0x2b
+0xf4 0x3 0x4 0x2b
+0xc4 0x0 0x1f 0x2b
+0xab 0x1 0xf 0x2b
+0x69 0x28 0x1f 0x2b
+0xb1 0x7f 0x14 0x2b
+0xd5 0x2 0x57 0x2b
+0x38 0x4b 0x5a 0x2b
+0x9b 0x7f 0x5d 0x2b
+0x62 0x0 0x84 0x2b
+0xc5 0x54 0x87 0x2b
+0x28 0x7d 0x8a 0x2b
+
+# CHECK: adds     x3, x5, x7
+# CHECK: cmn      x3, x5
+# CHECK: adds     x20, xzr, x4
+# CHECK: adds     x4, x6, xzr
+# CHECK: adds     x11, x13, x15
+# CHECK: adds     x9, x3, xzr, lsl #10
+# CHECK: adds     x17, x29, x20, lsl #63
+# CHECK: adds     x21, x22, x23, lsr #0
+# CHECK: adds     x24, x25, x26, lsr #18
+# CHECK: adds     x27, x28, x29, lsr #63
+# CHECK: adds     x2, x3, x4, asr #0
+# CHECK: adds     x5, x6, x7, asr #21
+# CHECK: adds     x8, x9, x10, asr #63
+0xa3 0x0 0x7 0xab
+0x7f 0x0 0x5 0xab
+0xf4 0x3 0x4 0xab
+0xc4 0x0 0x1f 0xab
+0xab 0x1 0xf 0xab
+0x69 0x28 0x1f 0xab
+0xb1 0xff 0x14 0xab
+0xd5 0x2 0x57 0xab
+0x38 0x4b 0x5a 0xab
+0x9b 0xff 0x5d 0xab
+0x62 0x0 0x84 0xab
+0xc5 0x54 0x87 0xab
+0x28 0xfd 0x8a 0xab
+
+# CHECK: sub      w3, w5, w7
+# CHECK: sub      wzr, w3, w5
+# CHECK: sub      w20, wzr, w4
+# CHECK: sub      w4, w6, wzr
+# CHECK: sub      w11, w13, w15
+# CHECK: sub      w9, w3, wzr, lsl #10
+# CHECK: sub      w17, w29, w20, lsl #31
+# CHECK: sub      w21, w22, w23, lsr #0
+# CHECK: sub      w24, w25, w26, lsr #18
+# CHECK: sub      w27, w28, w29, lsr #31
+# CHECK: sub      w2, w3, w4, asr #0
+# CHECK: sub      w5, w6, w7, asr #21
+# CHECK: sub      w8, w9, w10, asr #31
+0xa3 0x0 0x7 0x4b
+0x7f 0x0 0x5 0x4b
+0xf4 0x3 0x4 0x4b
+0xc4 0x0 0x1f 0x4b
+0xab 0x1 0xf 0x4b
+0x69 0x28 0x1f 0x4b
+0xb1 0x7f 0x14 0x4b
+0xd5 0x2 0x57 0x4b
+0x38 0x4b 0x5a 0x4b
+0x9b 0x7f 0x5d 0x4b
+0x62 0x0 0x84 0x4b
+0xc5 0x54 0x87 0x4b
+0x28 0x7d 0x8a 0x4b
+
+# CHECK: sub      x3, x5, x7
+# CHECK: sub      xzr, x3, x5
+# CHECK: sub      x20, xzr, x4
+# CHECK: sub      x4, x6, xzr
+# CHECK: sub      x11, x13, x15
+# CHECK: sub      x9, x3, xzr, lsl #10
+# CHECK: sub      x17, x29, x20, lsl #63
+# CHECK: sub      x21, x22, x23, lsr #0
+# CHECK: sub      x24, x25, x26, lsr #18
+# CHECK: sub      x27, x28, x29, lsr #63
+# CHECK: sub      x2, x3, x4, asr #0
+# CHECK: sub      x5, x6, x7, asr #21
+# CHECK: sub      x8, x9, x10, asr #63
+0xa3 0x0 0x7 0xcb
+0x7f 0x0 0x5 0xcb
+0xf4 0x3 0x4 0xcb
+0xc4 0x0 0x1f 0xcb
+0xab 0x1 0xf 0xcb
+0x69 0x28 0x1f 0xcb
+0xb1 0xff 0x14 0xcb
+0xd5 0x2 0x57 0xcb
+0x38 0x4b 0x5a 0xcb
+0x9b 0xff 0x5d 0xcb
+0x62 0x0 0x84 0xcb
+0xc5 0x54 0x87 0xcb
+0x28 0xfd 0x8a 0xcb
+
+# CHECK: subs     w3, w5, w7
+# CHECK: cmp      w3, w5
+# CHECK: subs     w20, wzr, w4
+# CHECK: subs     w4, w6, wzr
+# CHECK: subs     w11, w13, w15
+# CHECK: subs     w9, w3, wzr, lsl #10
+# CHECK: subs     w17, w29, w20, lsl #31
+# CHECK: subs     w21, w22, w23, lsr #0
+# CHECK: subs     w24, w25, w26, lsr #18
+# CHECK: subs     w27, w28, w29, lsr #31
+# CHECK: subs     w2, w3, w4, asr #0
+# CHECK: subs     w5, w6, w7, asr #21
+# CHECK: subs     w8, w9, w10, asr #31
+0xa3 0x0 0x7 0x6b
+0x7f 0x0 0x5 0x6b
+0xf4 0x3 0x4 0x6b
+0xc4 0x0 0x1f 0x6b
+0xab 0x1 0xf 0x6b
+0x69 0x28 0x1f 0x6b
+0xb1 0x7f 0x14 0x6b
+0xd5 0x2 0x57 0x6b
+0x38 0x4b 0x5a 0x6b
+0x9b 0x7f 0x5d 0x6b
+0x62 0x0 0x84 0x6b
+0xc5 0x54 0x87 0x6b
+0x28 0x7d 0x8a 0x6b
+
+# CHECK: subs     x3, x5, x7
+# CHECK: cmp      x3, x5
+# CHECK: subs     x20, xzr, x4
+# CHECK: subs     x4, x6, xzr
+# CHECK: subs     x11, x13, x15
+# CHECK: subs     x9, x3, xzr, lsl #10
+# CHECK: subs     x17, x29, x20, lsl #63
+# CHECK: subs     x21, x22, x23, lsr #0
+# CHECK: subs     x24, x25, x26, lsr #18
+# CHECK: subs     x27, x28, x29, lsr #63
+# CHECK: subs     x2, x3, x4, asr #0
+# CHECK: subs     x5, x6, x7, asr #21
+# CHECK: subs     x8, x9, x10, asr #63
+0xa3 0x0 0x7 0xeb
+0x7f 0x0 0x5 0xeb
+0xf4 0x3 0x4 0xeb
+0xc4 0x0 0x1f 0xeb
+0xab 0x1 0xf 0xeb
+0x69 0x28 0x1f 0xeb
+0xb1 0xff 0x14 0xeb
+0xd5 0x2 0x57 0xeb
+0x38 0x4b 0x5a 0xeb
+0x9b 0xff 0x5d 0xeb
+0x62 0x0 0x84 0xeb
+0xc5 0x54 0x87 0xeb
+0x28 0xfd 0x8a 0xeb
+
+# CHECK: cmn      w0, w3
+# CHECK: cmn      wzr, w4
+# CHECK: cmn      w5, wzr
+# CHECK: cmn      w6, w7
+# CHECK: cmn      w8, w9, lsl #15
+# CHECK: cmn      w10, w11, lsl #31
+# CHECK: cmn      w12, w13, lsr #0
+# CHECK: cmn      w14, w15, lsr #21
+# CHECK: cmn      w16, w17, lsr #31
+# CHECK: cmn      w18, w19, asr #0
+# CHECK: cmn      w20, w21, asr #22
+# CHECK: cmn      w22, w23, asr #31
+0x1f 0x0 0x3 0x2b
+0xff 0x3 0x4 0x2b
+0xbf 0x0 0x1f 0x2b
+0xdf 0x0 0x7 0x2b
+0x1f 0x3d 0x9 0x2b
+0x5f 0x7d 0xb 0x2b
+0x9f 0x1 0x4d 0x2b
+0xdf 0x55 0x4f 0x2b
+0x1f 0x7e 0x51 0x2b
+0x5f 0x2 0x93 0x2b
+0x9f 0x5a 0x95 0x2b
+0xdf 0x7e 0x97 0x2b
+
+# CHECK: cmn      x0, x3
+# CHECK: cmn      xzr, x4
+# CHECK: cmn      x5, xzr
+# CHECK: cmn      x6, x7
+# CHECK: cmn      x8, x9, lsl #15
+# CHECK: cmn      x10, x11, lsl #63
+# CHECK: cmn      x12, x13, lsr #0
+# CHECK: cmn      x14, x15, lsr #41
+# CHECK: cmn      x16, x17, lsr #63
+# CHECK: cmn      x18, x19, asr #0
+# CHECK: cmn      x20, x21, asr #55
+# CHECK: cmn      x22, x23, asr #63
+0x1f 0x0 0x3 0xab
+0xff 0x3 0x4 0xab
+0xbf 0x0 0x1f 0xab
+0xdf 0x0 0x7 0xab
+0x1f 0x3d 0x9 0xab
+0x5f 0xfd 0xb 0xab
+0x9f 0x1 0x4d 0xab
+0xdf 0xa5 0x4f 0xab
+0x1f 0xfe 0x51 0xab
+0x5f 0x2 0x93 0xab
+0x9f 0xde 0x95 0xab
+0xdf 0xfe 0x97 0xab
+
+# CHECK: cmp      w0, w3
+# CHECK: cmp      wzr, w4
+# CHECK: cmp      w5, wzr
+# CHECK: cmp      w6, w7
+# CHECK: cmp      w8, w9, lsl #15
+# CHECK: cmp      w10, w11, lsl #31
+# CHECK: cmp      w12, w13, lsr #0
+# CHECK: cmp      w14, w15, lsr #21
+# CHECK: cmp      w16, w17, lsr #31
+# CHECK: cmp      w18, w19, asr #0
+# CHECK: cmp      w20, w21, asr #22
+# CHECK: cmp      w22, w23, asr #31
+0x1f 0x0 0x3 0x6b
+0xff 0x3 0x4 0x6b
+0xbf 0x0 0x1f 0x6b
+0xdf 0x0 0x7 0x6b
+0x1f 0x3d 0x9 0x6b
+0x5f 0x7d 0xb 0x6b
+0x9f 0x1 0x4d 0x6b
+0xdf 0x55 0x4f 0x6b
+0x1f 0x7e 0x51 0x6b
+0x5f 0x2 0x93 0x6b
+0x9f 0x5a 0x95 0x6b
+0xdf 0x7e 0x97 0x6b
+
+# CHECK: cmp      x0, x3
+# CHECK: cmp      xzr, x4
+# CHECK: cmp      x5, xzr
+# CHECK: cmp      x6, x7
+# CHECK: cmp      x8, x9, lsl #15
+# CHECK: cmp      x10, x11, lsl #63
+# CHECK: cmp      x12, x13, lsr #0
+# CHECK: cmp      x14, x15, lsr #41
+# CHECK: cmp      x16, x17, lsr #63
+# CHECK: cmp      x18, x19, asr #0
+# CHECK: cmp      x20, x21, asr #55
+# CHECK: cmp      x22, x23, asr #63
+0x1f 0x0 0x3 0xeb
+0xff 0x3 0x4 0xeb
+0xbf 0x0 0x1f 0xeb
+0xdf 0x0 0x7 0xeb
+0x1f 0x3d 0x9 0xeb
+0x5f 0xfd 0xb 0xeb
+0x9f 0x1 0x4d 0xeb
+0xdf 0xa5 0x4f 0xeb
+0x1f 0xfe 0x51 0xeb
+0x5f 0x2 0x93 0xeb
+0x9f 0xde 0x95 0xeb
+0xdf 0xfe 0x97 0xeb
+
+# CHECK: sub      w29, wzr, w30
+# CHECK: sub      w30, wzr, wzr
+# CHECK: sub      wzr, wzr, w0
+# CHECK: sub      w28, wzr, w27
+# CHECK: sub      w26, wzr, w25, lsl #29
+# CHECK: sub      w24, wzr, w23, lsl #31
+# CHECK: sub      w22, wzr, w21, lsr #0
+# CHECK: sub      w20, wzr, w19, lsr #1
+# CHECK: sub      w18, wzr, w17, lsr #31
+# CHECK: sub      w16, wzr, w15, asr #0
+# CHECK: sub      w14, wzr, w13, asr #12
+# CHECK: sub      w12, wzr, w11, asr #31
+0xfd 0x3 0x1e 0x4b
+0xfe 0x3 0x1f 0x4b
+0xff 0x3 0x0 0x4b
+0xfc 0x3 0x1b 0x4b
+0xfa 0x77 0x19 0x4b
+0xf8 0x7f 0x17 0x4b
+0xf6 0x3 0x55 0x4b
+0xf4 0x7 0x53 0x4b
+0xf2 0x7f 0x51 0x4b
+0xf0 0x3 0x8f 0x4b
+0xee 0x33 0x8d 0x4b
+0xec 0x7f 0x8b 0x4b
+
+# CHECK: sub      x29, xzr, x30
+# CHECK: sub      x30, xzr, xzr
+# CHECK: sub      xzr, xzr, x0
+# CHECK: sub      x28, xzr, x27
+# CHECK: sub      x26, xzr, x25, lsl #29
+# CHECK: sub      x24, xzr, x23, lsl #31
+# CHECK: sub      x22, xzr, x21, lsr #0
+# CHECK: sub      x20, xzr, x19, lsr #1
+# CHECK: sub      x18, xzr, x17, lsr #31
+# CHECK: sub      x16, xzr, x15, asr #0
+# CHECK: sub      x14, xzr, x13, asr #12
+# CHECK: sub      x12, xzr, x11, asr #31
+0xfd 0x3 0x1e 0xcb
+0xfe 0x3 0x1f 0xcb
+0xff 0x3 0x0 0xcb
+0xfc 0x3 0x1b 0xcb
+0xfa 0x77 0x19 0xcb
+0xf8 0x7f 0x17 0xcb
+0xf6 0x3 0x55 0xcb
+0xf4 0x7 0x53 0xcb
+0xf2 0x7f 0x51 0xcb
+0xf0 0x3 0x8f 0xcb
+0xee 0x33 0x8d 0xcb
+0xec 0x7f 0x8b 0xcb
+
+# CHECK: subs     w29, wzr, w30
+# CHECK: subs     w30, wzr, wzr
+# CHECK: cmp      wzr, w0
+# CHECK: subs     w28, wzr, w27
+# CHECK: subs     w26, wzr, w25, lsl #29
+# CHECK: subs     w24, wzr, w23, lsl #31
+# CHECK: subs     w22, wzr, w21, lsr #0
+# CHECK: subs     w20, wzr, w19, lsr #1
+# CHECK: subs     w18, wzr, w17, lsr #31
+# CHECK: subs     w16, wzr, w15, asr #0
+# CHECK: subs     w14, wzr, w13, asr #12
+# CHECK: subs     w12, wzr, w11, asr #31
+0xfd 0x3 0x1e 0x6b
+0xfe 0x3 0x1f 0x6b
+0xff 0x3 0x0 0x6b
+0xfc 0x3 0x1b 0x6b
+0xfa 0x77 0x19 0x6b
+0xf8 0x7f 0x17 0x6b
+0xf6 0x3 0x55 0x6b
+0xf4 0x7 0x53 0x6b
+0xf2 0x7f 0x51 0x6b
+0xf0 0x3 0x8f 0x6b
+0xee 0x33 0x8d 0x6b
+0xec 0x7f 0x8b 0x6b
+
+# CHECK: subs     x29, xzr, x30
+# CHECK: subs     x30, xzr, xzr
+# CHECK: cmp      xzr, x0
+# CHECK: subs     x28, xzr, x27
+# CHECK: subs     x26, xzr, x25, lsl #29
+# CHECK: subs     x24, xzr, x23, lsl #31
+# CHECK: subs     x22, xzr, x21, lsr #0
+# CHECK: subs     x20, xzr, x19, lsr #1
+# CHECK: subs     x18, xzr, x17, lsr #31
+# CHECK: subs     x16, xzr, x15, asr #0
+# CHECK: subs     x14, xzr, x13, asr #12
+# CHECK: subs     x12, xzr, x11, asr #31
+0xfd 0x3 0x1e 0xeb
+0xfe 0x3 0x1f 0xeb
+0xff 0x3 0x0 0xeb
+0xfc 0x3 0x1b 0xeb
+0xfa 0x77 0x19 0xeb
+0xf8 0x7f 0x17 0xeb
+0xf6 0x3 0x55 0xeb
+0xf4 0x7 0x53 0xeb
+0xf2 0x7f 0x51 0xeb
+0xf0 0x3 0x8f 0xeb
+0xee 0x33 0x8d 0xeb
+0xec 0x7f 0x8b 0xeb
+
+#------------------------------------------------------------------------------
+# Add-subtract (with carry)
+#------------------------------------------------------------------------------
+
+# CHECK: adc      w29, w27, w25
+# CHECK: adc      wzr, w3, w4
+# CHECK: adc      w9, wzr, w10
+# CHECK: adc      w20, w0, wzr
+0x7d 0x3 0x19 0x1a
+0x7f 0x0 0x4 0x1a
+0xe9 0x3 0xa 0x1a
+0x14 0x0 0x1f 0x1a
+
+# CHECK: adc      x29, x27, x25
+# CHECK: adc      xzr, x3, x4
+# CHECK: adc      x9, xzr, x10
+# CHECK: adc      x20, x0, xzr
+0x7d 0x3 0x19 0x9a
+0x7f 0x0 0x4 0x9a
+0xe9 0x3 0xa 0x9a
+0x14 0x0 0x1f 0x9a
+
+# CHECK: adcs     w29, w27, w25
+# CHECK: adcs     wzr, w3, w4
+# CHECK: adcs     w9, wzr, w10
+# CHECK: adcs     w20, w0, wzr
+0x7d 0x3 0x19 0x3a
+0x7f 0x0 0x4 0x3a
+0xe9 0x3 0xa 0x3a
+0x14 0x0 0x1f 0x3a
+
+# CHECK: adcs     x29, x27, x25
+# CHECK: adcs     xzr, x3, x4
+# CHECK: adcs     x9, xzr, x10
+# CHECK: adcs     x20, x0, xzr
+0x7d 0x3 0x19 0xba
+0x7f 0x0 0x4 0xba
+0xe9 0x3 0xa 0xba
+0x14 0x0 0x1f 0xba
+
+# CHECK: sbc      w29, w27, w25
+# CHECK: sbc      wzr, w3, w4
+# CHECK: ngc      w9, w10
+# CHECK: sbc      w20, w0, wzr
+0x7d 0x3 0x19 0x5a
+0x7f 0x0 0x4 0x5a
+0xe9 0x3 0xa 0x5a
+0x14 0x0 0x1f 0x5a
+
+# CHECK: sbc      x29, x27, x25
+# CHECK: sbc      xzr, x3, x4
+# CHECK: ngc      x9, x10
+# CHECK: sbc      x20, x0, xzr
+0x7d 0x3 0x19 0xda
+0x7f 0x0 0x4 0xda
+0xe9 0x3 0xa 0xda
+0x14 0x0 0x1f 0xda
+
+# CHECK: sbcs     w29, w27, w25
+# CHECK: sbcs     wzr, w3, w4
+# CHECK: ngcs     w9, w10
+# CHECK: sbcs     w20, w0, wzr
+0x7d 0x3 0x19 0x7a
+0x7f 0x0 0x4 0x7a
+0xe9 0x3 0xa 0x7a
+0x14 0x0 0x1f 0x7a
+
+# CHECK: sbcs     x29, x27, x25
+# CHECK: sbcs     xzr, x3, x4
+# CHECK: ngcs     x9, x10
+# CHECK: sbcs     x20, x0, xzr
+0x7d 0x3 0x19 0xfa
+0x7f 0x0 0x4 0xfa
+0xe9 0x3 0xa 0xfa
+0x14 0x0 0x1f 0xfa
+
+# CHECK: ngc      w3, w12
+# CHECK: ngc      wzr, w9
+# CHECK: ngc      w23, wzr
+0xe3 0x3 0xc 0x5a
+0xff 0x3 0x9 0x5a
+0xf7 0x3 0x1f 0x5a
+
+# CHECK: ngc      x29, x30
+# CHECK: ngc      xzr, x0
+# CHECK: ngc      x0, xzr
+0xfd 0x3 0x1e 0xda
+0xff 0x3 0x0 0xda
+0xe0 0x3 0x1f 0xda
+
+# CHECK: ngcs     w3, w12
+# CHECK: ngcs     wzr, w9
+# CHECK: ngcs     w23, wzr
+0xe3 0x3 0xc 0x7a
+0xff 0x3 0x9 0x7a
+0xf7 0x3 0x1f 0x7a
+
+# CHECK: ngcs     x29, x30
+# CHECK: ngcs     xzr, x0
+# CHECK: ngcs     x0, xzr
+0xfd 0x3 0x1e 0xfa
+0xff 0x3 0x0 0xfa
+0xe0 0x3 0x1f 0xfa
+
+#------------------------------------------------------------------------------
+# Bitfield
+#------------------------------------------------------------------------------
+
+# CHECK: sbfx     x1, x2, #3, #2
+# CHECK: asr      x3, x4, #63
+# CHECK: asr      wzr, wzr, #31
+# CHECK: sbfx     w12, w9, #0, #1
+0x41 0x10 0x43 0x93
+0x83 0xfc 0x7f 0x93
+0xff 0x7f 0x1f 0x13
+0x2c 0x1 0x0 0x13
+
+# CHECK: ubfiz    x4, x5, #52, #11
+# CHECK: ubfx     xzr, x4, #0, #1
+# CHECK: ubfiz    x4, xzr, #1, #6
+# CHECK: lsr      x5, x6, #12
+0xa4 0x28 0x4c 0xd3
+0x9f 0x0 0x40 0xd3
+0xe4 0x17 0x7f 0xd3
+0xc5 0xfc 0x4c 0xd3
+
+# CHECK: bfi      x4, x5, #52, #11
+# CHECK: bfxil    xzr, x4, #0, #1
+# CHECK: bfi      x4, xzr, #1, #6
+# CHECK: bfxil    x5, x6, #12, #52
+0xa4 0x28 0x4c 0xb3
+0x9f 0x0 0x40 0xb3
+0xe4 0x17 0x7f 0xb3
+0xc5 0xfc 0x4c 0xb3
+
+# CHECK: sxtb     w1, w2
+# CHECK: sxtb     xzr, w3
+# CHECK: sxth     w9, w10
+# CHECK: sxth     x0, w1
+# CHECK: sxtw     x3, w30
+0x41 0x1c 0x0 0x13
+0x7f 0x1c 0x40 0x93
+0x49 0x3d 0x0 0x13
+0x20 0x3c 0x40 0x93
+0xc3 0x7f 0x40 0x93
+
+# CHECK: uxtb     w1, w2
+# CHECK: uxth     w9, w10
+# CHECK: ubfx     x3, x30, #0, #32
+0x41 0x1c 0x0 0x53
+0x49 0x3d 0x0 0x53
+0xc3 0x7f 0x40 0xd3
+
+# CHECK: asr      w3, w2, #0
+# CHECK: asr      w9, w10, #31
+# CHECK: asr      x20, x21, #63
+# CHECK: asr      w1, wzr, #3
+0x43 0x7c 0x0 0x13
+0x49 0x7d 0x1f 0x13
+0xb4 0xfe 0x7f 0x93
+0xe1 0x7f 0x3 0x13
+
+# CHECK: lsr      w3, w2, #0
+# CHECK: lsr      w9, w10, #31
+# CHECK: lsr      x20, x21, #63
+# CHECK: lsr      wzr, wzr, #3
+0x43 0x7c 0x0 0x53
+0x49 0x7d 0x1f 0x53
+0xb4 0xfe 0x7f 0xd3
+0xff 0x7f 0x3 0x53
+
+# CHECK: lsr      w3, w2, #0
+# CHECK: lsl      w9, w10, #31
+# CHECK: lsl      x20, x21, #63
+# CHECK: lsl      w1, wzr, #3
+0x43 0x7c 0x0 0x53
+0x49 0x1 0x1 0x53
+0xb4 0x2 0x41 0xd3
+0xe1 0x73 0x1d 0x53
+
+# CHECK: sbfx     w9, w10, #0, #1
+# CHECK: sbfiz    x2, x3, #63, #1
+# CHECK: asr      x19, x20, #0
+# CHECK: sbfiz    x9, x10, #5, #59
+# CHECK: asr      w9, w10, #0
+# CHECK: sbfiz    w11, w12, #31, #1
+# CHECK: sbfiz    w13, w14, #29, #3
+# CHECK: sbfiz    xzr, xzr, #10, #11
+0x49 0x1 0x0 0x13
+0x62 0x0 0x41 0x93
+0x93 0xfe 0x40 0x93
+0x49 0xe9 0x7b 0x93
+0x49 0x7d 0x0 0x13
+0x8b 0x1 0x1 0x13
+0xcd 0x9 0x3 0x13
+0xff 0x2b 0x76 0x93
+
+# CHECK: sbfx     w9, w10, #0, #1
+# CHECK: asr      x2, x3, #63
+# CHECK: asr      x19, x20, #0
+# CHECK: asr      x9, x10, #5
+# CHECK: asr      w9, w10, #0
+# CHECK: asr      w11, w12, #31
+# CHECK: asr      w13, w14, #29
+# CHECK: sbfx     xzr, xzr, #10, #11
+0x49 0x1 0x0 0x13
+0x62 0xfc 0x7f 0x93
+0x93 0xfe 0x40 0x93
+0x49 0xfd 0x45 0x93
+0x49 0x7d 0x0 0x13
+0x8b 0x7d 0x1f 0x13
+0xcd 0x7d 0x1d 0x13
+0xff 0x53 0x4a 0x93
+
+# CHECK: bfxil    w9, w10, #0, #1
+# CHECK: bfi      x2, x3, #63, #1
+# CHECK: bfxil    x19, x20, #0, #64
+# CHECK: bfi      x9, x10, #5, #59
+# CHECK: bfxil    w9, w10, #0, #32
+# CHECK: bfi      w11, w12, #31, #1
+# CHECK: bfi      w13, w14, #29, #3
+# CHECK: bfi      xzr, xzr, #10, #11
+0x49 0x1 0x0 0x33
+0x62 0x0 0x41 0xb3
+0x93 0xfe 0x40 0xb3
+0x49 0xe9 0x7b 0xb3
+0x49 0x7d 0x0 0x33
+0x8b 0x1 0x1 0x33
+0xcd 0x9 0x3 0x33
+0xff 0x2b 0x76 0xb3
+
+# CHECK: bfxil    w9, w10, #0, #1
+# CHECK: bfxil    x2, x3, #63, #1
+# CHECK: bfxil    x19, x20, #0, #64
+# CHECK: bfxil    x9, x10, #5, #59
+# CHECK: bfxil    w9, w10, #0, #32
+# CHECK: bfxil    w11, w12, #31, #1
+# CHECK: bfxil    w13, w14, #29, #3
+# CHECK: bfxil    xzr, xzr, #10, #11
+0x49 0x1 0x0 0x33
+0x62 0xfc 0x7f 0xb3
+0x93 0xfe 0x40 0xb3
+0x49 0xfd 0x45 0xb3
+0x49 0x7d 0x0 0x33
+0x8b 0x7d 0x1f 0x33
+0xcd 0x7d 0x1d 0x33
+0xff 0x53 0x4a 0xb3
+
+# CHECK: ubfx     w9, w10, #0, #1
+# CHECK: lsl      x2, x3, #63
+# CHECK: lsr      x19, x20, #0
+# CHECK: lsl      x9, x10, #5
+# CHECK: lsr      w9, w10, #0
+# CHECK: lsl      w11, w12, #31
+# CHECK: lsl      w13, w14, #29
+# CHECK: ubfiz    xzr, xzr, #10, #11
+0x49 0x1 0x0 0x53
+0x62 0x0 0x41 0xd3
+0x93 0xfe 0x40 0xd3
+0x49 0xe9 0x7b 0xd3
+0x49 0x7d 0x0 0x53
+0x8b 0x1 0x1 0x53
+0xcd 0x9 0x3 0x53
+0xff 0x2b 0x76 0xd3
+
+# CHECK: ubfx     w9, w10, #0, #1
+# CHECK: lsr      x2, x3, #63
+# CHECK: lsr      x19, x20, #0
+# CHECK: lsr      x9, x10, #5
+# CHECK: lsr      w9, w10, #0
+# CHECK: lsr      w11, w12, #31
+# CHECK: lsr      w13, w14, #29
+# CHECK: ubfx     xzr, xzr, #10, #11
+0x49 0x1 0x0 0x53
+0x62 0xfc 0x7f 0xd3
+0x93 0xfe 0x40 0xd3
+0x49 0xfd 0x45 0xd3
+0x49 0x7d 0x0 0x53
+0x8b 0x7d 0x1f 0x53
+0xcd 0x7d 0x1d 0x53
+0xff 0x53 0x4a 0xd3
+
+
+#------------------------------------------------------------------------------
+# Compare and branch (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: cbz      w5, #4
+# CHECK: cbz      x5, #0
+# CHECK: cbnz     x2, #-4
+# CHECK: cbnz     x26, #1048572
+0x25 0x0 0x0 0x34
+0x05 0x0 0x0 0xb4
+0xe2 0xff 0xff 0xb5
+0xfa 0xff 0x7f 0xb5
+
+# CHECK: cbz      wzr, #0
+# CHECK: cbnz     xzr, #0
+0x1f 0x0 0x0 0x34
+0x1f 0x0 0x0 0xb5
+
+#------------------------------------------------------------------------------
+# Conditional branch (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: b.ne #4
+# CHECK: b.ge #1048572
+# CHECK: b.ge #-4
+0x21 0x00 0x00 0x54
+0xea 0xff 0x7f 0x54
+0xea 0xff 0xff 0x54
+
+#------------------------------------------------------------------------------
+# Conditional compare (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: ccmp w1, #31, #0, eq
+# CHECK: ccmp w3, #0, #15, hs
+# CHECK: ccmp wzr, #15, #13, hs
+0x20 0x08 0x5f 0x7a
+0x6f 0x28 0x40 0x7a
+0xed 0x2b 0x4f 0x7a
+
+# CHECK: ccmp x9, #31, #0, le
+# CHECK: ccmp x3, #0, #15, gt
+# CHECK: ccmp xzr, #5, #7, ne
+0x20 0xd9 0x5f 0xfa
+0x6f 0xc8 0x40 0xfa
+0xe7 0x1b 0x45 0xfa
+
+# CHECK: ccmn w1, #31, #0, eq
+# CHECK: ccmn w3, #0, #15, hs
+# CHECK: ccmn wzr, #15, #13, hs
+0x20 0x08 0x5f 0x3a
+0x6f 0x28 0x40 0x3a
+0xed 0x2b 0x4f 0x3a
+
+# CHECK: ccmn x9, #31, #0, le
+# CHECK: ccmn x3, #0, #15, gt
+# CHECK: ccmn xzr, #5, #7, ne
+0x20 0xd9 0x5f 0xba
+0x6f 0xc8 0x40 0xba
+0xe7 0x1b 0x45 0xba
+
+#------------------------------------------------------------------------------
+# Conditional compare (register)
+#------------------------------------------------------------------------------
+
+# CHECK: ccmp w1, wzr, #0, eq
+# CHECK: ccmp w3, w0, #15, hs
+# CHECK: ccmp wzr, w15, #13, hs
+0x20 0x00 0x5f 0x7a
+0x6f 0x20 0x40 0x7a
+0xed 0x23 0x4f 0x7a
+
+# CHECK: ccmp x9, xzr, #0, le
+# CHECK: ccmp x3, x0, #15, gt
+# CHECK: ccmp xzr, x5, #7, ne
+0x20 0xd1 0x5f 0xfa
+0x6f 0xc0 0x40 0xfa
+0xe7 0x13 0x45 0xfa
+
+# CHECK: ccmn w1, wzr, #0, eq
+# CHECK: ccmn w3, w0, #15, hs
+# CHECK: ccmn wzr, w15, #13, hs
+0x20 0x00 0x5f 0x3a
+0x6f 0x20 0x40 0x3a
+0xed 0x23 0x4f 0x3a
+
+# CHECK: ccmn x9, xzr, #0, le
+# CHECK: ccmn x3, x0, #15, gt
+# CHECK: ccmn xzr, x5, #7, ne
+0x20 0xd1 0x5f 0xba
+0x6f 0xc0 0x40 0xba
+0xe7 0x13 0x45 0xba
+
+#------------------------------------------------------------------------------
+# Conditional select
+#------------------------------------------------------------------------------
+# CHECK: csel     w1, w0, w19, ne
+# CHECK: csel     wzr, w5, w9, eq
+# CHECK: csel     w9, wzr, w30, gt
+# CHECK: csel     w1, w28, wzr, mi
+# CHECK: csel     x19, x23, x29, lt
+# CHECK: csel     xzr, x3, x4, ge
+# CHECK: csel     x5, xzr, x6, hs
+# CHECK: csel     x7, x8, xzr, lo
+0x1 0x10 0x93 0x1a
+0xbf 0x0 0x89 0x1a
+0xe9 0xc3 0x9e 0x1a
+0x81 0x43 0x9f 0x1a
+0xf3 0xb2 0x9d 0x9a
+0x7f 0xa0 0x84 0x9a
+0xe5 0x23 0x86 0x9a
+0x7 0x31 0x9f 0x9a
+
+# CHECK: csinc    w1, w0, w19, ne
+# CHECK: csinc    wzr, w5, w9, eq
+# CHECK: csinc    w9, wzr, w30, gt
+# CHECK: csinc    w1, w28, wzr, mi
+# CHECK: csinc    x19, x23, x29, lt
+# CHECK: csinc    xzr, x3, x4, ge
+# CHECK: csinc    x5, xzr, x6, hs
+# CHECK: csinc    x7, x8, xzr, lo
+0x1 0x14 0x93 0x1a
+0xbf 0x4 0x89 0x1a
+0xe9 0xc7 0x9e 0x1a
+0x81 0x47 0x9f 0x1a
+0xf3 0xb6 0x9d 0x9a
+0x7f 0xa4 0x84 0x9a
+0xe5 0x27 0x86 0x9a
+0x7 0x35 0x9f 0x9a
+
+# CHECK: csinv    w1, w0, w19, ne
+# CHECK: csinv    wzr, w5, w9, eq
+# CHECK: csinv    w9, wzr, w30, gt
+# CHECK: csinv    w1, w28, wzr, mi
+# CHECK: csinv    x19, x23, x29, lt
+# CHECK: csinv    xzr, x3, x4, ge
+# CHECK: csinv    x5, xzr, x6, hs
+# CHECK: csinv    x7, x8, xzr, lo
+0x1 0x10 0x93 0x5a
+0xbf 0x0 0x89 0x5a
+0xe9 0xc3 0x9e 0x5a
+0x81 0x43 0x9f 0x5a
+0xf3 0xb2 0x9d 0xda
+0x7f 0xa0 0x84 0xda
+0xe5 0x23 0x86 0xda
+0x7 0x31 0x9f 0xda
+
+# CHECK: csneg    w1, w0, w19, ne
+# CHECK: csneg    wzr, w5, w9, eq
+# CHECK: csneg    w9, wzr, w30, gt
+# CHECK: csneg    w1, w28, wzr, mi
+# CHECK: csneg    x19, x23, x29, lt
+# CHECK: csneg    xzr, x3, x4, ge
+# CHECK: csneg    x5, xzr, x6, hs
+# CHECK: csneg    x7, x8, xzr, lo
+0x1 0x14 0x93 0x5a
+0xbf 0x4 0x89 0x5a
+0xe9 0xc7 0x9e 0x5a
+0x81 0x47 0x9f 0x5a
+0xf3 0xb6 0x9d 0xda
+0x7f 0xa4 0x84 0xda
+0xe5 0x27 0x86 0xda
+0x7 0x35 0x9f 0xda
+
+# CHECK: csinc    w3, wzr, wzr, ne
+# CHECK: csinc    x9, xzr, xzr, mi
+# CHECK: csinv    w20, wzr, wzr, eq
+# CHECK: csinv    x30, xzr, xzr, lt
+0xe3 0x17 0x9f 0x1a
+0xe9 0x47 0x9f 0x9a
+0xf4 0x3 0x9f 0x5a
+0xfe 0xb3 0x9f 0xda
+
+# CHECK: csinc    w3, w5, w5, le
+# CHECK: csinc    wzr, w4, w4, gt
+# CHECK: csinc    w9, wzr, wzr, ge
+# CHECK: csinc    x3, x5, x5, le
+# CHECK: csinc    xzr, x4, x4, gt
+# CHECK: csinc    x9, xzr, xzr, ge
+0xa3 0xd4 0x85 0x1a
+0x9f 0xc4 0x84 0x1a
+0xe9 0xa7 0x9f 0x1a
+0xa3 0xd4 0x85 0x9a
+0x9f 0xc4 0x84 0x9a
+0xe9 0xa7 0x9f 0x9a
+
+# CHECK: csinv    w3, w5, w5, le
+# CHECK: csinv    wzr, w4, w4, gt
+# CHECK: csinv    w9, wzr, wzr, ge
+# CHECK: csinv    x3, x5, x5, le
+# CHECK: csinv    xzr, x4, x4, gt
+# CHECK: csinv    x9, xzr, xzr, ge
+0xa3 0xd0 0x85 0x5a
+0x9f 0xc0 0x84 0x5a
+0xe9 0xa3 0x9f 0x5a
+0xa3 0xd0 0x85 0xda
+0x9f 0xc0 0x84 0xda
+0xe9 0xa3 0x9f 0xda
+
+# CHECK: csneg     w3, w5, w5, le
+# CHECK: csneg     wzr, w4, w4, gt
+# CHECK: csneg     w9, wzr, wzr, ge
+# CHECK: csneg     x3, x5, x5, le
+# CHECK: csneg     xzr, x4, x4, gt
+# CHECK: csneg     x9, xzr, xzr, ge
+0xa3 0xd4 0x85 0x5a
+0x9f 0xc4 0x84 0x5a
+0xe9 0xa7 0x9f 0x5a
+0xa3 0xd4 0x85 0xda
+0x9f 0xc4 0x84 0xda
+0xe9 0xa7 0x9f 0xda
+
+#------------------------------------------------------------------------------
+# Data-processing (1 source)
+#------------------------------------------------------------------------------
+
+# CHECK: rbit	w0, w7
+# CHECK: rbit   x18, x3
+# CHECK: rev16	w17, w1
+# CHECK: rev16	x5, x2
+# CHECK: rev	w18, w0
+# CHECK: rev32	x20, x1
+0xe0 0x00 0xc0 0x5a
+0x72 0x00 0xc0 0xda
+0x31 0x04 0xc0 0x5a
+0x45 0x04 0xc0 0xda
+0x12 0x08 0xc0 0x5a
+0x34 0x08 0xc0 0xda
+
+# CHECK: rev	x22, x2
+# CHECK: clz	w24, w3
+# CHECK: clz	x26, x4
+# CHECK: cls	w3, w5
+# CHECK: cls	x20, x5
+0x56 0x0c 0xc0 0xda
+0x78 0x10 0xc0 0x5a
+0x9a 0x10 0xc0 0xda
+0xa3 0x14 0xc0 0x5a
+0xb4 0x14 0xc0 0xda
+
+#------------------------------------------------------------------------------
+# Data-processing (2 source)
+#------------------------------------------------------------------------------
+
+# CHECK: crc32b  w5, w7, w20
+# CHECK: crc32h  w28, wzr, w30
+# CHECK: crc32w  w0, w1, w2
+# CHECK: crc32x  w7, w9, x20
+# CHECK: crc32cb w9, w5, w4
+# CHECK: crc32ch w13, w17, w25
+# CHECK: crc32cw wzr, w3, w5
+# CHECK: crc32cx w18, w16, xzr
+0xe5 0x40 0xd4 0x1a
+0xfc 0x47 0xde 0x1a
+0x20 0x48 0xc2 0x1a
+0x27 0x4d 0xd4 0x9a
+0xa9 0x50 0xc4 0x1a
+0x2d 0x56 0xd9 0x1a
+0x7f 0x58 0xc5 0x1a
+0x12 0x5e 0xdf 0x9a
+
+# CHECK: udiv	w0, w7, w10
+# CHECK: udiv	x9, x22, x4
+# CHECK: sdiv	w12, w21, w0
+# CHECK: sdiv	x13, x2, x1
+# CHECK: lsl	w11, w12, w13
+# CHECK: lsl	x14, x15, x16
+# CHECK: lsr	w17, w18, w19
+# CHECK: lsr	x20, x21, x22
+# CHECK: asr	w23, w24, w25
+# CHECK: asr	x26, x27, x28
+# CHECK: ror	w0, w1, w2
+# CHECK: ror    x3, x4, x5
+0xe0 0x08 0xca 0x1a
+0xc9 0x0a 0xc4 0x9a
+0xac 0x0e 0xc0 0x1a
+0x4d 0x0c 0xc1 0x9a
+0x8b 0x21 0xcd 0x1a
+0xee 0x21 0xd0 0x9a
+0x51 0x26 0xd3 0x1a
+0xb4 0x26 0xd6 0x9a
+0x17 0x2b 0xd9 0x1a
+0x7a 0x2b 0xdc 0x9a
+0x20 0x2c 0xc2 0x1a
+0x83 0x2c 0xc5 0x9a
+
+# CHECK: lsl	w6, w7, w8
+# CHECK: lsl	x9, x10, x11
+# CHECK: lsr	w12, w13, w14
+# CHECK: lsr	x15, x16, x17
+# CHECK: asr	w18, w19, w20
+# CHECK: asr	x21, x22, x23
+# CHECK: ror	w24, w25, w26
+# CHECK: ror	x27, x28, x29
+0xe6 0x20 0xc8 0x1a
+0x49 0x21 0xcb 0x9a
+0xac 0x25 0xce 0x1a
+0x0f 0x26 0xd1 0x9a
+0x72 0x2a 0xd4 0x1a
+0xd5 0x2a 0xd7 0x9a
+0x38 0x2f 0xda 0x1a
+0x9b 0x2f 0xdd 0x9a
+
+#------------------------------------------------------------------------------
+# Data-processing (3 sources)
+#------------------------------------------------------------------------------
+
+# First check some non-canonical encodings where Ra is not 0b11111 (only umulh
+# and smulh have them).
+
+# CHECK: smulh    x30, x29, x28
+# CHECK: smulh    xzr, x27, x26
+# CHECK: umulh    x30, x29, x28
+# CHECK: umulh    x23, x30, xzr
+0xbe 0x73 0x5c 0x9b
+0x7f 0x2f 0x5a 0x9b
+0xbe 0x3f 0xdc 0x9b
+0xd7 0x77 0xdf 0x9b
+
+# Now onto the boilerplate stuff
+
+# CHECK: madd     w1, w3, w7, w4
+# CHECK: madd     wzr, w0, w9, w11
+# CHECK: madd     w13, wzr, w4, w4
+# CHECK: madd     w19, w30, wzr, w29
+# CHECK: mul      w4, w5, w6
+0x61 0x10 0x7 0x1b
+0x1f 0x2c 0x9 0x1b
+0xed 0x13 0x4 0x1b
+0xd3 0x77 0x1f 0x1b
+0xa4 0x7c 0x6 0x1b
+
+# CHECK: madd     x1, x3, x7, x4
+# CHECK: madd     xzr, x0, x9, x11
+# CHECK: madd     x13, xzr, x4, x4
+# CHECK: madd     x19, x30, xzr, x29
+# CHECK: mul      x4, x5, x6
+0x61 0x10 0x7 0x9b
+0x1f 0x2c 0x9 0x9b
+0xed 0x13 0x4 0x9b
+0xd3 0x77 0x1f 0x9b
+0xa4 0x7c 0x6 0x9b
+
+# CHECK: msub     w1, w3, w7, w4
+# CHECK: msub     wzr, w0, w9, w11
+# CHECK: msub     w13, wzr, w4, w4
+# CHECK: msub     w19, w30, wzr, w29
+# CHECK: mneg     w4, w5, w6
+0x61 0x90 0x7 0x1b
+0x1f 0xac 0x9 0x1b
+0xed 0x93 0x4 0x1b
+0xd3 0xf7 0x1f 0x1b
+0xa4 0xfc 0x6 0x1b
+
+# CHECK: msub     x1, x3, x7, x4
+# CHECK: msub     xzr, x0, x9, x11
+# CHECK: msub     x13, xzr, x4, x4
+# CHECK: msub     x19, x30, xzr, x29
+# CHECK: mneg     x4, x5, x6
+0x61 0x90 0x7 0x9b
+0x1f 0xac 0x9 0x9b
+0xed 0x93 0x4 0x9b
+0xd3 0xf7 0x1f 0x9b
+0xa4 0xfc 0x6 0x9b
+
+# CHECK: smaddl   x3, w5, w2, x9
+# CHECK: smaddl   xzr, w10, w11, x12
+# CHECK: smaddl   x13, wzr, w14, x15
+# CHECK: smaddl   x16, w17, wzr, x18
+# CHECK: smull    x19, w20, w21
+0xa3 0x24 0x22 0x9b
+0x5f 0x31 0x2b 0x9b
+0xed 0x3f 0x2e 0x9b
+0x30 0x4a 0x3f 0x9b
+0x93 0x7e 0x35 0x9b
+
+# CHECK: smsubl   x3, w5, w2, x9
+# CHECK: smsubl   xzr, w10, w11, x12
+# CHECK: smsubl   x13, wzr, w14, x15
+# CHECK: smsubl   x16, w17, wzr, x18
+# CHECK: smnegl   x19, w20, w21
+0xa3 0xa4 0x22 0x9b
+0x5f 0xb1 0x2b 0x9b
+0xed 0xbf 0x2e 0x9b
+0x30 0xca 0x3f 0x9b
+0x93 0xfe 0x35 0x9b
+
+# CHECK: umaddl   x3, w5, w2, x9
+# CHECK: umaddl   xzr, w10, w11, x12
+# CHECK: umaddl   x13, wzr, w14, x15
+# CHECK: umaddl   x16, w17, wzr, x18
+# CHECK: umull    x19, w20, w21
+0xa3 0x24 0xa2 0x9b
+0x5f 0x31 0xab 0x9b
+0xed 0x3f 0xae 0x9b
+0x30 0x4a 0xbf 0x9b
+0x93 0x7e 0xb5 0x9b
+
+# CHECK: umsubl   x3, w5, w2, x9
+# CHECK: umsubl   xzr, w10, w11, x12
+# CHECK: umsubl   x13, wzr, w14, x15
+# CHECK: umsubl   x16, w17, wzr, x18
+# CHECK: umnegl   x19, w20, w21
+0xa3 0xa4 0xa2 0x9b
+0x5f 0xb1 0xab 0x9b
+0xed 0xbf 0xae 0x9b
+0x30 0xca 0xbf 0x9b
+0x93 0xfe 0xb5 0x9b
+
+# CHECK: smulh    x30, x29, x28
+# CHECK: smulh    xzr, x27, x26
+# CHECK: smulh    x25, xzr, x24
+# CHECK: smulh    x23, x22, xzr
+0xbe 0x7f 0x5c 0x9b
+0x7f 0x7f 0x5a 0x9b
+0xf9 0x7f 0x58 0x9b
+0xd7 0x7e 0x5f 0x9b
+
+# CHECK: umulh    x30, x29, x28
+# CHECK: umulh    xzr, x27, x26
+# CHECK: umulh    x25, xzr, x24
+# CHECK: umulh    x23, x22, xzr
+0xbe 0x7f 0xdc 0x9b
+0x7f 0x7f 0xda 0x9b
+0xf9 0x7f 0xd8 0x9b
+0xd7 0x7e 0xdf 0x9b
+
+# CHECK: mul      w3, w4, w5
+# CHECK: mul      wzr, w6, w7
+# CHECK: mul      w8, wzr, w9
+# CHECK: mul      w10, w11, wzr
+# CHECK: mul      x12, x13, x14
+# CHECK: mul      xzr, x15, x16
+# CHECK: mul      x17, xzr, x18
+# CHECK: mul      x19, x20, xzr
+0x83 0x7c 0x5 0x1b
+0xdf 0x7c 0x7 0x1b
+0xe8 0x7f 0x9 0x1b
+0x6a 0x7d 0x1f 0x1b
+0xac 0x7d 0xe 0x9b
+0xff 0x7d 0x10 0x9b
+0xf1 0x7f 0x12 0x9b
+0x93 0x7e 0x1f 0x9b
+
+# CHECK: mneg     w21, w22, w23
+# CHECK: mneg     wzr, w24, w25
+# CHECK: mneg     w26, wzr, w27
+# CHECK: mneg     w28, w29, wzr
+0xd5 0xfe 0x17 0x1b
+0x1f 0xff 0x19 0x1b
+0xfa 0xff 0x1b 0x1b
+0xbc 0xff 0x1f 0x1b
+
+# CHECK: smull    x11, w13, w17
+# CHECK: umull    x11, w13, w17
+# CHECK: smnegl   x11, w13, w17
+# CHECK: umnegl   x11, w13, w17
+0xab 0x7d 0x31 0x9b
+0xab 0x7d 0xb1 0x9b
+0xab 0xfd 0x31 0x9b
+0xab 0xfd 0xb1 0x9b
+
+#------------------------------------------------------------------------------
+# Exception generation
+#------------------------------------------------------------------------------
+
+# CHECK: svc      #0
+# CHECK: svc      #65535
+0x1 0x0 0x0 0xd4
+0xe1 0xff 0x1f 0xd4
+
+# CHECK: hvc      #1
+# CHECK: smc      #12000
+# CHECK: brk      #12
+# CHECK: hlt      #123
+0x22 0x0 0x0 0xd4
+0x3 0xdc 0x5 0xd4
+0x80 0x1 0x20 0xd4
+0x60 0xf 0x40 0xd4
+
+# CHECK: dcps1    #42
+# CHECK: dcps2    #9
+# CHECK: dcps3    #1000
+0x41 0x5 0xa0 0xd4
+0x22 0x1 0xa0 0xd4
+0x3 0x7d 0xa0 0xd4
+
+# CHECK: dcps1
+# CHECK: dcps2
+# CHECK: dcps3
+0x1 0x0 0xa0 0xd4
+0x2 0x0 0xa0 0xd4
+0x3 0x0 0xa0 0xd4
+
+#------------------------------------------------------------------------------
+# Extract (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: extr     w3, w5, w7, #0
+# CHECK: extr     w11, w13, w17, #31
+0xa3 0x0 0x87 0x13
+0xab 0x7d 0x91 0x13
+
+# CHECK: extr     x3, x5, x7, #15
+# CHECK: extr     x11, x13, x17, #63
+0xa3 0x3c 0xc7 0x93
+0xab 0xfd 0xd1 0x93
+
+# CHECK: extr     x19, x23, x23, #24
+# CHECK: extr     x29, xzr, xzr, #63
+# CHECK: extr     w9, w13, w13, #31
+0xf3 0x62 0xd7 0x93
+0xfd 0xff 0xdf 0x93
+0xa9 0x7d 0x8d 0x13
+
+#------------------------------------------------------------------------------
+# Floating-point compare
+#------------------------------------------------------------------------------
+
+# CHECK: fcmp    s3, s5
+# CHECK: fcmp    s31, #0.0
+# CHECK: fcmp    s31, #0.0
+0x60 0x20 0x25 0x1e
+0xe8 0x23 0x20 0x1e
+0xe8 0x23 0x3f 0x1e
+
+# CHECK: fcmpe   s29, s30
+# CHECK: fcmpe   s15, #0.0
+# CHECK: fcmpe   s15, #0.0
+0xb0 0x23 0x3e 0x1e
+0xf8 0x21 0x20 0x1e
+0xf8 0x21 0x2f 0x1e
+
+# CHECK: fcmp    d4, d12
+# CHECK: fcmp    d23, #0.0
+# CHECK: fcmp    d23, #0.0
+0x80 0x20 0x6c 0x1e
+0xe8 0x22 0x60 0x1e
+0xe8 0x22 0x77 0x1e
+
+# CHECK: fcmpe   d26, d22
+# CHECK: fcmpe   d29, #0.0
+# CHECK: fcmpe   d29, #0.0
+0x50 0x23 0x76 0x1e
+0xb8 0x23 0x60 0x1e
+0xb8 0x23 0x6d 0x1e
+
+#------------------------------------------------------------------------------
+# Floating-point conditional compare
+#------------------------------------------------------------------------------
+
+# CHECK: fccmp s1, s31, #0, eq
+# CHECK: fccmp s3, s0, #15, hs
+# CHECK: fccmp s31, s15, #13, hs
+0x20 0x04 0x3f 0x1e
+0x6f 0x24 0x20 0x1e
+0xed 0x27 0x2f 0x1e
+
+# CHECK: fccmp d9, d31, #0, le
+# CHECK: fccmp d3, d0, #15, gt
+# CHECK: fccmp d31, d5, #7, ne
+0x20 0xd5 0x7f 0x1e
+0x6f 0xc4 0x60 0x1e
+0xe7 0x17 0x65 0x1e
+
+# CHECK: fccmpe s1, s31, #0, eq
+# CHECK: fccmpe s3, s0, #15, hs
+# CHECK: fccmpe s31, s15, #13, hs
+0x30 0x04 0x3f 0x1e
+0x7f 0x24 0x20 0x1e
+0xfd 0x27 0x2f 0x1e
+
+# CHECK: fccmpe d9, d31, #0, le
+# CHECK: fccmpe d3, d0, #15, gt
+# CHECK: fccmpe d31, d5, #7, ne
+0x30 0xd5 0x7f 0x1e
+0x7f 0xc4 0x60 0x1e
+0xf7 0x17 0x65 0x1e
+
+#-------------------------------------------------------------------------------
+# Floating-point conditional select
+#-------------------------------------------------------------------------------
+
+# CHECK: fcsel s3, s20, s9, pl
+# CHECK: fcsel d9, d10, d11, mi
+0x83 0x5e 0x29 0x1e
+0x49 0x4d 0x6b 0x1e
+
+#------------------------------------------------------------------------------
+# Floating-point data-processing (1 source)
+#------------------------------------------------------------------------------
+
+# CHECK: fmov     s0, s1
+# CHECK: fabs     s2, s3
+# CHECK: fneg     s4, s5
+# CHECK: fsqrt    s6, s7
+# CHECK: fcvt     d8, s9
+# CHECK: fcvt     h10, s11
+# CHECK: frintn   s12, s13
+# CHECK: frintp   s14, s15
+# CHECK: frintm   s16, s17
+# CHECK: frintz   s18, s19
+# CHECK: frinta   s20, s21
+# CHECK: frintx   s22, s23
+# CHECK: frinti   s24, s25
+0x20 0x40 0x20 0x1e
+0x62 0xc0 0x20 0x1e
+0xa4 0x40 0x21 0x1e
+0xe6 0xc0 0x21 0x1e
+0x28 0xc1 0x22 0x1e
+0x6a 0xc1 0x23 0x1e
+0xac 0x41 0x24 0x1e
+0xee 0xc1 0x24 0x1e
+0x30 0x42 0x25 0x1e
+0x72 0xc2 0x25 0x1e
+0xb4 0x42 0x26 0x1e
+0xf6 0x42 0x27 0x1e
+0x38 0xc3 0x27 0x1e
+
+# CHECK: fmov     d0, d1
+# CHECK: fabs     d2, d3
+# CHECK: fneg     d4, d5
+# CHECK: fsqrt    d6, d7
+# CHECK: fcvt     s8, d9
+# CHECK: fcvt     h10, d11
+# CHECK: frintn   d12, d13
+# CHECK: frintp   d14, d15
+# CHECK: frintm   d16, d17
+# CHECK: frintz   d18, d19
+# CHECK: frinta   d20, d21
+# CHECK: frintx   d22, d23
+# CHECK: frinti   d24, d25
+0x20 0x40 0x60 0x1e
+0x62 0xc0 0x60 0x1e
+0xa4 0x40 0x61 0x1e
+0xe6 0xc0 0x61 0x1e
+0x28 0x41 0x62 0x1e
+0x6a 0xc1 0x63 0x1e
+0xac 0x41 0x64 0x1e
+0xee 0xc1 0x64 0x1e
+0x30 0x42 0x65 0x1e
+0x72 0xc2 0x65 0x1e
+0xb4 0x42 0x66 0x1e
+0xf6 0x42 0x67 0x1e
+0x38 0xc3 0x67 0x1e
+
+# CHECK: fcvt     s26, h27
+# CHECK: fcvt     d28, h29
+0x7a 0x43 0xe2 0x1e
+0xbc 0xc3 0xe2 0x1e
+
+#------------------------------------------------------------------------------
+# Floating-point data-processing (2 sources)
+#------------------------------------------------------------------------------
+
+# CHECK: fmul     s20, s19, s17
+# CHECK: fdiv     s1, s2, s3
+# CHECK: fadd     s4, s5, s6
+# CHECK: fsub     s7, s8, s9
+# CHECK: fmax     s10, s11, s12
+# CHECK: fmin     s13, s14, s15
+# CHECK: fmaxnm   s16, s17, s18
+# CHECK: fminnm   s19, s20, s21
+# CHECK: fnmul    s22, s23, s24
+0x74 0xa 0x31 0x1e
+0x41 0x18 0x23 0x1e
+0xa4 0x28 0x26 0x1e
+0x7 0x39 0x29 0x1e
+0x6a 0x49 0x2c 0x1e
+0xcd 0x59 0x2f 0x1e
+0x30 0x6a 0x32 0x1e
+0x93 0x7a 0x35 0x1e
+0xf6 0x8a 0x38 0x1e
+
+
+# CHECK: fmul     d20, d19, d17
+# CHECK: fdiv     d1, d2, d3
+# CHECK: fadd     d4, d5, d6
+# CHECK: fsub     d7, d8, d9
+# CHECK: fmax     d10, d11, d12
+# CHECK: fmin     d13, d14, d15
+# CHECK: fmaxnm   d16, d17, d18
+# CHECK: fminnm   d19, d20, d21
+# CHECK: fnmul    d22, d23, d24
+0x74 0xa 0x71 0x1e
+0x41 0x18 0x63 0x1e
+0xa4 0x28 0x66 0x1e
+0x7 0x39 0x69 0x1e
+0x6a 0x49 0x6c 0x1e
+0xcd 0x59 0x6f 0x1e
+0x30 0x6a 0x72 0x1e
+0x93 0x7a 0x75 0x1e
+0xf6 0x8a 0x78 0x1e
+
+#------------------------------------------------------------------------------
+# Floating-point data-processing (3 sources)
+#------------------------------------------------------------------------------
+
+# CHECK: fmadd s3, s5, s6, s31
+# CHECK: fmadd d3, d13, d0, d23
+# CHECK: fmsub s3, s5, s6, s31
+# CHECK: fmsub d3, d13, d0, d23
+# CHECK: fnmadd s3, s5, s6, s31
+# CHECK: fnmadd d3, d13, d0, d23
+# CHECK: fnmsub s3, s5, s6, s31
+# CHECK: fnmsub d3, d13, d0, d23
+0xa3 0x7c 0x06 0x1f
+0xa3 0x5d 0x40 0x1f
+0xa3 0xfc 0x06 0x1f
+0xa3 0xdd 0x40 0x1f
+0xa3 0x7c 0x26 0x1f
+0xa3 0x5d 0x60 0x1f
+0xa3 0xfc 0x26 0x1f
+0xa3 0xdd 0x60 0x1f
+
+#------------------------------------------------------------------------------
+# Floating-point <-> fixed-point conversion
+#------------------------------------------------------------------------------
+
+# CHECK: fcvtzs  w3, s5, #1
+# CHECK: fcvtzs  wzr, s20, #13
+# CHECK: fcvtzs  w19, s0, #32
+0xa3 0xfc 0x18 0x1e
+0x9f 0xce 0x18 0x1e
+0x13 0x80 0x18 0x1e
+
+# CHECK: fcvtzs  x3, s5, #1
+# CHECK: fcvtzs  x12, s30, #45
+# CHECK: fcvtzs  x19, s0, #64
+0xa3 0xfc 0x18 0x9e
+0xcc 0x4f 0x18 0x9e
+0x13 0x00 0x18 0x9e
+
+# CHECK: fcvtzs  w3, d5, #1
+# CHECK: fcvtzs  wzr, d20, #13
+# CHECK: fcvtzs  w19, d0, #32
+0xa3 0xfc 0x58 0x1e
+0x9f 0xce 0x58 0x1e
+0x13 0x80 0x58 0x1e
+
+# CHECK: fcvtzs  x3, d5, #1
+# CHECK: fcvtzs  x12, d30, #45
+# CHECK: fcvtzs  x19, d0, #64
+0xa3 0xfc 0x58 0x9e
+0xcc 0x4f 0x58 0x9e
+0x13 0x00 0x58 0x9e
+
+# CHECK: fcvtzu  w3, s5, #1
+# CHECK: fcvtzu  wzr, s20, #13
+# CHECK: fcvtzu  w19, s0, #32
+0xa3 0xfc 0x19 0x1e
+0x9f 0xce 0x19 0x1e
+0x13 0x80 0x19 0x1e
+
+# CHECK: fcvtzu  x3, s5, #1
+# CHECK: fcvtzu  x12, s30, #45
+# CHECK: fcvtzu  x19, s0, #64
+0xa3 0xfc 0x19 0x9e
+0xcc 0x4f 0x19 0x9e
+0x13 0x00 0x19 0x9e
+
+# CHECK: fcvtzu  w3, d5, #1
+# CHECK: fcvtzu  wzr, d20, #13
+# CHECK: fcvtzu  w19, d0, #32
+0xa3 0xfc 0x59 0x1e
+0x9f 0xce 0x59 0x1e
+0x13 0x80 0x59 0x1e
+
+# CHECK: fcvtzu  x3, d5, #1
+# CHECK: fcvtzu  x12, d30, #45
+# CHECK: fcvtzu  x19, d0, #64
+0xa3 0xfc 0x59 0x9e
+0xcc 0x4f 0x59 0x9e
+0x13 0x00 0x59 0x9e
+
+# CHECK: scvtf   s23, w19, #1
+# CHECK: scvtf   s31, wzr, #20
+# CHECK: scvtf   s14, w0, #32
+0x77 0xfe 0x02 0x1e
+0xff 0xb3 0x02 0x1e
+0x0e 0x80 0x02 0x1e
+
+# CHECK: scvtf   s23, x19, #1
+# CHECK: scvtf   s31, xzr, #20
+# CHECK: scvtf   s14, x0, #64
+0x77 0xfe 0x02 0x9e
+0xff 0xb3 0x02 0x9e
+0x0e 0x00 0x02 0x9e
+
+# CHECK: scvtf   d23, w19, #1
+# CHECK: scvtf   d31, wzr, #20
+# CHECK: scvtf   d14, w0, #32
+0x77 0xfe 0x42 0x1e
+0xff 0xb3 0x42 0x1e
+0x0e 0x80 0x42 0x1e
+
+# CHECK: scvtf   d23, x19, #1
+# CHECK: scvtf   d31, xzr, #20
+# CHECK: scvtf   d14, x0, #64
+0x77 0xfe 0x42 0x9e
+0xff 0xb3 0x42 0x9e
+0x0e 0x00 0x42 0x9e
+
+# CHECK: ucvtf   s23, w19, #1
+# CHECK: ucvtf   s31, wzr, #20
+# CHECK: ucvtf   s14, w0, #32
+0x77 0xfe 0x03 0x1e
+0xff 0xb3 0x03 0x1e
+0x0e 0x80 0x03 0x1e
+
+# CHECK: ucvtf   s23, x19, #1
+# CHECK: ucvtf   s31, xzr, #20
+# CHECK: ucvtf   s14, x0, #64
+0x77 0xfe 0x03 0x9e
+0xff 0xb3 0x03 0x9e
+0x0e 0x00 0x03 0x9e
+
+# CHECK: ucvtf   d23, w19, #1
+# CHECK: ucvtf   d31, wzr, #20
+# CHECK: ucvtf   d14, w0, #32
+0x77 0xfe 0x43 0x1e
+0xff 0xb3 0x43 0x1e
+0x0e 0x80 0x43 0x1e
+
+# CHECK: ucvtf   d23, x19, #1
+# CHECK: ucvtf   d31, xzr, #20
+# CHECK: ucvtf   d14, x0, #64
+0x77 0xfe 0x43 0x9e
+0xff 0xb3 0x43 0x9e
+0x0e 0x00 0x43 0x9e
+
+#------------------------------------------------------------------------------
+# Floating-point <-> integer conversion
+#------------------------------------------------------------------------------
+# CHECK: fcvtns   w3, s31
+# CHECK: fcvtns   xzr, s12
+# CHECK: fcvtnu   wzr, s12
+# CHECK: fcvtnu   x0, s0
+0xe3 0x3 0x20 0x1e
+0x9f 0x1 0x20 0x9e
+0x9f 0x1 0x21 0x1e
+0x0 0x0 0x21 0x9e
+
+# CHECK: fcvtps   wzr, s9
+# CHECK: fcvtps   x12, s20
+# CHECK: fcvtpu   w30, s23
+# CHECK: fcvtpu   x29, s3
+0x3f 0x1 0x28 0x1e
+0x8c 0x2 0x28 0x9e
+0xfe 0x2 0x29 0x1e
+0x7d 0x0 0x29 0x9e
+
+# CHECK: fcvtms   w2, s3
+# CHECK: fcvtms   x4, s5
+# CHECK: fcvtmu   w6, s7
+# CHECK: fcvtmu   x8, s9
+0x62 0x0 0x30 0x1e
+0xa4 0x0 0x30 0x9e
+0xe6 0x0 0x31 0x1e
+0x28 0x1 0x31 0x9e
+
+# CHECK: fcvtzs   w10, s11
+# CHECK: fcvtzs   x12, s13
+# CHECK: fcvtzu   w14, s15
+# CHECK: fcvtzu   x15, s16
+0x6a 0x1 0x38 0x1e
+0xac 0x1 0x38 0x9e
+0xee 0x1 0x39 0x1e
+0xf 0x2 0x39 0x9e
+
+# CHECK: scvtf    s17, w18
+# CHECK: scvtf    s19, x20
+# CHECK: ucvtf    s21, w22
+# CHECK: ucvtf    s23, x24
+0x51 0x2 0x22 0x1e
+0x93 0x2 0x22 0x9e
+0xd5 0x2 0x23 0x1e
+0x17 0x3 0x23 0x9e
+
+# CHECK: fcvtas   w25, s26
+# CHECK: fcvtas   x27, s28
+# CHECK: fcvtau   w29, s30
+# CHECK: fcvtau   xzr, s0
+0x59 0x3 0x24 0x1e
+0x9b 0x3 0x24 0x9e
+0xdd 0x3 0x25 0x1e
+0x1f 0x0 0x25 0x9e
+
+# CHECK: fcvtns   w3, d31
+# CHECK: fcvtns   xzr, d12
+# CHECK: fcvtnu   wzr, d12
+# CHECK: fcvtnu   x0, d0
+0xe3 0x3 0x60 0x1e
+0x9f 0x1 0x60 0x9e
+0x9f 0x1 0x61 0x1e
+0x0 0x0 0x61 0x9e
+
+# CHECK: fcvtps   wzr, d9
+# CHECK: fcvtps   x12, d20
+# CHECK: fcvtpu   w30, d23
+# CHECK: fcvtpu   x29, d3
+0x3f 0x1 0x68 0x1e
+0x8c 0x2 0x68 0x9e
+0xfe 0x2 0x69 0x1e
+0x7d 0x0 0x69 0x9e
+
+# CHECK: fcvtms   w2, d3
+# CHECK: fcvtms   x4, d5
+# CHECK: fcvtmu   w6, d7
+# CHECK: fcvtmu   x8, d9
+0x62 0x0 0x70 0x1e
+0xa4 0x0 0x70 0x9e
+0xe6 0x0 0x71 0x1e
+0x28 0x1 0x71 0x9e
+
+# CHECK: fcvtzs   w10, d11
+# CHECK: fcvtzs   x12, d13
+# CHECK: fcvtzu   w14, d15
+# CHECK: fcvtzu   x15, d16
+0x6a 0x1 0x78 0x1e
+0xac 0x1 0x78 0x9e
+0xee 0x1 0x79 0x1e
+0xf 0x2 0x79 0x9e
+
+# CHECK: scvtf    d17, w18
+# CHECK: scvtf    d19, x20
+# CHECK: ucvtf    d21, w22
+# CHECK: ucvtf    d23, x24
+0x51 0x2 0x62 0x1e
+0x93 0x2 0x62 0x9e
+0xd5 0x2 0x63 0x1e
+0x17 0x3 0x63 0x9e
+
+# CHECK: fcvtas   w25, d26
+# CHECK: fcvtas   x27, d28
+# CHECK: fcvtau   w29, d30
+# CHECK: fcvtau   xzr, d0
+0x59 0x3 0x64 0x1e
+0x9b 0x3 0x64 0x9e
+0xdd 0x3 0x65 0x1e
+0x1f 0x0 0x65 0x9e
+
+# CHECK: fmov     w3, s9
+# CHECK: fmov     s9, w3
+0x23 0x1 0x26 0x1e
+0x69 0x0 0x27 0x1e
+
+# CHECK: fmov     x20, d31
+# CHECK: fmov     d1, x15
+0xf4 0x3 0x66 0x9e
+0xe1 0x1 0x67 0x9e
+
+# CHECK: fmov     x3, v12.d[1]
+# CHECK: fmov     v1.d[1], x19
+0x83 0x1 0xae 0x9e
+0x61 0x2 0xaf 0x9e
+
+#------------------------------------------------------------------------------
+# Floating-point immediate
+#------------------------------------------------------------------------------
+
+# CHECK: fmov     s2, #0.12500000
+# CHECK: fmov     s3, #1.00000000
+# CHECK: fmov     d30, #16.00000000
+0x2 0x10 0x28 0x1e
+0x3 0x10 0x2e 0x1e
+0x1e 0x10 0x66 0x1e
+
+# CHECK: fmov     s4, #1.06250000
+# CHECK: fmov     d10, #1.93750000
+0x4 0x30 0x2e 0x1e
+0xa 0xf0 0x6f 0x1e
+
+# CHECK: fmov     s12, #-1.00000000
+0xc 0x10 0x3e 0x1e
+
+# CHECK: fmov     d16, #8.50000000
+0x10 0x30 0x64 0x1e
+
+#------------------------------------------------------------------------------
+# Load-register (literal)
+#------------------------------------------------------------------------------
+
+# CHECK: ldr       w3, #0
+# CHECK: ldr       x29, #4
+# CHECK: ldrsw     xzr, #-4
+0x03 0x00 0x00 0x18
+0x3d 0x00 0x00 0x58
+0xff 0xff 0xff 0x98
+
+# CHECK: ldr       s0, #8
+# CHECK: ldr       d0, #1048572
+# CHECK: ldr       q0, #-1048576
+0x40 0x00 0x00 0x1c
+0xe0 0xff 0x7f 0x5c
+0x00 0x00 0x80 0x9c
+
+# CHECK: prfm      pldl1strm, #0
+# CHECK: prfm      #22, #0
+0x01 0x00 0x00 0xd8
+0x16 0x00 0x00 0xd8
+
+#------------------------------------------------------------------------------
+# Load/store exclusive
+#------------------------------------------------------------------------------
+
+#CHECK: stxrb      w18, w8, [sp]
+#CHECK: stxrh      w24, w15, [x16]
+#CHECK: stxr       w5, w6, [x17]
+#CHECK: stxr       w1, x10, [x21]
+#CHECK: stxr       w1, x10, [x21]
+0xe8 0x7f 0x12 0x08
+0x0f 0x7e 0x18 0x48
+0x26 0x7e 0x05 0x88
+0xaa 0x7e 0x01 0xc8
+0xaa 0x7a 0x01 0xc8
+
+#CHECK: ldxrb      w30, [x0]
+#CHECK: ldxrh      w17, [x4]
+#CHECK: ldxr       w22, [sp]
+#CHECK: ldxr       x11, [x29]
+#CHECK: ldxr       x11, [x29]
+#CHECK: ldxr       x11, [x29]
+0x1e 0x7c 0x5f 0x08
+0x91 0x7c 0x5f 0x48
+0xf6 0x7f 0x5f 0x88
+0xab 0x7f 0x5f 0xc8
+0xab 0x6f 0x5f 0xc8
+0xab 0x7f 0x5e 0xc8
+
+#CHECK: stxp       w12, w11, w10, [sp]
+#CHECK: stxp       wzr, x27, x9, [x12]
+0xeb 0x2b 0x2c 0x88
+0x9b 0x25 0x3f 0xc8
+
+#CHECK: ldxp       w0, wzr, [sp]
+#CHECK: ldxp       x17, x0, [x18]
+#CHECK: ldxp       x17, x0, [x18]
+0xe0 0x7f 0x7f 0x88
+0x51 0x02 0x7f 0xc8
+0x51 0x02 0x7e 0xc8
+
+#CHECK: stlxrb     w12, w22, [x0]
+#CHECK: stlxrh     w10, w1, [x1]
+#CHECK: stlxr      w9, w2, [x2]
+#CHECK: stlxr      w9, x3, [sp]
+
+0x16 0xfc 0x0c 0x08
+0x21 0xfc 0x0a 0x48
+0x42 0xfc 0x09 0x88
+0xe3 0xff 0x09 0xc8
+
+#CHECK: ldaxrb     w8, [x4]
+#CHECK: ldaxrh     w7, [x5]
+#CHECK: ldaxr      w6, [sp]
+#CHECK: ldaxr      x5, [x6]
+#CHECK: ldaxr      x5, [x6]
+#CHECK: ldaxr      x5, [x6]
+0x88 0xfc 0x5f 0x08
+0xa7 0xfc 0x5f 0x48
+0xe6 0xff 0x5f 0x88
+0xc5 0xfc 0x5f 0xc8
+0xc5 0xec 0x5f 0xc8
+0xc5 0xfc 0x5e 0xc8
+
+#CHECK: stlxp      w4, w5, w6, [sp]
+#CHECK: stlxp      wzr, x6, x7, [x1]
+0xe5 0x9b 0x24 0x88
+0x26 0x9c 0x3f 0xc8
+
+#CHECK: ldaxp      w5, w18, [sp]
+#CHECK: ldaxp      x6, x19, [x22]
+#CHECK: ldaxp      x6, x19, [x22]
+0xe5 0xcb 0x7f 0x88
+0xc6 0xce 0x7f 0xc8
+0xc6 0xce 0x7e 0xc8
+
+#CHECK: stlrb      w24, [sp]
+#CHECK: stlrh      w25, [x30]
+#CHECK: stlr       w26, [x29]
+#CHECK: stlr       x27, [x28]
+#CHECK: stlr       x27, [x28]
+#CHECK: stlr       x27, [x28]
+0xf8 0xff 0x9f 0x08
+0xd9 0xff 0x9f 0x48
+0xba 0xff 0x9f 0x88
+0x9b 0xff 0x9f 0xc8
+0x9b 0xef 0x9f 0xc8
+0x9b 0xff 0x9e 0xc8
+
+#CHECK: ldarb      w23, [sp]
+#CHECK: ldarh      w22, [x30]
+#CHECK: ldar       wzr, [x29]
+#CHECK: ldar       x21, [x28]
+#CHECK: ldar       x21, [x28]
+#CHECK: ldar       x21, [x28]
+0xf7 0xff 0xdf 0x08
+0xd6 0xff 0xdf 0x48
+0xbf 0xff 0xdf 0x88
+0x95 0xff 0xdf 0xc8
+0x95 0xef 0xdf 0xc8
+0x95 0xff 0xde 0xc8
+
+#------------------------------------------------------------------------------
+# Load/store (unscaled immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: sturb    w9, [sp]
+# CHECK: sturh    wzr, [x12, #255]
+# CHECK: stur     w16, [x0, #-256]
+# CHECK: stur     x28, [x14, #1]
+0xe9 0x3 0x0 0x38
+0x9f 0xf1 0xf 0x78
+0x10 0x0 0x10 0xb8
+0xdc 0x11 0x0 0xf8
+
+# CHECK: ldurb    w1, [x20, #255]
+# CHECK: ldurh    w20, [x1, #255]
+# CHECK: ldur     w12, [sp, #255]
+# CHECK: ldur     xzr, [x12, #255]
+0x81 0xf2 0x4f 0x38
+0x34 0xf0 0x4f 0x78
+0xec 0xf3 0x4f 0xb8
+0x9f 0xf1 0x4f 0xf8
+
+# CHECK: ldursb   x9, [x7, #-256]
+# CHECK: ldursh   x17, [x19, #-256]
+# CHECK: ldursw   x20, [x15, #-256]
+# CHECK: prfum    pldl2keep, [sp, #-256]
+# CHECK: ldursb   w19, [x1, #-256]
+# CHECK: ldursh   w15, [x21, #-256]
+0xe9 0x0 0x90 0x38
+0x71 0x2 0x90 0x78
+0xf4 0x1 0x90 0xb8
+0xe2 0x3 0x90 0xf8
+0x33 0x0 0xd0 0x38
+0xaf 0x2 0xd0 0x78
+
+# CHECK: stur     b0, [sp, #1]
+# CHECK: stur     h12, [x12, #-1]
+# CHECK: stur     s15, [x0, #255]
+# CHECK: stur     d31, [x5, #25]
+# CHECK: stur     q9, [x5]
+0xe0 0x13 0x0 0x3c
+0x8c 0xf1 0x1f 0x7c
+0xf 0xf0 0xf 0xbc
+0xbf 0x90 0x1 0xfc
+0xa9 0x0 0x80 0x3c
+
+# CHECK: ldur     b3, [sp]
+# CHECK: ldur     h5, [x4, #-256]
+# CHECK: ldur     s7, [x12, #-1]
+# CHECK: ldur     d11, [x19, #4]
+# CHECK: ldur     q13, [x1, #2]
+0xe3 0x3 0x40 0x3c
+0x85 0x0 0x50 0x7c
+0x87 0xf1 0x5f 0xbc
+0x6b 0x42 0x40 0xfc
+0x2d 0x20 0xc0 0x3c
+
+#------------------------------------------------------------------------------
+# Load/store (immediate post-indexed)
+#------------------------------------------------------------------------------
+
+# E.g. "ldr wzr, [sp], #4" is *not* unpredictable
+# CHECK-NOT: warning: potentially undefined instruction encoding
+0xff 0x47 0x40 0xb8
+
+# CHECK: strb     w9, [x2], #255
+# CHECK: strb     w10, [x3], #1
+# CHECK: strb     w10, [x3], #-256
+# CHECK: strh     w9, [x2], #255
+# CHECK: strh     w9, [x2], #1
+# CHECK: strh     w10, [x3], #-256
+0x49 0xf4 0xf 0x38
+0x6a 0x14 0x0 0x38
+0x6a 0x4 0x10 0x38
+0x49 0xf4 0xf 0x78
+0x49 0x14 0x0 0x78
+0x6a 0x4 0x10 0x78
+
+# CHECK: str      w19, [sp], #255
+# CHECK: str      w20, [x30], #1
+# CHECK: str      w21, [x12], #-256
+# CHECK: str      xzr, [x9], #255
+# CHECK: str      x2, [x3], #1
+# CHECK: str      x19, [x12], #-256
+0xf3 0xf7 0xf 0xb8
+0xd4 0x17 0x0 0xb8
+0x95 0x5 0x10 0xb8
+0x3f 0xf5 0xf 0xf8
+0x62 0x14 0x0 0xf8
+0x93 0x5 0x10 0xf8
+
+# CHECK: ldrb     w9, [x2], #255
+# CHECK: ldrb     w10, [x3], #1
+# CHECK: ldrb     w10, [x3], #-256
+# CHECK: ldrh     w9, [x2], #255
+# CHECK: ldrh     w9, [x2], #1
+# CHECK: ldrh     w10, [x3], #-256
+0x49 0xf4 0x4f 0x38
+0x6a 0x14 0x40 0x38
+0x6a 0x4 0x50 0x38
+0x49 0xf4 0x4f 0x78
+0x49 0x14 0x40 0x78
+0x6a 0x4 0x50 0x78
+
+# CHECK: ldr      w19, [sp], #255
+# CHECK: ldr      w20, [x30], #1
+# CHECK: ldr      w21, [x12], #-256
+# CHECK: ldr      xzr, [x9], #255
+# CHECK: ldr      x2, [x3], #1
+# CHECK: ldr      x19, [x12], #-256
+0xf3 0xf7 0x4f 0xb8
+0xd4 0x17 0x40 0xb8
+0x95 0x5 0x50 0xb8
+0x3f 0xf5 0x4f 0xf8
+0x62 0x14 0x40 0xf8
+0x93 0x5 0x50 0xf8
+
+# CHECK: ldrsb    xzr, [x9], #255
+# CHECK: ldrsb    x2, [x3], #1
+# CHECK: ldrsb    x19, [x12], #-256
+# CHECK: ldrsh    xzr, [x9], #255
+# CHECK: ldrsh    x2, [x3], #1
+# CHECK: ldrsh    x19, [x12], #-256
+# CHECK: ldrsw    xzr, [x9], #255
+# CHECK: ldrsw    x2, [x3], #1
+# CHECK: ldrsw    x19, [x12], #-256
+0x3f 0xf5 0x8f 0x38
+0x62 0x14 0x80 0x38
+0x93 0x5 0x90 0x38
+0x3f 0xf5 0x8f 0x78
+0x62 0x14 0x80 0x78
+0x93 0x5 0x90 0x78
+0x3f 0xf5 0x8f 0xb8
+0x62 0x14 0x80 0xb8
+0x93 0x5 0x90 0xb8
+
+# CHECK: ldrsb    wzr, [x9], #255
+# CHECK: ldrsb    w2, [x3], #1
+# CHECK: ldrsb    w19, [x12], #-256
+# CHECK: ldrsh    wzr, [x9], #255
+# CHECK: ldrsh    w2, [x3], #1
+# CHECK: ldrsh    w19, [x12], #-256
+0x3f 0xf5 0xcf 0x38
+0x62 0x14 0xc0 0x38
+0x93 0x5 0xd0 0x38
+0x3f 0xf5 0xcf 0x78
+0x62 0x14 0xc0 0x78
+0x93 0x5 0xd0 0x78
+
+# CHECK: str      b0, [x0], #255
+# CHECK: str      b3, [x3], #1
+# CHECK: str      b5, [sp], #-256
+# CHECK: str      h10, [x10], #255
+# CHECK: str      h13, [x23], #1
+# CHECK: str      h15, [sp], #-256
+# CHECK: str      s20, [x20], #255
+# CHECK: str      s23, [x23], #1
+# CHECK: str      s25, [x0], #-256
+# CHECK: str      d20, [x20], #255
+# CHECK: str      d23, [x23], #1
+# CHECK: str      d25, [x0], #-256
+0x0 0xf4 0xf 0x3c
+0x63 0x14 0x0 0x3c
+0xe5 0x7 0x10 0x3c
+0x4a 0xf5 0xf 0x7c
+0xed 0x16 0x0 0x7c
+0xef 0x7 0x10 0x7c
+0x94 0xf6 0xf 0xbc
+0xf7 0x16 0x0 0xbc
+0x19 0x4 0x10 0xbc
+0x94 0xf6 0xf 0xfc
+0xf7 0x16 0x0 0xfc
+0x19 0x4 0x10 0xfc
+
+# CHECK: ldr      b0, [x0], #255
+# CHECK: ldr      b3, [x3], #1
+# CHECK: ldr      b5, [sp], #-256
+# CHECK: ldr      h10, [x10], #255
+# CHECK: ldr      h13, [x23], #1
+# CHECK: ldr      h15, [sp], #-256
+# CHECK: ldr      s20, [x20], #255
+# CHECK: ldr      s23, [x23], #1
+# CHECK: ldr      s25, [x0], #-256
+# CHECK: ldr      d20, [x20], #255
+# CHECK: ldr      d23, [x23], #1
+# CHECK: ldr      d25, [x0], #-256
+0x0 0xf4 0x4f 0x3c
+0x63 0x14 0x40 0x3c
+0xe5 0x7 0x50 0x3c
+0x4a 0xf5 0x4f 0x7c
+0xed 0x16 0x40 0x7c
+0xef 0x7 0x50 0x7c
+0x94 0xf6 0x4f 0xbc
+0xf7 0x16 0x40 0xbc
+0x19 0x4 0x50 0xbc
+0x94 0xf6 0x4f 0xfc
+0xf7 0x16 0x40 0xfc
+0x19 0x4 0x50 0xfc
+
+# CHECK: ldr      q20, [x1], #255
+# CHECK: ldr      q23, [x9], #1
+# CHECK: ldr      q25, [x20], #-256
+# CHECK: str      q10, [x1], #255
+# CHECK: str      q22, [sp], #1
+# CHECK: str      q21, [x20], #-256
+0x34 0xf4 0xcf 0x3c
+0x37 0x15 0xc0 0x3c
+0x99 0x6 0xd0 0x3c
+0x2a 0xf4 0x8f 0x3c
+0xf6 0x17 0x80 0x3c
+0x95 0x6 0x90 0x3c
+
+#-------------------------------------------------------------------------------
+# Load/store (immediate pre-indexed)
+#-------------------------------------------------------------------------------
+
+# E.g. "ldr xzr, [sp, #0]!" is *not* unpredictable
+# CHECK-NOT: warning: potentially undefined instruction encoding
+0xff 0xf 0x40 0xf8
+
+# CHECK: ldr      x3, [x4, #0]!
+0x83 0xc 0x40 0xf8
+
+# CHECK: strb     w9, [x2, #255]!
+# CHECK: strb     w10, [x3, #1]!
+# CHECK: strb     w10, [x3, #-256]!
+# CHECK: strh     w9, [x2, #255]!
+# CHECK: strh     w9, [x2, #1]!
+# CHECK: strh     w10, [x3, #-256]!
+0x49 0xfc 0xf 0x38
+0x6a 0x1c 0x0 0x38
+0x6a 0xc 0x10 0x38
+0x49 0xfc 0xf 0x78
+0x49 0x1c 0x0 0x78
+0x6a 0xc 0x10 0x78
+
+# CHECK: str      w19, [sp, #255]!
+# CHECK: str      w20, [x30, #1]!
+# CHECK: str      w21, [x12, #-256]!
+# CHECK: str      xzr, [x9, #255]!
+# CHECK: str      x2, [x3, #1]!
+# CHECK: str      x19, [x12, #-256]!
+0xf3 0xff 0xf 0xb8
+0xd4 0x1f 0x0 0xb8
+0x95 0xd 0x10 0xb8
+0x3f 0xfd 0xf 0xf8
+0x62 0x1c 0x0 0xf8
+0x93 0xd 0x10 0xf8
+
+# CHECK: ldrb     w9, [x2, #255]!
+# CHECK: ldrb     w10, [x3, #1]!
+# CHECK: ldrb     w10, [x3, #-256]!
+# CHECK: ldrh     w9, [x2, #255]!
+# CHECK: ldrh     w9, [x2, #1]!
+# CHECK: ldrh     w10, [x3, #-256]!
+0x49 0xfc 0x4f 0x38
+0x6a 0x1c 0x40 0x38
+0x6a 0xc 0x50 0x38
+0x49 0xfc 0x4f 0x78
+0x49 0x1c 0x40 0x78
+0x6a 0xc 0x50 0x78
+
+# CHECK: ldr      w19, [sp, #255]!
+# CHECK: ldr      w20, [x30, #1]!
+# CHECK: ldr      w21, [x12, #-256]!
+# CHECK: ldr      xzr, [x9, #255]!
+# CHECK: ldr      x2, [x3, #1]!
+# CHECK: ldr      x19, [x12, #-256]!
+0xf3 0xff 0x4f 0xb8
+0xd4 0x1f 0x40 0xb8
+0x95 0xd 0x50 0xb8
+0x3f 0xfd 0x4f 0xf8
+0x62 0x1c 0x40 0xf8
+0x93 0xd 0x50 0xf8
+
+# CHECK: ldrsb    xzr, [x9, #255]!
+# CHECK: ldrsb    x2, [x3, #1]!
+# CHECK: ldrsb    x19, [x12, #-256]!
+# CHECK: ldrsh    xzr, [x9, #255]!
+# CHECK: ldrsh    x2, [x3, #1]!
+# CHECK: ldrsh    x19, [x12, #-256]!
+# CHECK: ldrsw    xzr, [x9, #255]!
+# CHECK: ldrsw    x2, [x3, #1]!
+# CHECK: ldrsw    x19, [x12, #-256]!
+0x3f 0xfd 0x8f 0x38
+0x62 0x1c 0x80 0x38
+0x93 0xd 0x90 0x38
+0x3f 0xfd 0x8f 0x78
+0x62 0x1c 0x80 0x78
+0x93 0xd 0x90 0x78
+0x3f 0xfd 0x8f 0xb8
+0x62 0x1c 0x80 0xb8
+0x93 0xd 0x90 0xb8
+
+# CHECK: ldrsb    wzr, [x9, #255]!
+# CHECK: ldrsb    w2, [x3, #1]!
+# CHECK: ldrsb    w19, [x12, #-256]!
+# CHECK: ldrsh    wzr, [x9, #255]!
+# CHECK: ldrsh    w2, [x3, #1]!
+# CHECK: ldrsh    w19, [x12, #-256]!
+0x3f 0xfd 0xcf 0x38
+0x62 0x1c 0xc0 0x38
+0x93 0xd 0xd0 0x38
+0x3f 0xfd 0xcf 0x78
+0x62 0x1c 0xc0 0x78
+0x93 0xd 0xd0 0x78
+
+# CHECK: str      b0, [x0, #255]!
+# CHECK: str      b3, [x3, #1]!
+# CHECK: str      b5, [sp, #-256]!
+# CHECK: str      h10, [x10, #255]!
+# CHECK: str      h13, [x23, #1]!
+# CHECK: str      h15, [sp, #-256]!
+# CHECK: str      s20, [x20, #255]!
+# CHECK: str      s23, [x23, #1]!
+# CHECK: str      s25, [x0, #-256]!
+# CHECK: str      d20, [x20, #255]!
+# CHECK: str      d23, [x23, #1]!
+# CHECK: str      d25, [x0, #-256]!
+0x0 0xfc 0xf 0x3c
+0x63 0x1c 0x0 0x3c
+0xe5 0xf 0x10 0x3c
+0x4a 0xfd 0xf 0x7c
+0xed 0x1e 0x0 0x7c
+0xef 0xf 0x10 0x7c
+0x94 0xfe 0xf 0xbc
+0xf7 0x1e 0x0 0xbc
+0x19 0xc 0x10 0xbc
+0x94 0xfe 0xf 0xfc
+0xf7 0x1e 0x0 0xfc
+0x19 0xc 0x10 0xfc
+
+# CHECK: ldr      b0, [x0, #255]!
+# CHECK: ldr      b3, [x3, #1]!
+# CHECK: ldr      b5, [sp, #-256]!
+# CHECK: ldr      h10, [x10, #255]!
+# CHECK: ldr      h13, [x23, #1]!
+# CHECK: ldr      h15, [sp, #-256]!
+# CHECK: ldr      s20, [x20, #255]!
+# CHECK: ldr      s23, [x23, #1]!
+# CHECK: ldr      s25, [x0, #-256]!
+# CHECK: ldr      d20, [x20, #255]!
+# CHECK: ldr      d23, [x23, #1]!
+# CHECK: ldr      d25, [x0, #-256]!
+0x0 0xfc 0x4f 0x3c
+0x63 0x1c 0x40 0x3c
+0xe5 0xf 0x50 0x3c
+0x4a 0xfd 0x4f 0x7c
+0xed 0x1e 0x40 0x7c
+0xef 0xf 0x50 0x7c
+0x94 0xfe 0x4f 0xbc
+0xf7 0x1e 0x40 0xbc
+0x19 0xc 0x50 0xbc
+0x94 0xfe 0x4f 0xfc
+0xf7 0x1e 0x40 0xfc
+0x19 0xc 0x50 0xfc
+
+# CHECK: ldr      q20, [x1, #255]!
+# CHECK: ldr      q23, [x9, #1]!
+# CHECK: ldr      q25, [x20, #-256]!
+# CHECK: str      q10, [x1, #255]!
+# CHECK: str      q22, [sp, #1]!
+# CHECK: str      q21, [x20, #-256]!
+0x34 0xfc 0xcf 0x3c
+0x37 0x1d 0xc0 0x3c
+0x99 0xe 0xd0 0x3c
+0x2a 0xfc 0x8f 0x3c
+0xf6 0x1f 0x80 0x3c
+0x95 0xe 0x90 0x3c
+
+#------------------------------------------------------------------------------
+# Load/store (unprivileged)
+#------------------------------------------------------------------------------
+
+# CHECK: sttrb    w9, [sp]
+# CHECK: sttrh    wzr, [x12, #255]
+# CHECK: sttr     w16, [x0, #-256]
+# CHECK: sttr     x28, [x14, #1]
+0xe9 0x0b 0x0 0x38
+0x9f 0xf9 0xf 0x78
+0x10 0x08 0x10 0xb8
+0xdc 0x19 0x0 0xf8
+
+# CHECK: ldtrb    w1, [x20, #255]
+# CHECK: ldtrh    w20, [x1, #255]
+# CHECK: ldtr     w12, [sp, #255]
+# CHECK: ldtr     xzr, [x12, #255]
+0x81 0xfa 0x4f 0x38
+0x34 0xf8 0x4f 0x78
+0xec 0xfb 0x4f 0xb8
+0x9f 0xf9 0x4f 0xf8
+
+# CHECK: ldtrsb   x9, [x7, #-256]
+# CHECK: ldtrsh   x17, [x19, #-256]
+# CHECK: ldtrsw   x20, [x15, #-256]
+# CHECK: ldtrsb   w19, [x1, #-256]
+# CHECK: ldtrsh   w15, [x21, #-256]
+0xe9 0x08 0x90 0x38
+0x71 0x0a 0x90 0x78
+0xf4 0x09 0x90 0xb8
+0x33 0x08 0xd0 0x38
+0xaf 0x0a 0xd0 0x78
+
+#------------------------------------------------------------------------------
+# Load/store (unsigned immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: ldr      x0, [x0]
+# CHECK: ldr      x4, [x29]
+# CHECK: ldr      x30, [x12, #32760]
+# CHECK: ldr      x20, [sp, #8]
+0x0 0x0 0x40 0xf9
+0xa4 0x3 0x40 0xf9
+0x9e 0xfd 0x7f 0xf9
+0xf4 0x7 0x40 0xf9
+
+# CHECK: ldr      xzr, [sp]
+0xff 0x3 0x40 0xf9
+
+# CHECK: ldr      w2, [sp]
+# CHECK: ldr      w17, [sp, #16380]
+# CHECK: ldr      w13, [x2, #4]
+0xe2 0x3 0x40 0xb9
+0xf1 0xff 0x7f 0xb9
+0x4d 0x4 0x40 0xb9
+
+# CHECK: ldrsw    x2, [x5, #4]
+# CHECK: ldrsw    x23, [sp, #16380]
+0xa2 0x4 0x80 0xb9
+0xf7 0xff 0xbf 0xb9
+
+# CHECK: ldrh     w2, [x4]
+# CHECK: ldrsh    w23, [x6, #8190]
+# CHECK: ldrsh    wzr, [sp, #2]
+# CHECK: ldrsh    x29, [x2, #2]
+0x82 0x0 0x40 0x79
+0xd7 0xfc 0xff 0x79
+0xff 0x7 0xc0 0x79
+0x5d 0x4 0x80 0x79
+
+# CHECK: ldrb     w26, [x3, #121]
+# CHECK: ldrb     w12, [x2]
+# CHECK: ldrsb    w27, [sp, #4095]
+# CHECK: ldrsb    xzr, [x15]
+0x7a 0xe4 0x41 0x39
+0x4c 0x0 0x40 0x39
+0xfb 0xff 0xff 0x39
+0xff 0x1 0x80 0x39
+
+# CHECK: str      x30, [sp]
+# CHECK: str      w20, [x4, #16380]
+# CHECK: strh     w20, [x10, #14]
+# CHECK: strh     w17, [sp, #8190]
+# CHECK: strb     w23, [x3, #4095]
+# CHECK: strb     wzr, [x2]
+0xfe 0x3 0x0 0xf9
+0x94 0xfc 0x3f 0xb9
+0x54 0x1d 0x0 0x79
+0xf1 0xff 0x3f 0x79
+0x77 0xfc 0x3f 0x39
+0x5f 0x0 0x0 0x39
+
+# CHECK: ldr      b31, [sp, #4095]
+# CHECK: ldr      h20, [x2, #8190]
+# CHECK: ldr      s10, [x19, #16380]
+# CHECK: ldr      d3, [x10, #32760]
+# CHECK: str      q12, [sp, #65520]
+0xff 0xff 0x7f 0x3d
+0x54 0xfc 0x7f 0x7d
+0x6a 0xfe 0x7f 0xbd
+0x43 0xfd 0x7f 0xfd
+0xec 0xff 0xbf 0x3d
+
+# CHECK: prfm    pldl1keep, [sp, #8]
+# CHECK: prfm    pldl1strm, [x3, #0]
+# CHECK: prfm    pldl2keep, [x5, #16]
+# CHECK: prfm    pldl2strm, [x2, #0]
+# CHECK: prfm    pldl3keep, [x5, #0]
+# CHECK: prfm    pldl3strm, [x6, #0]
+# CHECK: prfm    plil1keep, [sp, #8]
+# CHECK: prfm    plil1strm, [x3, #0]
+# CHECK: prfm    plil2keep, [x5, #16]
+# CHECK: prfm    plil2strm, [x2, #0]
+# CHECK: prfm    plil3keep, [x5, #0]
+# CHECK: prfm    plil3strm, [x6, #0]
+# CHECK: prfm    pstl1keep, [sp, #8]
+# CHECK: prfm    pstl1strm, [x3, #0]
+# CHECK: prfm    pstl2keep, [x5, #16]
+# CHECK: prfm    pstl2strm, [x2, #0]
+# CHECK: prfm    pstl3keep, [x5, #0]
+# CHECK: prfm    pstl3strm, [x6, #0]
+0xe0 0x07 0x80 0xf9
+0x61 0x00 0x80 0xf9
+0xa2 0x08 0x80 0xf9
+0x43 0x00 0x80 0xf9
+0xa4 0x00 0x80 0xf9
+0xc5 0x00 0x80 0xf9
+0xe8 0x07 0x80 0xf9
+0x69 0x00 0x80 0xf9
+0xaa 0x08 0x80 0xf9
+0x4b 0x00 0x80 0xf9
+0xac 0x00 0x80 0xf9
+0xcd 0x00 0x80 0xf9
+0xf0 0x07 0x80 0xf9
+0x71 0x00 0x80 0xf9
+0xb2 0x08 0x80 0xf9
+0x53 0x00 0x80 0xf9
+0xb4 0x00 0x80 0xf9
+0xd5 0x00 0x80 0xf9
+
+
+#------------------------------------------------------------------------------
+# Load/store (register offset)
+#------------------------------------------------------------------------------
+
+# CHECK: ldrb     w3, [sp, x5]
+# CHECK: ldrb     w9, [x27, x6]
+# CHECK: ldrsb    w10, [x30, x7]
+# CHECK: ldrb     w11, [x29, x3, sxtx]
+# CHECK: strb     w12, [x28, xzr, sxtx]
+# CHECK: ldrb     w14, [x26, w6, uxtw]
+# CHECK: ldrsb    w15, [x25, w7, uxtw]
+# CHECK: ldrb     w17, [x23, w9, sxtw]
+# CHECK: ldrsb    x18, [x22, w10, sxtw]
+0xe3 0x6b 0x65 0x38
+0x69 0x6b 0x66 0x38
+0xca 0x6b 0xe7 0x38
+0xab 0xeb 0x63 0x38
+0x8c 0xeb 0x3f 0x38
+0x4e 0x4b 0x66 0x38
+0x2f 0x4b 0xe7 0x38
+0xf1 0xca 0x69 0x38
+0xd2 0xca 0xaa 0x38
+
+# CHECK: ldrsh    w3, [sp, x5]
+# CHECK: ldrsh    w9, [x27, x6]
+# CHECK: ldrh     w10, [x30, x7, lsl #1]
+# CHECK: strh     w11, [x29, x3, sxtx]
+# CHECK: ldrh     w12, [x28, xzr, sxtx]
+# CHECK: ldrsh    x13, [x27, x5, sxtx #1]
+# CHECK: ldrh     w14, [x26, w6, uxtw]
+# CHECK: ldrh     w15, [x25, w7, uxtw]
+# CHECK: ldrsh    w16, [x24, w8, uxtw #1]
+# CHECK: ldrh     w17, [x23, w9, sxtw]
+# CHECK: ldrh     w18, [x22, w10, sxtw]
+# CHECK: strh     w19, [x21, wzr, sxtw #1]
+0xe3 0x6b 0xe5 0x78
+0x69 0x6b 0xe6 0x78
+0xca 0x7b 0x67 0x78
+0xab 0xeb 0x23 0x78
+0x8c 0xeb 0x7f 0x78
+0x6d 0xfb 0xa5 0x78
+0x4e 0x4b 0x66 0x78
+0x2f 0x4b 0x67 0x78
+0x10 0x5b 0xe8 0x78
+0xf1 0xca 0x69 0x78
+0xd2 0xca 0x6a 0x78
+0xb3 0xda 0x3f 0x78
+
+# CHECK: ldr      w3, [sp, x5]
+# CHECK: ldr      s9, [x27, x6]
+# CHECK: ldr      w10, [x30, x7, lsl #2]
+# CHECK: ldr      w11, [x29, x3, sxtx]
+# CHECK: str      s12, [x28, xzr, sxtx]
+# CHECK: str      w13, [x27, x5, sxtx #2]
+# CHECK: str      w14, [x26, w6, uxtw]
+# CHECK: ldr      w15, [x25, w7, uxtw]
+# CHECK: ldr      w16, [x24, w8, uxtw #2]
+# CHECK: ldrsw    x17, [x23, w9, sxtw]
+# CHECK: ldr      w18, [x22, w10, sxtw]
+# CHECK: ldrsw    x19, [x21, wzr, sxtw #2]
+0xe3 0x6b 0x65 0xb8
+0x69 0x6b 0x66 0xbc
+0xca 0x7b 0x67 0xb8
+0xab 0xeb 0x63 0xb8
+0x8c 0xeb 0x3f 0xbc
+0x6d 0xfb 0x25 0xb8
+0x4e 0x4b 0x26 0xb8
+0x2f 0x4b 0x67 0xb8
+0x10 0x5b 0x68 0xb8
+0xf1 0xca 0xa9 0xb8
+0xd2 0xca 0x6a 0xb8
+0xb3 0xda 0xbf 0xb8
+
+# CHECK: ldr      x3, [sp, x5]
+# CHECK: str      x9, [x27, x6]
+# CHECK: ldr      d10, [x30, x7, lsl #3]
+# CHECK: str      x11, [x29, x3, sxtx]
+# CHECK: ldr      x12, [x28, xzr, sxtx]
+# CHECK: ldr      x13, [x27, x5, sxtx #3]
+# CHECK: prfm     pldl1keep, [x26, w6, uxtw]
+# CHECK: ldr      x15, [x25, w7, uxtw]
+# CHECK: ldr      x16, [x24, w8, uxtw #3]
+# CHECK: ldr      x17, [x23, w9, sxtw]
+# CHECK: ldr      x18, [x22, w10, sxtw]
+# CHECK: str      d19, [x21, wzr, sxtw #3]
+0xe3 0x6b 0x65 0xf8
+0x69 0x6b 0x26 0xf8
+0xca 0x7b 0x67 0xfc
+0xab 0xeb 0x23 0xf8
+0x8c 0xeb 0x7f 0xf8
+0x6d 0xfb 0x65 0xf8
+0x40 0x4b 0xa6 0xf8
+0x2f 0x4b 0x67 0xf8
+0x10 0x5b 0x68 0xf8
+0xf1 0xca 0x69 0xf8
+0xd2 0xca 0x6a 0xf8
+0xb3 0xda 0x3f 0xfc
+
+# CHECK: ldr      q3, [sp, x5]
+# CHECK: ldr      q9, [x27, x6]
+# CHECK: ldr      q10, [x30, x7, lsl #4]
+# CHECK: str      q11, [x29, x3, sxtx]
+# CHECK: str      q12, [x28, xzr, sxtx]
+# CHECK: str      q13, [x27, x5, sxtx #4]
+# CHECK: ldr      q14, [x26, w6, uxtw]
+# CHECK: ldr      q15, [x25, w7, uxtw]
+# CHECK: ldr      q16, [x24, w8, uxtw #4]
+# CHECK: ldr      q17, [x23, w9, sxtw]
+# CHECK: str      q18, [x22, w10, sxtw]
+# CHECK: ldr      q19, [x21, wzr, sxtw #4]
+0xe3 0x6b 0xe5 0x3c
+0x69 0x6b 0xe6 0x3c
+0xca 0x7b 0xe7 0x3c
+0xab 0xeb 0xa3 0x3c
+0x8c 0xeb 0xbf 0x3c
+0x6d 0xfb 0xa5 0x3c
+0x4e 0x4b 0xe6 0x3c
+0x2f 0x4b 0xe7 0x3c
+0x10 0x5b 0xe8 0x3c
+0xf1 0xca 0xe9 0x3c
+0xd2 0xca 0xaa 0x3c
+0xb3 0xda 0xff 0x3c
+
+#------------------------------------------------------------------------------
+# Load/store register pair (offset)
+#------------------------------------------------------------------------------
+
+# CHECK: ldp      w3, w5, [sp]
+# CHECK: stp      wzr, w9, [sp, #252]
+# CHECK: ldp      w2, wzr, [sp, #-256]
+# CHECK: ldp      w9, w10, [sp, #4]
+0xe3 0x17 0x40 0x29
+0xff 0xa7 0x1f 0x29
+0xe2 0x7f 0x60 0x29
+0xe9 0xab 0x40 0x29
+
+# CHECK: ldpsw    x9, x10, [sp, #4]
+# CHECK: ldpsw    x9, x10, [x2, #-256]
+# CHECK: ldpsw    x20, x30, [sp, #252]
+0xe9 0xab 0x40 0x69
+0x49 0x28 0x60 0x69
+0xf4 0xfb 0x5f 0x69
+
+# CHECK: ldp      x21, x29, [x2, #504]
+# CHECK: ldp      x22, x23, [x3, #-512]
+# CHECK: ldp      x24, x25, [x4, #8]
+0x55 0xf4 0x5f 0xa9
+0x76 0x5c 0x60 0xa9
+0x98 0xe4 0x40 0xa9
+
+# CHECK: ldp      s29, s28, [sp, #252]
+# CHECK: stp      s27, s26, [sp, #-256]
+# CHECK: ldp      s1, s2, [x3, #44]
+0xfd 0xf3 0x5f 0x2d
+0xfb 0x6b 0x20 0x2d
+0x61 0x88 0x45 0x2d
+
+# CHECK: stp      d3, d5, [x9, #504]
+# CHECK: stp      d7, d11, [x10, #-512]
+# CHECK: ldp      d2, d3, [x30, #-8]
+0x23 0x95 0x1f 0x6d
+0x47 0x2d 0x20 0x6d
+0xc2 0x8f 0x7f 0x6d
+
+# CHECK: stp      q3, q5, [sp]
+# CHECK: stp      q17, q19, [sp, #1008]
+# CHECK: ldp      q23, q29, [x1, #-1024]
+0xe3 0x17 0x0 0xad
+0xf1 0xcf 0x1f 0xad
+0x37 0x74 0x60 0xad
+
+#------------------------------------------------------------------------------
+# Load/store register pair (post-indexed)
+#------------------------------------------------------------------------------
+
+# CHECK: ldp      w3, w5, [sp], #0
+# CHECK: stp      wzr, w9, [sp], #252
+# CHECK: ldp      w2, wzr, [sp], #-256
+# CHECK: ldp      w9, w10, [sp], #4
+0xe3 0x17 0xc0 0x28
+0xff 0xa7 0x9f 0x28
+0xe2 0x7f 0xe0 0x28
+0xe9 0xab 0xc0 0x28
+
+# CHECK: ldpsw    x9, x10, [sp], #4
+# CHECK: ldpsw    x9, x10, [x2], #-256
+# CHECK: ldpsw    x20, x30, [sp], #252
+0xe9 0xab 0xc0 0x68
+0x49 0x28 0xe0 0x68
+0xf4 0xfb 0xdf 0x68
+
+# CHECK: ldp      x21, x29, [x2], #504
+# CHECK: ldp      x22, x23, [x3], #-512
+# CHECK: ldp      x24, x25, [x4], #8
+0x55 0xf4 0xdf 0xa8
+0x76 0x5c 0xe0 0xa8
+0x98 0xe4 0xc0 0xa8
+
+# CHECK: ldp      s29, s28, [sp], #252
+# CHECK: stp      s27, s26, [sp], #-256
+# CHECK: ldp      s1, s2, [x3], #44
+0xfd 0xf3 0xdf 0x2c
+0xfb 0x6b 0xa0 0x2c
+0x61 0x88 0xc5 0x2c
+
+# CHECK: stp      d3, d5, [x9], #504
+# CHECK: stp      d7, d11, [x10], #-512
+# CHECK: ldp      d2, d3, [x30], #-8
+0x23 0x95 0x9f 0x6c
+0x47 0x2d 0xa0 0x6c
+0xc2 0x8f 0xff 0x6c
+
+# CHECK: stp      q3, q5, [sp], #0
+# CHECK: stp      q17, q19, [sp], #1008
+# CHECK: ldp      q23, q29, [x1], #-1024
+0xe3 0x17 0x80 0xac
+0xf1 0xcf 0x9f 0xac
+0x37 0x74 0xe0 0xac
+
+#------------------------------------------------------------------------------
+# Load/store register pair (pre-indexed)
+#------------------------------------------------------------------------------
+
+# CHECK: ldp      w3, w5, [sp, #0]!
+# CHECK: stp      wzr, w9, [sp, #252]!
+# CHECK: ldp      w2, wzr, [sp, #-256]!
+# CHECK: ldp      w9, w10, [sp, #4]!
+0xe3 0x17 0xc0 0x29
+0xff 0xa7 0x9f 0x29
+0xe2 0x7f 0xe0 0x29
+0xe9 0xab 0xc0 0x29
+
+# CHECK: ldpsw    x9, x10, [sp, #4]!
+# CHECK: ldpsw    x9, x10, [x2, #-256]!
+# CHECK: ldpsw    x20, x30, [sp, #252]!
+0xe9 0xab 0xc0 0x69
+0x49 0x28 0xe0 0x69
+0xf4 0xfb 0xdf 0x69
+
+# CHECK: ldp      x21, x29, [x2, #504]!
+# CHECK: ldp      x22, x23, [x3, #-512]!
+# CHECK: ldp      x24, x25, [x4, #8]!
+0x55 0xf4 0xdf 0xa9
+0x76 0x5c 0xe0 0xa9
+0x98 0xe4 0xc0 0xa9
+
+# CHECK: ldp      s29, s28, [sp, #252]!
+# CHECK: stp      s27, s26, [sp, #-256]!
+# CHECK: ldp      s1, s2, [x3, #44]!
+0xfd 0xf3 0xdf 0x2d
+0xfb 0x6b 0xa0 0x2d
+0x61 0x88 0xc5 0x2d
+
+# CHECK: stp      d3, d5, [x9, #504]!
+# CHECK: stp      d7, d11, [x10, #-512]!
+# CHECK: ldp      d2, d3, [x30, #-8]!
+0x23 0x95 0x9f 0x6d
+0x47 0x2d 0xa0 0x6d
+0xc2 0x8f 0xff 0x6d
+
+# CHECK: stp      q3, q5, [sp, #0]!
+# CHECK: stp      q17, q19, [sp, #1008]!
+# CHECK: ldp      q23, q29, [x1, #-1024]!
+0xe3 0x17 0x80 0xad
+0xf1 0xcf 0x9f 0xad
+0x37 0x74 0xe0 0xad
+
+#------------------------------------------------------------------------------
+# Load/store non-temporal register pair (offset)
+#------------------------------------------------------------------------------
+
+# CHECK: ldnp      w3, w5, [sp]
+# CHECK: stnp      wzr, w9, [sp, #252]
+# CHECK: ldnp      w2, wzr, [sp, #-256]
+# CHECK: ldnp      w9, w10, [sp, #4]
+0xe3 0x17 0x40 0x28
+0xff 0xa7 0x1f 0x28
+0xe2 0x7f 0x60 0x28
+0xe9 0xab 0x40 0x28
+
+# CHECK: ldnp      x21, x29, [x2, #504]
+# CHECK: ldnp      x22, x23, [x3, #-512]
+# CHECK: ldnp      x24, x25, [x4, #8]
+0x55 0xf4 0x5f 0xa8
+0x76 0x5c 0x60 0xa8
+0x98 0xe4 0x40 0xa8
+
+# CHECK: ldnp      s29, s28, [sp, #252]
+# CHECK: stnp      s27, s26, [sp, #-256]
+# CHECK: ldnp      s1, s2, [x3, #44]
+0xfd 0xf3 0x5f 0x2c
+0xfb 0x6b 0x20 0x2c
+0x61 0x88 0x45 0x2c
+
+# CHECK: stnp      d3, d5, [x9, #504]
+# CHECK: stnp      d7, d11, [x10, #-512]
+# CHECK: ldnp      d2, d3, [x30, #-8]
+0x23 0x95 0x1f 0x6c
+0x47 0x2d 0x20 0x6c
+0xc2 0x8f 0x7f 0x6c
+
+# CHECK: stnp      q3, q5, [sp]
+# CHECK: stnp      q17, q19, [sp, #1008]
+# CHECK: ldnp      q23, q29, [x1, #-1024]
+0xe3 0x17 0x0 0xac
+0xf1 0xcf 0x1f 0xac
+0x37 0x74 0x60 0xac
+
+#------------------------------------------------------------------------------
+# Logical (immediate)
+#------------------------------------------------------------------------------
+# CHECK: orr      w3, w9, #0xffff0000
+# CHECK: orr      wsp, w10, #0xe00000ff
+# CHECK: orr      w9, w10, #0x3ff
+0x23 0x3d 0x10 0x32
+0x5f 0x29 0x3 0x32
+0x49 0x25 0x0 0x32
+
+# CHECK: and      w14, w15, #0x80008000
+# CHECK: and      w12, w13, #0xffc3ffc3
+# CHECK: and      w11, wzr, #0x30003
+0xee 0x81 0x1 0x12
+0xac 0xad 0xa 0x12
+0xeb 0x87 0x0 0x12
+
+# CHECK: eor      w3, w6, #0xe0e0e0e0
+# CHECK: eor      wsp, wzr, #0x3030303
+# CHECK: eor      w16, w17, #0x81818181
+0xc3 0xc8 0x3 0x52
+0xff 0xc7 0x0 0x52
+0x30 0xc6 0x1 0x52
+
+# CHECK: ands     wzr, w18, #0xcccccccc
+# CHECK: ands     w19, w20, #0x33333333
+# CHECK: ands     w21, w22, #0x99999999
+0x5f 0xe6 0x2 0x72
+0x93 0xe6 0x0 0x72
+0xd5 0xe6 0x1 0x72
+
+# CHECK: ands     wzr, w3, #0xaaaaaaaa
+# CHECK: ands     wzr, wzr, #0x55555555
+0x7f 0xf0 0x1 0x72
+0xff 0xf3 0x0 0x72
+
+# CHECK: eor      x3, x5, #0xffffffffc000000
+# CHECK: and      x9, x10, #0x7fffffffffff
+# CHECK: orr      x11, x12, #0x8000000000000fff
+0xa3 0x84 0x66 0xd2
+0x49 0xb9 0x40 0x92
+0x8b 0x31 0x41 0xb2
+
+# CHECK: orr      x3, x9, #0xffff0000ffff0000
+# CHECK: orr      sp, x10, #0xe00000ffe00000ff
+# CHECK: orr      x9, x10, #0x3ff000003ff
+0x23 0x3d 0x10 0xb2
+0x5f 0x29 0x3 0xb2
+0x49 0x25 0x0 0xb2
+
+# CHECK: and      x14, x15, #0x8000800080008000
+# CHECK: and      x12, x13, #0xffc3ffc3ffc3ffc3
+# CHECK: and      x11, xzr, #0x3000300030003
+0xee 0x81 0x1 0x92
+0xac 0xad 0xa 0x92
+0xeb 0x87 0x0 0x92
+
+# CHECK: eor      x3, x6, #0xe0e0e0e0e0e0e0e0
+# CHECK: eor      sp, xzr, #0x303030303030303
+# CHECK: eor      x16, x17, #0x8181818181818181
+0xc3 0xc8 0x3 0xd2
+0xff 0xc7 0x0 0xd2
+0x30 0xc6 0x1 0xd2
+
+# CHECK: ands     xzr, x18, #0xcccccccccccccccc
+# CHECK: ands     x19, x20, #0x3333333333333333
+# CHECK: ands     x21, x22, #0x9999999999999999
+0x5f 0xe6 0x2 0xf2
+0x93 0xe6 0x0 0xf2
+0xd5 0xe6 0x1 0xf2
+
+# CHECK: ands     xzr, x3, #0xaaaaaaaaaaaaaaaa
+# CHECK: ands     xzr, xzr, #0x5555555555555555
+0x7f 0xf0 0x1 0xf2
+0xff 0xf3 0x0 0xf2
+
+# CHECK: orr      w3, wzr, #0xf000f
+# CHECK: orr      x10, xzr, #0xaaaaaaaaaaaaaaaa
+0xe3 0x8f 0x0 0x32
+0xea 0xf3 0x1 0xb2
+
+# CHECK: orr      w3, wzr, #0xffff
+# CHECK: orr      x9, xzr, #0xffff00000000
+0xe3 0x3f 0x0 0x32
+0xe9 0x3f 0x60 0xb2
+
+#------------------------------------------------------------------------------
+# Logical (shifted register)
+#------------------------------------------------------------------------------
+
+# CHECK: and      w12, w23, w21
+# CHECK: and      w16, w15, w1, lsl #1
+# CHECK: and      w9, w4, w10, lsl #31
+# CHECK: and      w3, w30, w11
+# CHECK: and      x3, x5, x7, lsl #63
+0xec 0x2 0x15 0xa
+0xf0 0x5 0x1 0xa
+0x89 0x7c 0xa 0xa
+0xc3 0x3 0xb 0xa
+0xa3 0xfc 0x7 0x8a
+
+# CHECK: and      x5, x14, x19, asr #4
+# CHECK: and      w3, w17, w19, ror #31
+# CHECK: and      w0, w2, wzr, lsr #17
+# CHECK: and      w3, w30, w11, asr
+0xc5 0x11 0x93 0x8a
+0x23 0x7e 0xd3 0xa
+0x40 0x44 0x5f 0xa
+0xc3 0x3 0x8b 0xa
+
+# CHECK: and      xzr, x4, x26
+# CHECK: and      w3, wzr, w20, ror
+# CHECK: and      x7, x20, xzr, asr #63
+0x9f 0x0 0x1a 0x8a
+0xe3 0x3 0xd4 0xa
+0x87 0xfe 0x9f 0x8a
+
+# CHECK: bic      x13, x20, x14, lsl #47
+# CHECK: bic      w2, w7, w9
+# CHECK: orr      w2, w7, w0, asr #31
+# CHECK: orr      x8, x9, x10, lsl #12
+# CHECK: orn      x3, x5, x7, asr
+# CHECK: orn      w2, w5, w29
+0x8d 0xbe 0x2e 0x8a
+0xe2 0x0 0x29 0xa
+0xe2 0x7c 0x80 0x2a
+0x28 0x31 0xa 0xaa
+0xa3 0x0 0xa7 0xaa
+0xa2 0x0 0x3d 0x2a
+
+# CHECK: ands     w7, wzr, w9, lsl #1
+# CHECK: ands     x3, x5, x20, ror #63
+# CHECK: bics     w3, w5, w7
+# CHECK: bics     x3, xzr, x3, lsl #1
+# CHECK: tst      w3, w7, lsl #31
+# CHECK: tst      x2, x20, asr
+0xe7 0x7 0x9 0x6a
+0xa3 0xfc 0xd4 0xea
+0xa3 0x0 0x27 0x6a
+0xe3 0x7 0x23 0xea
+0x7f 0x7c 0x7 0x6a
+0x5f 0x0 0x94 0xea
+
+# CHECK: mov      x3, x6
+# CHECK: mov      x3, xzr
+# CHECK: mov      wzr, w2
+# CHECK: mov      w3, w5
+0xe3 0x3 0x6 0xaa
+0xe3 0x3 0x1f 0xaa
+0xff 0x3 0x2 0x2a
+0xe3 0x3 0x5 0x2a
+
+#------------------------------------------------------------------------------
+# Move wide (immediate)
+#------------------------------------------------------------------------------
+
+# N.b. (FIXME) canonical aliases aren't produced here because of a
+# limitation in InstAlias. Many of the "mov[nz]" instructions should
+# be printed as "mov".
+
+# CHECK: movz     w1, #65535
+# CHECK: movz     w2, #0, lsl #16
+# CHECK: movn     w2, #1234
+0xe1 0xff 0x9f 0x52
+0x2 0x0 0xa0 0x52
+0x42 0x9a 0x80 0x12
+
+# CHECK: movz     x2, #1234, lsl #32
+# CHECK: movk     xzr, #4321, lsl #48
+0x42 0x9a 0xc0 0xd2
+0x3f 0x1c 0xe2 0xf2
+
+# CHECK: movz     x2, #0
+# CHECK: movk     w3, #0
+# CHECK: movz     x4, #0, lsl #16
+# CHECK: movk     w5, #0, lsl #16
+# CHECK: movz     x6, #0, lsl #32
+# CHECK: movk     x7, #0, lsl #32
+# CHECK: movz     x8, #0, lsl #48
+# CHECK: movk     x9, #0, lsl #48
+0x2 0x0 0x80 0xd2
+0x3 0x0 0x80 0x72
+0x4 0x0 0xa0 0xd2
+0x5 0x0 0xa0 0x72
+0x6 0x0 0xc0 0xd2
+0x7 0x0 0xc0 0xf2
+0x8 0x0 0xe0 0xd2
+0x9 0x0 0xe0 0xf2
+
+#------------------------------------------------------------------------------
+# PC-relative addressing
+#------------------------------------------------------------------------------
+
+# It's slightly dodgy using immediates here, but harmless enough when
+# it's all that's available.
+
+# CHECK: adr      x2, #1600
+# CHECK: adrp     x21, #6553600
+# CHECK: adr      x0, #262144
+0x02 0x32 0x00 0x10
+0x15 0x32 0x00 0x90
+0x00 0x00 0x20 0x10
+
+#------------------------------------------------------------------------------
+# System
+#------------------------------------------------------------------------------
+
+# CHECK: nop
+# CHECK: hint     #127
+# CHECK: nop
+# CHECK: yield
+# CHECK: wfe
+# CHECK: wfi
+# CHECK: sev
+# CHECK: sevl
+0x1f 0x20 0x3 0xd5
+0xff 0x2f 0x3 0xd5
+0x1f 0x20 0x3 0xd5
+0x3f 0x20 0x3 0xd5
+0x5f 0x20 0x3 0xd5
+0x7f 0x20 0x3 0xd5
+0x9f 0x20 0x3 0xd5
+0xbf 0x20 0x3 0xd5
+
+# CHECK: clrex
+# CHECK: clrex    #0
+# CHECK: clrex    #7
+# CHECK: clrex
+0x5f 0x3f 0x3 0xd5
+0x5f 0x30 0x3 0xd5
+0x5f 0x37 0x3 0xd5
+0x5f 0x3f 0x3 0xd5
+
+# CHECK: dsb      #0
+# CHECK: dsb      #12
+# CHECK: dsb      sy
+# CHECK: dsb      oshld
+# CHECK: dsb      oshst
+# CHECK: dsb      osh
+# CHECK: dsb      nshld
+# CHECK: dsb      nshst
+# CHECK: dsb      nsh
+# CHECK: dsb      ishld
+# CHECK: dsb      ishst
+# CHECK: dsb      ish
+# CHECK: dsb      ld
+# CHECK: dsb      st
+# CHECK: dsb      sy
+0x9f 0x30 0x3 0xd5
+0x9f 0x3c 0x3 0xd5
+0x9f 0x3f 0x3 0xd5
+0x9f 0x31 0x3 0xd5
+0x9f 0x32 0x3 0xd5
+0x9f 0x33 0x3 0xd5
+0x9f 0x35 0x3 0xd5
+0x9f 0x36 0x3 0xd5
+0x9f 0x37 0x3 0xd5
+0x9f 0x39 0x3 0xd5
+0x9f 0x3a 0x3 0xd5
+0x9f 0x3b 0x3 0xd5
+0x9f 0x3d 0x3 0xd5
+0x9f 0x3e 0x3 0xd5
+0x9f 0x3f 0x3 0xd5
+
+# CHECK: dmb      #0
+# CHECK: dmb      #12
+# CHECK: dmb      sy
+# CHECK: dmb      oshld
+# CHECK: dmb      oshst
+# CHECK: dmb      osh
+# CHECK: dmb      nshld
+# CHECK: dmb      nshst
+# CHECK: dmb      nsh
+# CHECK: dmb      ishld
+# CHECK: dmb      ishst
+# CHECK: dmb      ish
+# CHECK: dmb      ld
+# CHECK: dmb      st
+# CHECK: dmb      sy
+0xbf 0x30 0x3 0xd5
+0xbf 0x3c 0x3 0xd5
+0xbf 0x3f 0x3 0xd5
+0xbf 0x31 0x3 0xd5
+0xbf 0x32 0x3 0xd5
+0xbf 0x33 0x3 0xd5
+0xbf 0x35 0x3 0xd5
+0xbf 0x36 0x3 0xd5
+0xbf 0x37 0x3 0xd5
+0xbf 0x39 0x3 0xd5
+0xbf 0x3a 0x3 0xd5
+0xbf 0x3b 0x3 0xd5
+0xbf 0x3d 0x3 0xd5
+0xbf 0x3e 0x3 0xd5
+0xbf 0x3f 0x3 0xd5
+
+# CHECK: isb
+# CHECK: isb      #12
+0xdf 0x3f 0x3 0xd5
+0xdf 0x3c 0x3 0xd5
+
+# CHECK: msr      spsel, #0
+# CHECK: msr      daifset, #15
+# CHECK: msr      daifclr, #12
+0xbf 0x40 0x0 0xd5
+0xdf 0x4f 0x3 0xd5
+0xff 0x4c 0x3 0xd5
+
+# CHECK: sys      #7, c5, c9, #7, x5
+# CHECK: sys      #0, c15, c15, #2
+# CHECK: sysl     x9, #7, c5, c9, #7
+# CHECK: sysl     x1, #0, c15, c15, #2
+0xe5 0x59 0xf 0xd5
+0x5f 0xff 0x8 0xd5
+0xe9 0x59 0x2f 0xd5
+0x41 0xff 0x28 0xd5
+
+# CHECK: sys     #0, c7, c1, #0, xzr
+# CHECK: sys     #0, c7, c5, #0, xzr
+# CHECK: sys     #3, c7, c5, #1, x9
+0x1f 0x71 0x8 0xd5
+0x1f 0x75 0x8 0xd5
+0x29 0x75 0xb 0xd5
+
+# CHECK: sys     #3, c7, c4, #1, x12
+# CHECK: sys     #0, c7, c6, #1, xzr
+# CHECK: sys     #0, c7, c6, #2, x2
+# CHECK: sys     #3, c7, c10, #1, x9
+# CHECK: sys     #0, c7, c10, #2, x10
+# CHECK: sys     #3, c7, c11, #1, x0
+# CHECK: sys     #3, c7, c14, #1, x3
+# CHECK: sys     #0, c7, c14, #2, x30
+0x2c 0x74 0xb 0xd5
+0x3f 0x76 0x8 0xd5
+0x42 0x76 0x8 0xd5
+0x29 0x7a 0xb 0xd5
+0x4a 0x7a 0x8 0xd5
+0x20 0x7b 0xb 0xd5
+0x23 0x7e 0xb 0xd5
+0x5e 0x7e 0x8 0xd5
+
+
+# CHECK: msr      teecr32_el1, x12
+# CHECK: msr      osdtrrx_el1, x12
+# CHECK: msr      mdccint_el1, x12
+# CHECK: msr      mdscr_el1, x12
+# CHECK: msr      osdtrtx_el1, x12
+# CHECK: msr      dbgdtr_el0, x12
+# CHECK: msr      dbgdtrtx_el0, x12
+# CHECK: msr      oseccr_el1, x12
+# CHECK: msr      dbgvcr32_el2, x12
+# CHECK: msr      dbgbvr0_el1, x12
+# CHECK: msr      dbgbvr1_el1, x12
+# CHECK: msr      dbgbvr2_el1, x12
+# CHECK: msr      dbgbvr3_el1, x12
+# CHECK: msr      dbgbvr4_el1, x12
+# CHECK: msr      dbgbvr5_el1, x12
+# CHECK: msr      dbgbvr6_el1, x12
+# CHECK: msr      dbgbvr7_el1, x12
+# CHECK: msr      dbgbvr8_el1, x12
+# CHECK: msr      dbgbvr9_el1, x12
+# CHECK: msr      dbgbvr10_el1, x12
+# CHECK: msr      dbgbvr11_el1, x12
+# CHECK: msr      dbgbvr12_el1, x12
+# CHECK: msr      dbgbvr13_el1, x12
+# CHECK: msr      dbgbvr14_el1, x12
+# CHECK: msr      dbgbvr15_el1, x12
+# CHECK: msr      dbgbcr0_el1, x12
+# CHECK: msr      dbgbcr1_el1, x12
+# CHECK: msr      dbgbcr2_el1, x12
+# CHECK: msr      dbgbcr3_el1, x12
+# CHECK: msr      dbgbcr4_el1, x12
+# CHECK: msr      dbgbcr5_el1, x12
+# CHECK: msr      dbgbcr6_el1, x12
+# CHECK: msr      dbgbcr7_el1, x12
+# CHECK: msr      dbgbcr8_el1, x12
+# CHECK: msr      dbgbcr9_el1, x12
+# CHECK: msr      dbgbcr10_el1, x12
+# CHECK: msr      dbgbcr11_el1, x12
+# CHECK: msr      dbgbcr12_el1, x12
+# CHECK: msr      dbgbcr13_el1, x12
+# CHECK: msr      dbgbcr14_el1, x12
+# CHECK: msr      dbgbcr15_el1, x12
+# CHECK: msr      dbgwvr0_el1, x12
+# CHECK: msr      dbgwvr1_el1, x12
+# CHECK: msr      dbgwvr2_el1, x12
+# CHECK: msr      dbgwvr3_el1, x12
+# CHECK: msr      dbgwvr4_el1, x12
+# CHECK: msr      dbgwvr5_el1, x12
+# CHECK: msr      dbgwvr6_el1, x12
+# CHECK: msr      dbgwvr7_el1, x12
+# CHECK: msr      dbgwvr8_el1, x12
+# CHECK: msr      dbgwvr9_el1, x12
+# CHECK: msr      dbgwvr10_el1, x12
+# CHECK: msr      dbgwvr11_el1, x12
+# CHECK: msr      dbgwvr12_el1, x12
+# CHECK: msr      dbgwvr13_el1, x12
+# CHECK: msr      dbgwvr14_el1, x12
+# CHECK: msr      dbgwvr15_el1, x12
+# CHECK: msr      dbgwcr0_el1, x12
+# CHECK: msr      dbgwcr1_el1, x12
+# CHECK: msr      dbgwcr2_el1, x12
+# CHECK: msr      dbgwcr3_el1, x12
+# CHECK: msr      dbgwcr4_el1, x12
+# CHECK: msr      dbgwcr5_el1, x12
+# CHECK: msr      dbgwcr6_el1, x12
+# CHECK: msr      dbgwcr7_el1, x12
+# CHECK: msr      dbgwcr8_el1, x12
+# CHECK: msr      dbgwcr9_el1, x12
+# CHECK: msr      dbgwcr10_el1, x12
+# CHECK: msr      dbgwcr11_el1, x12
+# CHECK: msr      dbgwcr12_el1, x12
+# CHECK: msr      dbgwcr13_el1, x12
+# CHECK: msr      dbgwcr14_el1, x12
+# CHECK: msr      dbgwcr15_el1, x12
+# CHECK: msr      teehbr32_el1, x12
+# CHECK: msr      oslar_el1, x12
+# CHECK: msr      osdlr_el1, x12
+# CHECK: msr      dbgprcr_el1, x12
+# CHECK: msr      dbgclaimset_el1, x12
+# CHECK: msr      dbgclaimclr_el1, x12
+# CHECK: msr      csselr_el1, x12
+# CHECK: msr      vpidr_el2, x12
+# CHECK: msr      vmpidr_el2, x12
+# CHECK: msr      sctlr_el1, x12
+# CHECK: msr      sctlr_el2, x12
+# CHECK: msr      sctlr_el3, x12
+# CHECK: msr      actlr_el1, x12
+# CHECK: msr      actlr_el2, x12
+# CHECK: msr      actlr_el3, x12
+# CHECK: msr      cpacr_el1, x12
+# CHECK: msr      hcr_el2, x12
+# CHECK: msr      scr_el3, x12
+# CHECK: msr      mdcr_el2, x12
+# CHECK: msr      sder32_el3, x12
+# CHECK: msr      cptr_el2, x12
+# CHECK: msr      cptr_el3, x12
+# CHECK: msr      hstr_el2, x12
+# CHECK: msr      hacr_el2, x12
+# CHECK: msr      mdcr_el3, x12
+# CHECK: msr      ttbr0_el1, x12
+# CHECK: msr      ttbr0_el2, x12
+# CHECK: msr      ttbr0_el3, x12
+# CHECK: msr      ttbr1_el1, x12
+# CHECK: msr      tcr_el1, x12
+# CHECK: msr      tcr_el2, x12
+# CHECK: msr      tcr_el3, x12
+# CHECK: msr      vttbr_el2, x12
+# CHECK: msr      vtcr_el2, x12
+# CHECK: msr      dacr32_el2, x12
+# CHECK: msr      spsr_el1, x12
+# CHECK: msr      spsr_el2, x12
+# CHECK: msr      spsr_el3, x12
+# CHECK: msr      elr_el1, x12
+# CHECK: msr      elr_el2, x12
+# CHECK: msr      elr_el3, x12
+# CHECK: msr      sp_el0, x12
+# CHECK: msr      sp_el1, x12
+# CHECK: msr      sp_el2, x12
+# CHECK: msr      spsel, x12
+# CHECK: msr      nzcv, x12
+# CHECK: msr      daif, x12
+# CHECK: msr      currentel, x12
+# CHECK: msr      spsr_irq, x12
+# CHECK: msr      spsr_abt, x12
+# CHECK: msr      spsr_und, x12
+# CHECK: msr      spsr_fiq, x12
+# CHECK: msr      fpcr, x12
+# CHECK: msr      fpsr, x12
+# CHECK: msr      dspsr_el0, x12
+# CHECK: msr      dlr_el0, x12
+# CHECK: msr      ifsr32_el2, x12
+# CHECK: msr      afsr0_el1, x12
+# CHECK: msr      afsr0_el2, x12
+# CHECK: msr      afsr0_el3, x12
+# CHECK: msr      afsr1_el1, x12
+# CHECK: msr      afsr1_el2, x12
+# CHECK: msr      afsr1_el3, x12
+# CHECK: msr      esr_el1, x12
+# CHECK: msr      esr_el2, x12
+# CHECK: msr      esr_el3, x12
+# CHECK: msr      fpexc32_el2, x12
+# CHECK: msr      far_el1, x12
+# CHECK: msr      far_el2, x12
+# CHECK: msr      far_el3, x12
+# CHECK: msr      hpfar_el2, x12
+# CHECK: msr      par_el1, x12
+# CHECK: msr      pmcr_el0, x12
+# CHECK: msr      pmcntenset_el0, x12
+# CHECK: msr      pmcntenclr_el0, x12
+# CHECK: msr      pmovsclr_el0, x12
+# CHECK: msr      pmselr_el0, x12
+# CHECK: msr      pmccntr_el0, x12
+# CHECK: msr      pmxevtyper_el0, x12
+# CHECK: msr      pmxevcntr_el0, x12
+# CHECK: msr      pmuserenr_el0, x12
+# CHECK: msr      pmintenset_el1, x12
+# CHECK: msr      pmintenclr_el1, x12
+# CHECK: msr      pmovsset_el0, x12
+# CHECK: msr      mair_el1, x12
+# CHECK: msr      mair_el2, x12
+# CHECK: msr      mair_el3, x12
+# CHECK: msr      amair_el1, x12
+# CHECK: msr      amair_el2, x12
+# CHECK: msr      amair_el3, x12
+# CHECK: msr      vbar_el1, x12
+# CHECK: msr      vbar_el2, x12
+# CHECK: msr      vbar_el3, x12
+# CHECK: msr      rmr_el1, x12
+# CHECK: msr      rmr_el2, x12
+# CHECK: msr      rmr_el3, x12
+# CHECK: msr      tpidr_el0, x12
+# CHECK: msr      tpidr_el2, x12
+# CHECK: msr      tpidr_el3, x12
+# CHECK: msr      tpidrro_el0, x12
+# CHECK: msr      tpidr_el1, x12
+# CHECK: msr      cntfrq_el0, x12
+# CHECK: msr      cntvoff_el2, x12
+# CHECK: msr      cntkctl_el1, x12
+# CHECK: msr      cnthctl_el2, x12
+# CHECK: msr      cntp_tval_el0, x12
+# CHECK: msr      cnthp_tval_el2, x12
+# CHECK: msr      cntps_tval_el1, x12
+# CHECK: msr      cntp_ctl_el0, x12
+# CHECK: msr      cnthp_ctl_el2, x12
+# CHECK: msr      cntps_ctl_el1, x12
+# CHECK: msr      cntp_cval_el0, x12
+# CHECK: msr      cnthp_cval_el2, x12
+# CHECK: msr      cntps_cval_el1, x12
+# CHECK: msr      cntv_tval_el0, x12
+# CHECK: msr      cntv_ctl_el0, x12
+# CHECK: msr      cntv_cval_el0, x12
+# CHECK: msr      pmevcntr0_el0, x12
+# CHECK: msr      pmevcntr1_el0, x12
+# CHECK: msr      pmevcntr2_el0, x12
+# CHECK: msr      pmevcntr3_el0, x12
+# CHECK: msr      pmevcntr4_el0, x12
+# CHECK: msr      pmevcntr5_el0, x12
+# CHECK: msr      pmevcntr6_el0, x12
+# CHECK: msr      pmevcntr7_el0, x12
+# CHECK: msr      pmevcntr8_el0, x12
+# CHECK: msr      pmevcntr9_el0, x12
+# CHECK: msr      pmevcntr10_el0, x12
+# CHECK: msr      pmevcntr11_el0, x12
+# CHECK: msr      pmevcntr12_el0, x12
+# CHECK: msr      pmevcntr13_el0, x12
+# CHECK: msr      pmevcntr14_el0, x12
+# CHECK: msr      pmevcntr15_el0, x12
+# CHECK: msr      pmevcntr16_el0, x12
+# CHECK: msr      pmevcntr17_el0, x12
+# CHECK: msr      pmevcntr18_el0, x12
+# CHECK: msr      pmevcntr19_el0, x12
+# CHECK: msr      pmevcntr20_el0, x12
+# CHECK: msr      pmevcntr21_el0, x12
+# CHECK: msr      pmevcntr22_el0, x12
+# CHECK: msr      pmevcntr23_el0, x12
+# CHECK: msr      pmevcntr24_el0, x12
+# CHECK: msr      pmevcntr25_el0, x12
+# CHECK: msr      pmevcntr26_el0, x12
+# CHECK: msr      pmevcntr27_el0, x12
+# CHECK: msr      pmevcntr28_el0, x12
+# CHECK: msr      pmevcntr29_el0, x12
+# CHECK: msr      pmevcntr30_el0, x12
+# CHECK: msr      pmccfiltr_el0, x12
+# CHECK: msr      pmevtyper0_el0, x12
+# CHECK: msr      pmevtyper1_el0, x12
+# CHECK: msr      pmevtyper2_el0, x12
+# CHECK: msr      pmevtyper3_el0, x12
+# CHECK: msr      pmevtyper4_el0, x12
+# CHECK: msr      pmevtyper5_el0, x12
+# CHECK: msr      pmevtyper6_el0, x12
+# CHECK: msr      pmevtyper7_el0, x12
+# CHECK: msr      pmevtyper8_el0, x12
+# CHECK: msr      pmevtyper9_el0, x12
+# CHECK: msr      pmevtyper10_el0, x12
+# CHECK: msr      pmevtyper11_el0, x12
+# CHECK: msr      pmevtyper12_el0, x12
+# CHECK: msr      pmevtyper13_el0, x12
+# CHECK: msr      pmevtyper14_el0, x12
+# CHECK: msr      pmevtyper15_el0, x12
+# CHECK: msr      pmevtyper16_el0, x12
+# CHECK: msr      pmevtyper17_el0, x12
+# CHECK: msr      pmevtyper18_el0, x12
+# CHECK: msr      pmevtyper19_el0, x12
+# CHECK: msr      pmevtyper20_el0, x12
+# CHECK: msr      pmevtyper21_el0, x12
+# CHECK: msr      pmevtyper22_el0, x12
+# CHECK: msr      pmevtyper23_el0, x12
+# CHECK: msr      pmevtyper24_el0, x12
+# CHECK: msr      pmevtyper25_el0, x12
+# CHECK: msr      pmevtyper26_el0, x12
+# CHECK: msr      pmevtyper27_el0, x12
+# CHECK: msr      pmevtyper28_el0, x12
+# CHECK: msr      pmevtyper29_el0, x12
+# CHECK: msr      pmevtyper30_el0, x12
+# CHECK: mrs      x9, teecr32_el1
+# CHECK: mrs      x9, osdtrrx_el1
+# CHECK: mrs      x9, mdccsr_el0
+# CHECK: mrs      x9, mdccint_el1
+# CHECK: mrs      x9, mdscr_el1
+# CHECK: mrs      x9, osdtrtx_el1
+# CHECK: mrs      x9, dbgdtr_el0
+# CHECK: mrs      x9, dbgdtrrx_el0
+# CHECK: mrs      x9, oseccr_el1
+# CHECK: mrs      x9, dbgvcr32_el2
+# CHECK: mrs      x9, dbgbvr0_el1
+# CHECK: mrs      x9, dbgbvr1_el1
+# CHECK: mrs      x9, dbgbvr2_el1
+# CHECK: mrs      x9, dbgbvr3_el1
+# CHECK: mrs      x9, dbgbvr4_el1
+# CHECK: mrs      x9, dbgbvr5_el1
+# CHECK: mrs      x9, dbgbvr6_el1
+# CHECK: mrs      x9, dbgbvr7_el1
+# CHECK: mrs      x9, dbgbvr8_el1
+# CHECK: mrs      x9, dbgbvr9_el1
+# CHECK: mrs      x9, dbgbvr10_el1
+# CHECK: mrs      x9, dbgbvr11_el1
+# CHECK: mrs      x9, dbgbvr12_el1
+# CHECK: mrs      x9, dbgbvr13_el1
+# CHECK: mrs      x9, dbgbvr14_el1
+# CHECK: mrs      x9, dbgbvr15_el1
+# CHECK: mrs      x9, dbgbcr0_el1
+# CHECK: mrs      x9, dbgbcr1_el1
+# CHECK: mrs      x9, dbgbcr2_el1
+# CHECK: mrs      x9, dbgbcr3_el1
+# CHECK: mrs      x9, dbgbcr4_el1
+# CHECK: mrs      x9, dbgbcr5_el1
+# CHECK: mrs      x9, dbgbcr6_el1
+# CHECK: mrs      x9, dbgbcr7_el1
+# CHECK: mrs      x9, dbgbcr8_el1
+# CHECK: mrs      x9, dbgbcr9_el1
+# CHECK: mrs      x9, dbgbcr10_el1
+# CHECK: mrs      x9, dbgbcr11_el1
+# CHECK: mrs      x9, dbgbcr12_el1
+# CHECK: mrs      x9, dbgbcr13_el1
+# CHECK: mrs      x9, dbgbcr14_el1
+# CHECK: mrs      x9, dbgbcr15_el1
+# CHECK: mrs      x9, dbgwvr0_el1
+# CHECK: mrs      x9, dbgwvr1_el1
+# CHECK: mrs      x9, dbgwvr2_el1
+# CHECK: mrs      x9, dbgwvr3_el1
+# CHECK: mrs      x9, dbgwvr4_el1
+# CHECK: mrs      x9, dbgwvr5_el1
+# CHECK: mrs      x9, dbgwvr6_el1
+# CHECK: mrs      x9, dbgwvr7_el1
+# CHECK: mrs      x9, dbgwvr8_el1
+# CHECK: mrs      x9, dbgwvr9_el1
+# CHECK: mrs      x9, dbgwvr10_el1
+# CHECK: mrs      x9, dbgwvr11_el1
+# CHECK: mrs      x9, dbgwvr12_el1
+# CHECK: mrs      x9, dbgwvr13_el1
+# CHECK: mrs      x9, dbgwvr14_el1
+# CHECK: mrs      x9, dbgwvr15_el1
+# CHECK: mrs      x9, dbgwcr0_el1
+# CHECK: mrs      x9, dbgwcr1_el1
+# CHECK: mrs      x9, dbgwcr2_el1
+# CHECK: mrs      x9, dbgwcr3_el1
+# CHECK: mrs      x9, dbgwcr4_el1
+# CHECK: mrs      x9, dbgwcr5_el1
+# CHECK: mrs      x9, dbgwcr6_el1
+# CHECK: mrs      x9, dbgwcr7_el1
+# CHECK: mrs      x9, dbgwcr8_el1
+# CHECK: mrs      x9, dbgwcr9_el1
+# CHECK: mrs      x9, dbgwcr10_el1
+# CHECK: mrs      x9, dbgwcr11_el1
+# CHECK: mrs      x9, dbgwcr12_el1
+# CHECK: mrs      x9, dbgwcr13_el1
+# CHECK: mrs      x9, dbgwcr14_el1
+# CHECK: mrs      x9, dbgwcr15_el1
+# CHECK: mrs      x9, mdrar_el1
+# CHECK: mrs      x9, teehbr32_el1
+# CHECK: mrs      x9, oslsr_el1
+# CHECK: mrs      x9, osdlr_el1
+# CHECK: mrs      x9, dbgprcr_el1
+# CHECK: mrs      x9, dbgclaimset_el1
+# CHECK: mrs      x9, dbgclaimclr_el1
+# CHECK: mrs      x9, dbgauthstatus_el1
+# CHECK: mrs      x9, midr_el1
+# CHECK: mrs      x9, ccsidr_el1
+# CHECK: mrs      x9, csselr_el1
+# CHECK: mrs      x9, vpidr_el2
+# CHECK: mrs      x9, clidr_el1
+# CHECK: mrs      x9, ctr_el0
+# CHECK: mrs      x9, mpidr_el1
+# CHECK: mrs      x9, vmpidr_el2
+# CHECK: mrs      x9, revidr_el1
+# CHECK: mrs      x9, aidr_el1
+# CHECK: mrs      x9, dczid_el0
+# CHECK: mrs      x9, id_pfr0_el1
+# CHECK: mrs      x9, id_pfr1_el1
+# CHECK: mrs      x9, id_dfr0_el1
+# CHECK: mrs      x9, id_afr0_el1
+# CHECK: mrs      x9, id_mmfr0_el1
+# CHECK: mrs      x9, id_mmfr1_el1
+# CHECK: mrs      x9, id_mmfr2_el1
+# CHECK: mrs      x9, id_mmfr3_el1
+# CHECK: mrs      x9, id_isar0_el1
+# CHECK: mrs      x9, id_isar1_el1
+# CHECK: mrs      x9, id_isar2_el1
+# CHECK: mrs      x9, id_isar3_el1
+# CHECK: mrs      x9, id_isar4_el1
+# CHECK: mrs      x9, id_isar5_el1
+# CHECK: mrs      x9, mvfr0_el1
+# CHECK: mrs      x9, mvfr1_el1
+# CHECK: mrs      x9, mvfr2_el1
+# CHECK: mrs      x9, id_aa64pfr0_el1
+# CHECK: mrs      x9, id_aa64pfr1_el1
+# CHECK: mrs      x9, id_aa64dfr0_el1
+# CHECK: mrs      x9, id_aa64dfr1_el1
+# CHECK: mrs      x9, id_aa64afr0_el1
+# CHECK: mrs      x9, id_aa64afr1_el1
+# CHECK: mrs      x9, id_aa64isar0_el1
+# CHECK: mrs      x9, id_aa64isar1_el1
+# CHECK: mrs      x9, id_aa64mmfr0_el1
+# CHECK: mrs      x9, id_aa64mmfr1_el1
+# CHECK: mrs      x9, sctlr_el1
+# CHECK: mrs      x9, sctlr_el2
+# CHECK: mrs      x9, sctlr_el3
+# CHECK: mrs      x9, actlr_el1
+# CHECK: mrs      x9, actlr_el2
+# CHECK: mrs      x9, actlr_el3
+# CHECK: mrs      x9, cpacr_el1
+# CHECK: mrs      x9, hcr_el2
+# CHECK: mrs      x9, scr_el3
+# CHECK: mrs      x9, mdcr_el2
+# CHECK: mrs      x9, sder32_el3
+# CHECK: mrs      x9, cptr_el2
+# CHECK: mrs      x9, cptr_el3
+# CHECK: mrs      x9, hstr_el2
+# CHECK: mrs      x9, hacr_el2
+# CHECK: mrs      x9, mdcr_el3
+# CHECK: mrs      x9, ttbr0_el1
+# CHECK: mrs      x9, ttbr0_el2
+# CHECK: mrs      x9, ttbr0_el3
+# CHECK: mrs      x9, ttbr1_el1
+# CHECK: mrs      x9, tcr_el1
+# CHECK: mrs      x9, tcr_el2
+# CHECK: mrs      x9, tcr_el3
+# CHECK: mrs      x9, vttbr_el2
+# CHECK: mrs      x9, vtcr_el2
+# CHECK: mrs      x9, dacr32_el2
+# CHECK: mrs      x9, spsr_el1
+# CHECK: mrs      x9, spsr_el2
+# CHECK: mrs      x9, spsr_el3
+# CHECK: mrs      x9, elr_el1
+# CHECK: mrs      x9, elr_el2
+# CHECK: mrs      x9, elr_el3
+# CHECK: mrs      x9, sp_el0
+# CHECK: mrs      x9, sp_el1
+# CHECK: mrs      x9, sp_el2
+# CHECK: mrs      x9, spsel
+# CHECK: mrs      x9, nzcv
+# CHECK: mrs      x9, daif
+# CHECK: mrs      x9, currentel
+# CHECK: mrs      x9, spsr_irq
+# CHECK: mrs      x9, spsr_abt
+# CHECK: mrs      x9, spsr_und
+# CHECK: mrs      x9, spsr_fiq
+# CHECK: mrs      x9, fpcr
+# CHECK: mrs      x9, fpsr
+# CHECK: mrs      x9, dspsr_el0
+# CHECK: mrs      x9, dlr_el0
+# CHECK: mrs      x9, ifsr32_el2
+# CHECK: mrs      x9, afsr0_el1
+# CHECK: mrs      x9, afsr0_el2
+# CHECK: mrs      x9, afsr0_el3
+# CHECK: mrs      x9, afsr1_el1
+# CHECK: mrs      x9, afsr1_el2
+# CHECK: mrs      x9, afsr1_el3
+# CHECK: mrs      x9, esr_el1
+# CHECK: mrs      x9, esr_el2
+# CHECK: mrs      x9, esr_el3
+# CHECK: mrs      x9, fpexc32_el2
+# CHECK: mrs      x9, far_el1
+# CHECK: mrs      x9, far_el2
+# CHECK: mrs      x9, far_el3
+# CHECK: mrs      x9, hpfar_el2
+# CHECK: mrs      x9, par_el1
+# CHECK: mrs      x9, pmcr_el0
+# CHECK: mrs      x9, pmcntenset_el0
+# CHECK: mrs      x9, pmcntenclr_el0
+# CHECK: mrs      x9, pmovsclr_el0
+# CHECK: mrs      x9, pmselr_el0
+# CHECK: mrs      x9, pmceid0_el0
+# CHECK: mrs      x9, pmceid1_el0
+# CHECK: mrs      x9, pmccntr_el0
+# CHECK: mrs      x9, pmxevtyper_el0
+# CHECK: mrs      x9, pmxevcntr_el0
+# CHECK: mrs      x9, pmuserenr_el0
+# CHECK: mrs      x9, pmintenset_el1
+# CHECK: mrs      x9, pmintenclr_el1
+# CHECK: mrs      x9, pmovsset_el0
+# CHECK: mrs      x9, mair_el1
+# CHECK: mrs      x9, mair_el2
+# CHECK: mrs      x9, mair_el3
+# CHECK: mrs      x9, amair_el1
+# CHECK: mrs      x9, amair_el2
+# CHECK: mrs      x9, amair_el3
+# CHECK: mrs      x9, vbar_el1
+# CHECK: mrs      x9, vbar_el2
+# CHECK: mrs      x9, vbar_el3
+# CHECK: mrs      x9, rvbar_el1
+# CHECK: mrs      x9, rvbar_el2
+# CHECK: mrs      x9, rvbar_el3
+# CHECK: mrs      x9, rmr_el1
+# CHECK: mrs      x9, rmr_el2
+# CHECK: mrs      x9, rmr_el3
+# CHECK: mrs      x9, isr_el1
+# CHECK: mrs      x9, contextidr_el1
+# CHECK: mrs      x9, tpidr_el0
+# CHECK: mrs      x9, tpidr_el2
+# CHECK: mrs      x9, tpidr_el3
+# CHECK: mrs      x9, tpidrro_el0
+# CHECK: mrs      x9, tpidr_el1
+# CHECK: mrs      x9, cntfrq_el0
+# CHECK: mrs      x9, cntpct_el0
+# CHECK: mrs      x9, cntvct_el0
+# CHECK: mrs      x9, cntvoff_el2
+# CHECK: mrs      x9, cntkctl_el1
+# CHECK: mrs      x9, cnthctl_el2
+# CHECK: mrs      x9, cntp_tval_el0
+# CHECK: mrs      x9, cnthp_tval_el2
+# CHECK: mrs      x9, cntps_tval_el1
+# CHECK: mrs      x9, cntp_ctl_el0
+# CHECK: mrs      x9, cnthp_ctl_el2
+# CHECK: mrs      x9, cntps_ctl_el1
+# CHECK: mrs      x9, cntp_cval_el0
+# CHECK: mrs      x9, cnthp_cval_el2
+# CHECK: mrs      x9, cntps_cval_el1
+# CHECK: mrs      x9, cntv_tval_el0
+# CHECK: mrs      x9, cntv_ctl_el0
+# CHECK: mrs      x9, cntv_cval_el0
+# CHECK: mrs      x9, pmevcntr0_el0
+# CHECK: mrs      x9, pmevcntr1_el0
+# CHECK: mrs      x9, pmevcntr2_el0
+# CHECK: mrs      x9, pmevcntr3_el0
+# CHECK: mrs      x9, pmevcntr4_el0
+# CHECK: mrs      x9, pmevcntr5_el0
+# CHECK: mrs      x9, pmevcntr6_el0
+# CHECK: mrs      x9, pmevcntr7_el0
+# CHECK: mrs      x9, pmevcntr8_el0
+# CHECK: mrs      x9, pmevcntr9_el0
+# CHECK: mrs      x9, pmevcntr10_el0
+# CHECK: mrs      x9, pmevcntr11_el0
+# CHECK: mrs      x9, pmevcntr12_el0
+# CHECK: mrs      x9, pmevcntr13_el0
+# CHECK: mrs      x9, pmevcntr14_el0
+# CHECK: mrs      x9, pmevcntr15_el0
+# CHECK: mrs      x9, pmevcntr16_el0
+# CHECK: mrs      x9, pmevcntr17_el0
+# CHECK: mrs      x9, pmevcntr18_el0
+# CHECK: mrs      x9, pmevcntr19_el0
+# CHECK: mrs      x9, pmevcntr20_el0
+# CHECK: mrs      x9, pmevcntr21_el0
+# CHECK: mrs      x9, pmevcntr22_el0
+# CHECK: mrs      x9, pmevcntr23_el0
+# CHECK: mrs      x9, pmevcntr24_el0
+# CHECK: mrs      x9, pmevcntr25_el0
+# CHECK: mrs      x9, pmevcntr26_el0
+# CHECK: mrs      x9, pmevcntr27_el0
+# CHECK: mrs      x9, pmevcntr28_el0
+# CHECK: mrs      x9, pmevcntr29_el0
+# CHECK: mrs      x9, pmevcntr30_el0
+# CHECK: mrs      x9, pmccfiltr_el0
+# CHECK: mrs      x9, pmevtyper0_el0
+# CHECK: mrs      x9, pmevtyper1_el0
+# CHECK: mrs      x9, pmevtyper2_el0
+# CHECK: mrs      x9, pmevtyper3_el0
+# CHECK: mrs      x9, pmevtyper4_el0
+# CHECK: mrs      x9, pmevtyper5_el0
+# CHECK: mrs      x9, pmevtyper6_el0
+# CHECK: mrs      x9, pmevtyper7_el0
+# CHECK: mrs      x9, pmevtyper8_el0
+# CHECK: mrs      x9, pmevtyper9_el0
+# CHECK: mrs      x9, pmevtyper10_el0
+# CHECK: mrs      x9, pmevtyper11_el0
+# CHECK: mrs      x9, pmevtyper12_el0
+# CHECK: mrs      x9, pmevtyper13_el0
+# CHECK: mrs      x9, pmevtyper14_el0
+# CHECK: mrs      x9, pmevtyper15_el0
+# CHECK: mrs      x9, pmevtyper16_el0
+# CHECK: mrs      x9, pmevtyper17_el0
+# CHECK: mrs      x9, pmevtyper18_el0
+# CHECK: mrs      x9, pmevtyper19_el0
+# CHECK: mrs      x9, pmevtyper20_el0
+# CHECK: mrs      x9, pmevtyper21_el0
+# CHECK: mrs      x9, pmevtyper22_el0
+# CHECK: mrs      x9, pmevtyper23_el0
+# CHECK: mrs      x9, pmevtyper24_el0
+# CHECK: mrs      x9, pmevtyper25_el0
+# CHECK: mrs      x9, pmevtyper26_el0
+# CHECK: mrs      x9, pmevtyper27_el0
+# CHECK: mrs      x9, pmevtyper28_el0
+# CHECK: mrs      x9, pmevtyper29_el0
+# CHECK: mrs      x9, pmevtyper30_el0
+
+0xc 0x0 0x12 0xd5
+0x4c 0x0 0x10 0xd5
+0xc 0x2 0x10 0xd5
+0x4c 0x2 0x10 0xd5
+0x4c 0x3 0x10 0xd5
+0xc 0x4 0x13 0xd5
+0xc 0x5 0x13 0xd5
+0x4c 0x6 0x10 0xd5
+0xc 0x7 0x14 0xd5
+0x8c 0x0 0x10 0xd5
+0x8c 0x1 0x10 0xd5
+0x8c 0x2 0x10 0xd5
+0x8c 0x3 0x10 0xd5
+0x8c 0x4 0x10 0xd5
+0x8c 0x5 0x10 0xd5
+0x8c 0x6 0x10 0xd5
+0x8c 0x7 0x10 0xd5
+0x8c 0x8 0x10 0xd5
+0x8c 0x9 0x10 0xd5
+0x8c 0xa 0x10 0xd5
+0x8c 0xb 0x10 0xd5
+0x8c 0xc 0x10 0xd5
+0x8c 0xd 0x10 0xd5
+0x8c 0xe 0x10 0xd5
+0x8c 0xf 0x10 0xd5
+0xac 0x0 0x10 0xd5
+0xac 0x1 0x10 0xd5
+0xac 0x2 0x10 0xd5
+0xac 0x3 0x10 0xd5
+0xac 0x4 0x10 0xd5
+0xac 0x5 0x10 0xd5
+0xac 0x6 0x10 0xd5
+0xac 0x7 0x10 0xd5
+0xac 0x8 0x10 0xd5
+0xac 0x9 0x10 0xd5
+0xac 0xa 0x10 0xd5
+0xac 0xb 0x10 0xd5
+0xac 0xc 0x10 0xd5
+0xac 0xd 0x10 0xd5
+0xac 0xe 0x10 0xd5
+0xac 0xf 0x10 0xd5
+0xcc 0x0 0x10 0xd5
+0xcc 0x1 0x10 0xd5
+0xcc 0x2 0x10 0xd5
+0xcc 0x3 0x10 0xd5
+0xcc 0x4 0x10 0xd5
+0xcc 0x5 0x10 0xd5
+0xcc 0x6 0x10 0xd5
+0xcc 0x7 0x10 0xd5
+0xcc 0x8 0x10 0xd5
+0xcc 0x9 0x10 0xd5
+0xcc 0xa 0x10 0xd5
+0xcc 0xb 0x10 0xd5
+0xcc 0xc 0x10 0xd5
+0xcc 0xd 0x10 0xd5
+0xcc 0xe 0x10 0xd5
+0xcc 0xf 0x10 0xd5
+0xec 0x0 0x10 0xd5
+0xec 0x1 0x10 0xd5
+0xec 0x2 0x10 0xd5
+0xec 0x3 0x10 0xd5
+0xec 0x4 0x10 0xd5
+0xec 0x5 0x10 0xd5
+0xec 0x6 0x10 0xd5
+0xec 0x7 0x10 0xd5
+0xec 0x8 0x10 0xd5
+0xec 0x9 0x10 0xd5
+0xec 0xa 0x10 0xd5
+0xec 0xb 0x10 0xd5
+0xec 0xc 0x10 0xd5
+0xec 0xd 0x10 0xd5
+0xec 0xe 0x10 0xd5
+0xec 0xf 0x10 0xd5
+0xc 0x10 0x12 0xd5
+0x8c 0x10 0x10 0xd5
+0x8c 0x13 0x10 0xd5
+0x8c 0x14 0x10 0xd5
+0xcc 0x78 0x10 0xd5
+0xcc 0x79 0x10 0xd5
+0xc 0x0 0x1a 0xd5
+0xc 0x0 0x1c 0xd5
+0xac 0x0 0x1c 0xd5
+0xc 0x10 0x18 0xd5
+0xc 0x10 0x1c 0xd5
+0xc 0x10 0x1e 0xd5
+0x2c 0x10 0x18 0xd5
+0x2c 0x10 0x1c 0xd5
+0x2c 0x10 0x1e 0xd5
+0x4c 0x10 0x18 0xd5
+0xc 0x11 0x1c 0xd5
+0xc 0x11 0x1e 0xd5
+0x2c 0x11 0x1c 0xd5
+0x2c 0x11 0x1e 0xd5
+0x4c 0x11 0x1c 0xd5
+0x4c 0x11 0x1e 0xd5
+0x6c 0x11 0x1c 0xd5
+0xec 0x11 0x1c 0xd5
+0x2c 0x13 0x1e 0xd5
+0xc 0x20 0x18 0xd5
+0xc 0x20 0x1c 0xd5
+0xc 0x20 0x1e 0xd5
+0x2c 0x20 0x18 0xd5
+0x4c 0x20 0x18 0xd5
+0x4c 0x20 0x1c 0xd5
+0x4c 0x20 0x1e 0xd5
+0xc 0x21 0x1c 0xd5
+0x4c 0x21 0x1c 0xd5
+0xc 0x30 0x1c 0xd5
+0xc 0x40 0x18 0xd5
+0xc 0x40 0x1c 0xd5
+0xc 0x40 0x1e 0xd5
+0x2c 0x40 0x18 0xd5
+0x2c 0x40 0x1c 0xd5
+0x2c 0x40 0x1e 0xd5
+0xc 0x41 0x18 0xd5
+0xc 0x41 0x1c 0xd5
+0xc 0x41 0x1e 0xd5
+0xc 0x42 0x18 0xd5
+0xc 0x42 0x1b 0xd5
+0x2c 0x42 0x1b 0xd5
+0x4c 0x42 0x18 0xd5
+0xc 0x43 0x1c 0xd5
+0x2c 0x43 0x1c 0xd5
+0x4c 0x43 0x1c 0xd5
+0x6c 0x43 0x1c 0xd5
+0xc 0x44 0x1b 0xd5
+0x2c 0x44 0x1b 0xd5
+0xc 0x45 0x1b 0xd5
+0x2c 0x45 0x1b 0xd5
+0x2c 0x50 0x1c 0xd5
+0xc 0x51 0x18 0xd5
+0xc 0x51 0x1c 0xd5
+0xc 0x51 0x1e 0xd5
+0x2c 0x51 0x18 0xd5
+0x2c 0x51 0x1c 0xd5
+0x2c 0x51 0x1e 0xd5
+0xc 0x52 0x18 0xd5
+0xc 0x52 0x1c 0xd5
+0xc 0x52 0x1e 0xd5
+0xc 0x53 0x1c 0xd5
+0xc 0x60 0x18 0xd5
+0xc 0x60 0x1c 0xd5
+0xc 0x60 0x1e 0xd5
+0x8c 0x60 0x1c 0xd5
+0xc 0x74 0x18 0xd5
+0xc 0x9c 0x1b 0xd5
+0x2c 0x9c 0x1b 0xd5
+0x4c 0x9c 0x1b 0xd5
+0x6c 0x9c 0x1b 0xd5
+0xac 0x9c 0x1b 0xd5
+0xc 0x9d 0x1b 0xd5
+0x2c 0x9d 0x1b 0xd5
+0x4c 0x9d 0x1b 0xd5
+0xc 0x9e 0x1b 0xd5
+0x2c 0x9e 0x18 0xd5
+0x4c 0x9e 0x18 0xd5
+0x6c 0x9e 0x1b 0xd5
+0xc 0xa2 0x18 0xd5
+0xc 0xa2 0x1c 0xd5
+0xc 0xa2 0x1e 0xd5
+0xc 0xa3 0x18 0xd5
+0xc 0xa3 0x1c 0xd5
+0xc 0xa3 0x1e 0xd5
+0xc 0xc0 0x18 0xd5
+0xc 0xc0 0x1c 0xd5
+0xc 0xc0 0x1e 0xd5
+0x4c 0xc0 0x18 0xd5
+0x4c 0xc0 0x1c 0xd5
+0x4c 0xc0 0x1e 0xd5
+0x4c 0xd0 0x1b 0xd5
+0x4c 0xd0 0x1c 0xd5
+0x4c 0xd0 0x1e 0xd5
+0x6c 0xd0 0x1b 0xd5
+0x8c 0xd0 0x18 0xd5
+0xc 0xe0 0x1b 0xd5
+0x6c 0xe0 0x1c 0xd5
+0xc 0xe1 0x18 0xd5
+0xc 0xe1 0x1c 0xd5
+0xc 0xe2 0x1b 0xd5
+0xc 0xe2 0x1c 0xd5
+0xc 0xe2 0x1f 0xd5
+0x2c 0xe2 0x1b 0xd5
+0x2c 0xe2 0x1c 0xd5
+0x2c 0xe2 0x1f 0xd5
+0x4c 0xe2 0x1b 0xd5
+0x4c 0xe2 0x1c 0xd5
+0x4c 0xe2 0x1f 0xd5
+0xc 0xe3 0x1b 0xd5
+0x2c 0xe3 0x1b 0xd5
+0x4c 0xe3 0x1b 0xd5
+0xc 0xe8 0x1b 0xd5
+0x2c 0xe8 0x1b 0xd5
+0x4c 0xe8 0x1b 0xd5
+0x6c 0xe8 0x1b 0xd5
+0x8c 0xe8 0x1b 0xd5
+0xac 0xe8 0x1b 0xd5
+0xcc 0xe8 0x1b 0xd5
+0xec 0xe8 0x1b 0xd5
+0xc 0xe9 0x1b 0xd5
+0x2c 0xe9 0x1b 0xd5
+0x4c 0xe9 0x1b 0xd5
+0x6c 0xe9 0x1b 0xd5
+0x8c 0xe9 0x1b 0xd5
+0xac 0xe9 0x1b 0xd5
+0xcc 0xe9 0x1b 0xd5
+0xec 0xe9 0x1b 0xd5
+0xc 0xea 0x1b 0xd5
+0x2c 0xea 0x1b 0xd5
+0x4c 0xea 0x1b 0xd5
+0x6c 0xea 0x1b 0xd5
+0x8c 0xea 0x1b 0xd5
+0xac 0xea 0x1b 0xd5
+0xcc 0xea 0x1b 0xd5
+0xec 0xea 0x1b 0xd5
+0xc 0xeb 0x1b 0xd5
+0x2c 0xeb 0x1b 0xd5
+0x4c 0xeb 0x1b 0xd5
+0x6c 0xeb 0x1b 0xd5
+0x8c 0xeb 0x1b 0xd5
+0xac 0xeb 0x1b 0xd5
+0xcc 0xeb 0x1b 0xd5
+0xec 0xef 0x1b 0xd5
+0xc 0xec 0x1b 0xd5
+0x2c 0xec 0x1b 0xd5
+0x4c 0xec 0x1b 0xd5
+0x6c 0xec 0x1b 0xd5
+0x8c 0xec 0x1b 0xd5
+0xac 0xec 0x1b 0xd5
+0xcc 0xec 0x1b 0xd5
+0xec 0xec 0x1b 0xd5
+0xc 0xed 0x1b 0xd5
+0x2c 0xed 0x1b 0xd5
+0x4c 0xed 0x1b 0xd5
+0x6c 0xed 0x1b 0xd5
+0x8c 0xed 0x1b 0xd5
+0xac 0xed 0x1b 0xd5
+0xcc 0xed 0x1b 0xd5
+0xec 0xed 0x1b 0xd5
+0xc 0xee 0x1b 0xd5
+0x2c 0xee 0x1b 0xd5
+0x4c 0xee 0x1b 0xd5
+0x6c 0xee 0x1b 0xd5
+0x8c 0xee 0x1b 0xd5
+0xac 0xee 0x1b 0xd5
+0xcc 0xee 0x1b 0xd5
+0xec 0xee 0x1b 0xd5
+0xc 0xef 0x1b 0xd5
+0x2c 0xef 0x1b 0xd5
+0x4c 0xef 0x1b 0xd5
+0x6c 0xef 0x1b 0xd5
+0x8c 0xef 0x1b 0xd5
+0xac 0xef 0x1b 0xd5
+0xcc 0xef 0x1b 0xd5
+0x9 0x0 0x32 0xd5
+0x49 0x0 0x30 0xd5
+0x9 0x1 0x33 0xd5
+0x9 0x2 0x30 0xd5
+0x49 0x2 0x30 0xd5
+0x49 0x3 0x30 0xd5
+0x9 0x4 0x33 0xd5
+0x9 0x5 0x33 0xd5
+0x49 0x6 0x30 0xd5
+0x9 0x7 0x34 0xd5
+0x89 0x0 0x30 0xd5
+0x89 0x1 0x30 0xd5
+0x89 0x2 0x30 0xd5
+0x89 0x3 0x30 0xd5
+0x89 0x4 0x30 0xd5
+0x89 0x5 0x30 0xd5
+0x89 0x6 0x30 0xd5
+0x89 0x7 0x30 0xd5
+0x89 0x8 0x30 0xd5
+0x89 0x9 0x30 0xd5
+0x89 0xa 0x30 0xd5
+0x89 0xb 0x30 0xd5
+0x89 0xc 0x30 0xd5
+0x89 0xd 0x30 0xd5
+0x89 0xe 0x30 0xd5
+0x89 0xf 0x30 0xd5
+0xa9 0x0 0x30 0xd5
+0xa9 0x1 0x30 0xd5
+0xa9 0x2 0x30 0xd5
+0xa9 0x3 0x30 0xd5
+0xa9 0x4 0x30 0xd5
+0xa9 0x5 0x30 0xd5
+0xa9 0x6 0x30 0xd5
+0xa9 0x7 0x30 0xd5
+0xa9 0x8 0x30 0xd5
+0xa9 0x9 0x30 0xd5
+0xa9 0xa 0x30 0xd5
+0xa9 0xb 0x30 0xd5
+0xa9 0xc 0x30 0xd5
+0xa9 0xd 0x30 0xd5
+0xa9 0xe 0x30 0xd5
+0xa9 0xf 0x30 0xd5
+0xc9 0x0 0x30 0xd5
+0xc9 0x1 0x30 0xd5
+0xc9 0x2 0x30 0xd5
+0xc9 0x3 0x30 0xd5
+0xc9 0x4 0x30 0xd5
+0xc9 0x5 0x30 0xd5
+0xc9 0x6 0x30 0xd5
+0xc9 0x7 0x30 0xd5
+0xc9 0x8 0x30 0xd5
+0xc9 0x9 0x30 0xd5
+0xc9 0xa 0x30 0xd5
+0xc9 0xb 0x30 0xd5
+0xc9 0xc 0x30 0xd5
+0xc9 0xd 0x30 0xd5
+0xc9 0xe 0x30 0xd5
+0xc9 0xf 0x30 0xd5
+0xe9 0x0 0x30 0xd5
+0xe9 0x1 0x30 0xd5
+0xe9 0x2 0x30 0xd5
+0xe9 0x3 0x30 0xd5
+0xe9 0x4 0x30 0xd5
+0xe9 0x5 0x30 0xd5
+0xe9 0x6 0x30 0xd5
+0xe9 0x7 0x30 0xd5
+0xe9 0x8 0x30 0xd5
+0xe9 0x9 0x30 0xd5
+0xe9 0xa 0x30 0xd5
+0xe9 0xb 0x30 0xd5
+0xe9 0xc 0x30 0xd5
+0xe9 0xd 0x30 0xd5
+0xe9 0xe 0x30 0xd5
+0xe9 0xf 0x30 0xd5
+0x9 0x10 0x30 0xd5
+0x9 0x10 0x32 0xd5
+0x89 0x11 0x30 0xd5
+0x89 0x13 0x30 0xd5
+0x89 0x14 0x30 0xd5
+0xc9 0x78 0x30 0xd5
+0xc9 0x79 0x30 0xd5
+0xc9 0x7e 0x30 0xd5
+0x9 0x0 0x38 0xd5
+0x9 0x0 0x39 0xd5
+0x9 0x0 0x3a 0xd5
+0x9 0x0 0x3c 0xd5
+0x29 0x0 0x39 0xd5
+0x29 0x0 0x3b 0xd5
+0xa9 0x0 0x38 0xd5
+0xa9 0x0 0x3c 0xd5
+0xc9 0x0 0x38 0xd5
+0xe9 0x0 0x39 0xd5
+0xe9 0x0 0x3b 0xd5
+0x9 0x1 0x38 0xd5
+0x29 0x1 0x38 0xd5
+0x49 0x1 0x38 0xd5
+0x69 0x1 0x38 0xd5
+0x89 0x1 0x38 0xd5
+0xa9 0x1 0x38 0xd5
+0xc9 0x1 0x38 0xd5
+0xe9 0x1 0x38 0xd5
+0x9 0x2 0x38 0xd5
+0x29 0x2 0x38 0xd5
+0x49 0x2 0x38 0xd5
+0x69 0x2 0x38 0xd5
+0x89 0x2 0x38 0xd5
+0xa9 0x2 0x38 0xd5
+0x9 0x3 0x38 0xd5
+0x29 0x3 0x38 0xd5
+0x49 0x3 0x38 0xd5
+0x9 0x4 0x38 0xd5
+0x29 0x4 0x38 0xd5
+0x9 0x5 0x38 0xd5
+0x29 0x5 0x38 0xd5
+0x89 0x5 0x38 0xd5
+0xa9 0x5 0x38 0xd5
+0x9 0x6 0x38 0xd5
+0x29 0x6 0x38 0xd5
+0x9 0x7 0x38 0xd5
+0x29 0x7 0x38 0xd5
+0x9 0x10 0x38 0xd5
+0x9 0x10 0x3c 0xd5
+0x9 0x10 0x3e 0xd5
+0x29 0x10 0x38 0xd5
+0x29 0x10 0x3c 0xd5
+0x29 0x10 0x3e 0xd5
+0x49 0x10 0x38 0xd5
+0x9 0x11 0x3c 0xd5
+0x9 0x11 0x3e 0xd5
+0x29 0x11 0x3c 0xd5
+0x29 0x11 0x3e 0xd5
+0x49 0x11 0x3c 0xd5
+0x49 0x11 0x3e 0xd5
+0x69 0x11 0x3c 0xd5
+0xe9 0x11 0x3c 0xd5
+0x29 0x13 0x3e 0xd5
+0x9 0x20 0x38 0xd5
+0x9 0x20 0x3c 0xd5
+0x9 0x20 0x3e 0xd5
+0x29 0x20 0x38 0xd5
+0x49 0x20 0x38 0xd5
+0x49 0x20 0x3c 0xd5
+0x49 0x20 0x3e 0xd5
+0x9 0x21 0x3c 0xd5
+0x49 0x21 0x3c 0xd5
+0x9 0x30 0x3c 0xd5
+0x9 0x40 0x38 0xd5
+0x9 0x40 0x3c 0xd5
+0x9 0x40 0x3e 0xd5
+0x29 0x40 0x38 0xd5
+0x29 0x40 0x3c 0xd5
+0x29 0x40 0x3e 0xd5
+0x9 0x41 0x38 0xd5
+0x9 0x41 0x3c 0xd5
+0x9 0x41 0x3e 0xd5
+0x9 0x42 0x38 0xd5
+0x9 0x42 0x3b 0xd5
+0x29 0x42 0x3b 0xd5
+0x49 0x42 0x38 0xd5
+0x9 0x43 0x3c 0xd5
+0x29 0x43 0x3c 0xd5
+0x49 0x43 0x3c 0xd5
+0x69 0x43 0x3c 0xd5
+0x9 0x44 0x3b 0xd5
+0x29 0x44 0x3b 0xd5
+0x9 0x45 0x3b 0xd5
+0x29 0x45 0x3b 0xd5
+0x29 0x50 0x3c 0xd5
+0x9 0x51 0x38 0xd5
+0x9 0x51 0x3c 0xd5
+0x9 0x51 0x3e 0xd5
+0x29 0x51 0x38 0xd5
+0x29 0x51 0x3c 0xd5
+0x29 0x51 0x3e 0xd5
+0x9 0x52 0x38 0xd5
+0x9 0x52 0x3c 0xd5
+0x9 0x52 0x3e 0xd5
+0x9 0x53 0x3c 0xd5
+0x9 0x60 0x38 0xd5
+0x9 0x60 0x3c 0xd5
+0x9 0x60 0x3e 0xd5
+0x89 0x60 0x3c 0xd5
+0x9 0x74 0x38 0xd5
+0x9 0x9c 0x3b 0xd5
+0x29 0x9c 0x3b 0xd5
+0x49 0x9c 0x3b 0xd5
+0x69 0x9c 0x3b 0xd5
+0xa9 0x9c 0x3b 0xd5
+0xc9 0x9c 0x3b 0xd5
+0xe9 0x9c 0x3b 0xd5
+0x9 0x9d 0x3b 0xd5
+0x29 0x9d 0x3b 0xd5
+0x49 0x9d 0x3b 0xd5
+0x9 0x9e 0x3b 0xd5
+0x29 0x9e 0x38 0xd5
+0x49 0x9e 0x38 0xd5
+0x69 0x9e 0x3b 0xd5
+0x9 0xa2 0x38 0xd5
+0x9 0xa2 0x3c 0xd5
+0x9 0xa2 0x3e 0xd5
+0x9 0xa3 0x38 0xd5
+0x9 0xa3 0x3c 0xd5
+0x9 0xa3 0x3e 0xd5
+0x9 0xc0 0x38 0xd5
+0x9 0xc0 0x3c 0xd5
+0x9 0xc0 0x3e 0xd5
+0x29 0xc0 0x38 0xd5
+0x29 0xc0 0x3c 0xd5
+0x29 0xc0 0x3e 0xd5
+0x49 0xc0 0x38 0xd5
+0x49 0xc0 0x3c 0xd5
+0x49 0xc0 0x3e 0xd5
+0x9 0xc1 0x38 0xd5
+0x29 0xd0 0x38 0xd5
+0x49 0xd0 0x3b 0xd5
+0x49 0xd0 0x3c 0xd5
+0x49 0xd0 0x3e 0xd5
+0x69 0xd0 0x3b 0xd5
+0x89 0xd0 0x38 0xd5
+0x9 0xe0 0x3b 0xd5
+0x29 0xe0 0x3b 0xd5
+0x49 0xe0 0x3b 0xd5
+0x69 0xe0 0x3c 0xd5
+0x9 0xe1 0x38 0xd5
+0x9 0xe1 0x3c 0xd5
+0x9 0xe2 0x3b 0xd5
+0x9 0xe2 0x3c 0xd5
+0x9 0xe2 0x3f 0xd5
+0x29 0xe2 0x3b 0xd5
+0x29 0xe2 0x3c 0xd5
+0x29 0xe2 0x3f 0xd5
+0x49 0xe2 0x3b 0xd5
+0x49 0xe2 0x3c 0xd5
+0x49 0xe2 0x3f 0xd5
+0x9 0xe3 0x3b 0xd5
+0x29 0xe3 0x3b 0xd5
+0x49 0xe3 0x3b 0xd5
+0x9 0xe8 0x3b 0xd5
+0x29 0xe8 0x3b 0xd5
+0x49 0xe8 0x3b 0xd5
+0x69 0xe8 0x3b 0xd5
+0x89 0xe8 0x3b 0xd5
+0xa9 0xe8 0x3b 0xd5
+0xc9 0xe8 0x3b 0xd5
+0xe9 0xe8 0x3b 0xd5
+0x9 0xe9 0x3b 0xd5
+0x29 0xe9 0x3b 0xd5
+0x49 0xe9 0x3b 0xd5
+0x69 0xe9 0x3b 0xd5
+0x89 0xe9 0x3b 0xd5
+0xa9 0xe9 0x3b 0xd5
+0xc9 0xe9 0x3b 0xd5
+0xe9 0xe9 0x3b 0xd5
+0x9 0xea 0x3b 0xd5
+0x29 0xea 0x3b 0xd5
+0x49 0xea 0x3b 0xd5
+0x69 0xea 0x3b 0xd5
+0x89 0xea 0x3b 0xd5
+0xa9 0xea 0x3b 0xd5
+0xc9 0xea 0x3b 0xd5
+0xe9 0xea 0x3b 0xd5
+0x9 0xeb 0x3b 0xd5
+0x29 0xeb 0x3b 0xd5
+0x49 0xeb 0x3b 0xd5
+0x69 0xeb 0x3b 0xd5
+0x89 0xeb 0x3b 0xd5
+0xa9 0xeb 0x3b 0xd5
+0xc9 0xeb 0x3b 0xd5
+0xe9 0xef 0x3b 0xd5
+0x9 0xec 0x3b 0xd5
+0x29 0xec 0x3b 0xd5
+0x49 0xec 0x3b 0xd5
+0x69 0xec 0x3b 0xd5
+0x89 0xec 0x3b 0xd5
+0xa9 0xec 0x3b 0xd5
+0xc9 0xec 0x3b 0xd5
+0xe9 0xec 0x3b 0xd5
+0x9 0xed 0x3b 0xd5
+0x29 0xed 0x3b 0xd5
+0x49 0xed 0x3b 0xd5
+0x69 0xed 0x3b 0xd5
+0x89 0xed 0x3b 0xd5
+0xa9 0xed 0x3b 0xd5
+0xc9 0xed 0x3b 0xd5
+0xe9 0xed 0x3b 0xd5
+0x9 0xee 0x3b 0xd5
+0x29 0xee 0x3b 0xd5
+0x49 0xee 0x3b 0xd5
+0x69 0xee 0x3b 0xd5
+0x89 0xee 0x3b 0xd5
+0xa9 0xee 0x3b 0xd5
+0xc9 0xee 0x3b 0xd5
+0xe9 0xee 0x3b 0xd5
+0x9 0xef 0x3b 0xd5
+0x29 0xef 0x3b 0xd5
+0x49 0xef 0x3b 0xd5
+0x69 0xef 0x3b 0xd5
+0x89 0xef 0x3b 0xd5
+0xa9 0xef 0x3b 0xd5
+0xc9 0xef 0x3b 0xd5
+
+# CHECK: mrs     x12, s3_7_c15_c1_5
+# CHECK: mrs     x13, s3_2_c11_c15_7
+# CHECK: msr     s3_0_c15_c0_0, x12
+# CHECK: msr     s3_7_c11_c13_7, x5
+0xac 0xf1 0x3f 0xd5
+0xed 0xbf 0x3a 0xd5
+0x0c 0xf0 0x18 0xd5
+0xe5 0xbd 0x1f 0xd5
+
+#------------------------------------------------------------------------------
+# Test and branch (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: tbz     x12, #62, #0
+# CHECK: tbz     x12, #62, #4
+# CHECK: tbz     x12, #62, #-32768
+# CHECK: tbnz    x12, #60, #32764
+0x0c 0x00 0xf0 0xb6
+0x2c 0x00 0xf0 0xb6
+0x0c 0x00 0xf4 0xb6
+0xec 0xff 0xe3 0xb7
+
+#------------------------------------------------------------------------------
+# Unconditional branch (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: b        #4
+# CHECK: b        #-4
+# CHECK: b        #134217724
+0x01 0x00 0x00 0x14
+0xff 0xff 0xff 0x17
+0xff 0xff 0xff 0x15
+
+#------------------------------------------------------------------------------
+# Unconditional branch (register)
+#------------------------------------------------------------------------------
+
+# CHECK: br       x20
+# CHECK: blr      xzr
+# CHECK: ret      x10
+0x80 0x2 0x1f 0xd6
+0xe0 0x3 0x3f 0xd6
+0x40 0x1 0x5f 0xd6
+
+# CHECK: ret
+# CHECK: eret
+# CHECK: drps
+0xc0 0x3 0x5f 0xd6
+0xe0 0x3 0x9f 0xd6
+0xe0 0x3 0xbf 0xd6
+
diff --git a/test/MC/Disassembler/AArch64/basic-a64-undefined.txt b/test/MC/Disassembler/AArch64/basic-a64-undefined.txt
new file mode 100644
index 0000000..a17579c
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/basic-a64-undefined.txt
@@ -0,0 +1,43 @@
+# These spawn another process so they're rather expensive. Not many.
+
+# Instructions notionally in the add/sub (extended register) sheet, but with
+# invalid shift amount or "opt" field.
+# RUN: echo "0x00 0x10 0xa0 0x0b" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x00 0x10 0x60 0x0b" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x00 0x14 0x20 0x0b" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Instructions notionally in the add/sub (immediate) sheet, but with
+# invalid "shift" field.
+# RUN: echo "0xdf 0x3 0x80 0x91" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0xed 0x8e 0xc4 0x31" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x62 0xfc 0xbf 0x11" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x3 0xff 0xff 0x91" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Instructions notionally in the load/store (unsigned immediate) sheet.
+# Only unallocated (int-register) variants are: opc=0b11, size=0b10, 0b11
+# RUN: echo "0xd7 0xfc 0xff 0xb9" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0xd7 0xfc 0xcf 0xf9" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Instructions notionally in the floating-point <-> fixed-point conversion sheet.
+# Scale field is 64-<imm> and <imm> should be 1-32 for a 32-bit int register.
+# RUN: echo "0x23 0x01 0x18 0x1e" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x23 0x25 0x42 0x1e" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Instructions notionally in the logical (shifted register) sheet, but with an
+# out-of-range shift: w-registers can only have 0-31.
+# RUN: echo "0x00 0x80 0x00 0x0a" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Instructions notionally in the move wide (immediate) sheet, but with an
+# out-of-range shift: w-registers can only have 0 or 16.
+# RUN: echo "0x00 0x00 0xc0 0x12" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x12 0x34 0xe0 0x52" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Data-processing instructions are undefined when S=1 and for the 0b0000111 value in opcode:sf
+# RUN: echo "0x00 0x00 0xc0 0x5f" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x56 0x0c 0xc0 0x5a" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Data-processing instructions (2 source) are undefined for a value of 0001xx:0:x or 0011xx:0:x for opcode:S:sf
+# RUN: echo "0x00 0x30 0xc1 0x1a" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x00 0x10 0xc1 0x1a" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# CHECK: invalid instruction encoding
diff --git a/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt b/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt
new file mode 100644
index 0000000..adb8f75
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt
@@ -0,0 +1,96 @@
+# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s
+
+#------------------------------------------------------------------------------
+# Load-store exclusive
+#------------------------------------------------------------------------------
+
+#ldxp x14, x14, [sp]
+0xee 0x3b 0x7f 0xc8
+#CHECK: warning: potentially undefined instruction encoding
+#CHECK-NEXT: 0xee 0x3b 0x7f 0xc8
+
+#ldaxp w19, w19, [x1]
+0x33 0xcc 0x7f 0x88
+#CHECK: warning: potentially undefined instruction encoding
+#CHECK-NEXT: 0x33 0xcc 0x7f 0x88
+
+#------------------------------------------------------------------------------
+# Load-store register (immediate post-indexed)
+#------------------------------------------------------------------------------
+
+0x63 0x44 0x40 0xf8
+#CHECK: warning: potentially undefined instruction encoding
+#CHECK-NEXT: 0x63 0x44 0x40 0xf8
+
+0x42 0x14 0xc0 0x38
+#CHECK: warning: potentially undefined instruction encoding
+#CHECK-NEXT: 0x42 0x14 0xc0 0x38
+
+#------------------------------------------------------------------------------
+# Load-store register (immediate pre-indexed)
+#------------------------------------------------------------------------------
+
+0x63 0x4c 0x40 0xf8
+#CHECK: warning: potentially undefined instruction encoding
+#CHECK-NEXT: 0x63 0x4c 0x40 0xf8
+
+0x42 0x1c 0xc0 0x38
+#CHECK: warning: potentially undefined instruction encoding
+#CHECK-NEXT: 0x42 0x1c 0xc0 0x38
+
+#------------------------------------------------------------------------------
+# Load-store register pair (offset)
+#------------------------------------------------------------------------------
+
+# Unpredictable if Rt == Rt2 on a load.
+
+0xe3 0x0f 0x40 0xa9
+# CHECK:  warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0xe3 0x0f 0x40 0xa9
+# CHECK-NEXT: ^
+
+0xe2 0x8b 0x41 0x69
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0xe2 0x8b 0x41 0x69
+# CHECK-NEXT: ^
+
+0x82 0x88 0x40 0x2d
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0x82 0x88 0x40 0x2d
+# CHECK-NEXT: ^
+
+#------------------------------------------------------------------------------
+# Load-store register pair (post-indexed)
+#------------------------------------------------------------------------------
+
+# Unpredictable if Rt == Rt2 on a load.
+
+0xe3 0x0f 0xc0 0xa8
+# CHECK:  warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0xe3 0x0f 0xc0 0xa8
+# CHECK-NEXT: ^
+
+0xe2 0x8b 0xc1 0x68
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0xe2 0x8b 0xc1 0x68
+# CHECK-NEXT: ^
+
+0x82 0x88 0xc0 0x2c
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0x82 0x88 0xc0 0x2c
+# CHECK-NEXT: ^
+
+# Also unpredictable if writeback clashes with either transfer register
+
+0x63 0x94 0xc0 0xa8
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0x63 0x94 0xc0 0xa8
+
+0x69 0x2d 0x81 0xa8
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0x69 0x2d 0x81 0xa8
+
+0x29 0xad 0xc0 0x28
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0x29 0xad 0xc0 0x28
+
diff --git a/test/MC/Disassembler/AArch64/ldp-offset-predictable.txt b/test/MC/Disassembler/AArch64/ldp-offset-predictable.txt
new file mode 100644
index 0000000..7ff495f
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/ldp-offset-predictable.txt
@@ -0,0 +1,7 @@
+# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s
+
+# Stores are OK.
+0xe0 0x83 0x00 0xa9
+# CHECK-NOT: potentially undefined instruction encoding
+# CHECK: stp x0, x0, [sp, #8]
+
diff --git a/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt b/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt
new file mode 100644
index 0000000..775660b
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s
+
+# None of these instructions should be classified as unpredictable:
+
+# CHECK-NOT: potentially undefined instruction encoding
+
+# Stores from duplicated registers should be fine.
+0xe3 0x0f 0x80 0xa8
+# CHECK: stp x3, x3, [sp], #0
+
+# d5 != x5 so "ldp d5, d6, [x5], #24" is fine.
+0xa5 0x98 0xc1 0x6c
+# CHECK: ldp d5, d6, [x5], #24
+
+# xzr != sp so "stp xzr, xzr, [sp], #8" is fine.
+0xff 0xff 0x80 0xa8
+# CHECK: stp xzr, xzr, [sp], #8
diff --git a/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt b/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt
new file mode 100644
index 0000000..48ea817
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s
+
+# None of these instructions should be classified as unpredictable:
+
+# CHECK-NOT: potentially undefined instruction encoding
+
+# Stores from duplicated registers should be fine.
+0xe3 0x0f 0x80 0xa9
+# CHECK: stp x3, x3, [sp, #0]!
+
+# d5 != x5 so "ldp d5, d6, [x5, #24]!" is fine.
+0xa5 0x98 0xc1 0x6d
+# CHECK: ldp d5, d6, [x5, #24]!
+
+# xzr != sp so "stp xzr, xzr, [sp, #8]!" is fine.
+0xff 0xff 0x80 0xa9
+# CHECK: stp xzr, xzr, [sp, #8]!
diff --git a/test/MC/Disassembler/AArch64/lit.local.cfg b/test/MC/Disassembler/AArch64/lit.local.cfg
new file mode 100644
index 0000000..f9df30e
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.txt']
+
+targets = set(config.root.targets_to_build.split())
+if not 'AArch64' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt b/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt
index 2d2a628..99da8ce 100644
--- a/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt
@@ -7,7 +7,7 @@
 # -------------------------------------------------------------------------------------------------
 #
 # A8.6.391 VST1 (multiple single elements)
-# This encoding looks like: vst1.8 {d0,d1,d2}, [r0, :128]
+# This encoding looks like: vst1.8 {d0,d1,d2}, [r0:128]
 # But bits 5-4 for the alignment of 128 encoded as align = 0b10, is available only if <list>
 # contains two or four registers.  rdar://11220250
 0x00 0xf9 0x2f 0x06
diff --git a/test/MC/Disassembler/ARM/neon-tests.txt b/test/MC/Disassembler/ARM/neon-tests.txt
index a7b6b1c..65e9954 100644
--- a/test/MC/Disassembler/ARM/neon-tests.txt
+++ b/test/MC/Disassembler/ARM/neon-tests.txt
@@ -21,10 +21,10 @@
 # CHECK:	vld4.8	{d4, d6, d8, d10}, [r2]
 0x0f 0x41 0x22 0xf4
 
-# CHECK:	vld1.32	{d3[], d4[]}, [r0, :32]!
+# CHECK:	vld1.32	{d3[], d4[]}, [r0:32]!
 0xbd 0x3c 0xa0 0xf4
 
-# CHECK:	vld4.16	{d3[], d5[], d7[], d9[]}, [r0, :64]!
+# CHECK:	vld4.16	{d3[], d5[], d7[], d9[]}, [r0:64]!
 0x7d 0x3f 0xa0 0xf4
 
 # CHECK:	vorr	d0, d15, d15
@@ -75,7 +75,7 @@
 # CHECK:	vbic.i32	q2, #0xa900
 0x79 0x43 0x82 0xf3
 
-# CHECK:	vst2.32	{d16, d18}, [r2, :64], r2
+# CHECK:	vst2.32	{d16, d18}, [r2:64], r2
 0x92 0x9 0x42 0xf4
 
 # CHECK:	vmov.s8	r0, d8[1]
diff --git a/test/MC/Disassembler/ARM/neon.txt b/test/MC/Disassembler/ARM/neon.txt
index 649424a..cd5f418 100644
--- a/test/MC/Disassembler/ARM/neon.txt
+++ b/test/MC/Disassembler/ARM/neon.txt
@@ -1638,7 +1638,7 @@
 
 
 0x1f 0x07 0x60 0xf4
-# CHECK: vld1.8	{d16}, [r0, :64]
+# CHECK: vld1.8	{d16}, [r0:64]
 0x4f 0x07 0x60 0xf4
 # CHECK: vld1.16	{d16}, [r0]
 0x8f 0x07 0x60 0xf4
@@ -1646,37 +1646,37 @@
 0xcf 0x07 0x60 0xf4
 # CHECK: vld1.64	{d16}, [r0]
 0x1f 0x0a 0x60 0xf4
-# CHECK: vld1.8	{d16, d17}, [r0, :64]
+# CHECK: vld1.8	{d16, d17}, [r0:64]
 0x6f 0x0a 0x60 0xf4
-# CHECK: vld1.16	{d16, d17}, [r0, :128]
+# CHECK: vld1.16	{d16, d17}, [r0:128]
 0x8f 0x0a 0x60 0xf4
 # CHECK: vld1.32	{d16, d17}, [r0]
 0xcf 0x0a 0x60 0xf4
 # CHECK: vld1.64	{d16, d17}, [r0]
 
 0x1f 0x08 0x60 0xf4
-# CHECK: vld2.8	{d16, d17}, [r0, :64]
+# CHECK: vld2.8	{d16, d17}, [r0:64]
 0x6f 0x08 0x60 0xf4
-# CHECK: vld2.16	{d16, d17}, [r0, :128]
+# CHECK: vld2.16	{d16, d17}, [r0:128]
 0x8f 0x08 0x60 0xf4
 # CHECK: vld2.32	{d16, d17}, [r0]
 0x1f 0x03 0x60 0xf4
-# CHECK: vld2.8	{d16, d17, d18, d19}, [r0, :64]
+# CHECK: vld2.8	{d16, d17, d18, d19}, [r0:64]
 0x6f 0x03 0x60 0xf4
-# CHECK: vld2.16	{d16, d17, d18, d19}, [r0, :128]
+# CHECK: vld2.16	{d16, d17, d18, d19}, [r0:128]
 0xbf 0x03 0x60 0xf4
-# CHECK: vld2.32	{d16, d17, d18, d19}, [r0, :256]
+# CHECK: vld2.32	{d16, d17, d18, d19}, [r0:256]
 
 0x1f 0x04 0x60 0xf4
-# CHECK: vld3.8	{d16, d17, d18}, [r0, :64]
+# CHECK: vld3.8	{d16, d17, d18}, [r0:64]
 0x4f 0x04 0x60 0xf4
 # CHECK: vld3.16	{d16, d17, d18}, [r0]
 0x8f 0x04 0x60 0xf4
 # CHECK: vld3.32	{d16, d17, d18}, [r0]
 0x1d 0x05 0x60 0xf4
-# CHECK: vld3.8	{d16, d18, d20}, [r0, :64]!
+# CHECK: vld3.8	{d16, d18, d20}, [r0:64]!
 0x1d 0x15 0x60 0xf4
-# CHECK: vld3.8	{d17, d19, d21}, [r0, :64]!
+# CHECK: vld3.8	{d17, d19, d21}, [r0:64]!
 0x4d 0x05 0x60 0xf4
 # CHECK: vld3.16	{d16, d18, d20}, [r0]!
 0x4d 0x15 0x60 0xf4
@@ -1687,15 +1687,15 @@
 # CHECK: vld3.32	{d17, d19, d21}, [r0]!
 
 0x1f 0x00 0x60 0xf4
-# CHECK: vld4.8	{d16, d17, d18, d19}, [r0, :64]
+# CHECK: vld4.8	{d16, d17, d18, d19}, [r0:64]
 0x6f 0x00 0x60 0xf4
-# CHECK: vld4.16	{d16, d17, d18, d19}, [r0, :128]
+# CHECK: vld4.16	{d16, d17, d18, d19}, [r0:128]
 0xbf 0x00 0x60 0xf4
-# CHECK: vld4.32	{d16, d17, d18, d19}, [r0, :256]
+# CHECK: vld4.32	{d16, d17, d18, d19}, [r0:256]
 0x3d 0x01 0x60 0xf4
-# CHECK: vld4.8	{d16, d18, d20, d22}, [r0, :256]!
+# CHECK: vld4.8	{d16, d18, d20, d22}, [r0:256]!
 0x3d 0x11 0x60 0xf4
-# CHECK: vld4.8	{d17, d19, d21, d23}, [r0, :256]!
+# CHECK: vld4.8	{d17, d19, d21, d23}, [r0:256]!
 0x4d 0x01 0x60 0xf4
 # CHECK: vld4.16	{d16, d18, d20, d22}, [r0]!
 0x4d 0x11 0x60 0xf4
@@ -1708,20 +1708,20 @@
 0x6f 0x00 0xe0 0xf4
 # CHECK: vld1.8	{d16[3]}, [r0]
 0x9f 0x04 0xe0 0xf4
-# CHECK: vld1.16	{d16[2]}, [r0, :16]
+# CHECK: vld1.16	{d16[2]}, [r0:16]
 0xbf 0x08 0xe0 0xf4
-# CHECK: vld1.32	{d16[1]}, [r0, :32]
+# CHECK: vld1.32	{d16[1]}, [r0:32]
 
 0x3f 0x01 0xe0 0xf4
-# CHECK: vld2.8	{d16[1], d17[1]}, [r0, :16]
+# CHECK: vld2.8	{d16[1], d17[1]}, [r0:16]
 0x5f 0x05 0xe0 0xf4
-# CHECK: vld2.16	{d16[1], d17[1]}, [r0, :32]
+# CHECK: vld2.16	{d16[1], d17[1]}, [r0:32]
 0x8f 0x09 0xe0 0xf4
 # CHECK: vld2.32	{d16[1], d17[1]}, [r0]
 0x6f 0x15 0xe0 0xf4
 # CHECK: vld2.16	{d17[1], d19[1]}, [r0]
 0x5f 0x19 0xe0 0xf4
-# CHECK: vld2.32	{d17[0], d19[0]}, [r0, :64]
+# CHECK: vld2.32	{d17[0], d19[0]}, [r0:64]
 
 0x2f 0x02 0xe0 0xf4
 # CHECK: vld3.8	{d16[1], d17[1], d18[1]}, [r0]
@@ -1754,44 +1754,44 @@
 0xa5 0x0e 0xa4 0xf4
 
 0x3f 0x03 0xe0 0xf4
-# CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+# CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0:32]
 0x4f 0x07 0xe0 0xf4
 # CHECK: vld4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0]
 0xaf 0x0b 0xe0 0xf4
-# CHECK: vld4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
+# CHECK: vld4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0:128]
 0x7f 0x07 0xe0 0xf4
-# CHECK: vld4.16	{d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
+# CHECK: vld4.16	{d16[1], d18[1], d20[1], d22[1]}, [r0:64]
 0x4f 0x1b 0xe0 0xf4
 # CHECK: vld4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
 
 0x0f 0x0f 0xa4 0xf4
 # CHECK: vld4.8	{d0[], d1[], d2[], d3[]}, [r4]
 0x3f 0x0f 0xa4 0xf4
-# CHECK: vld4.8	{d0[], d2[], d4[], d6[]}, [r4, :32]
+# CHECK: vld4.8	{d0[], d2[], d4[], d6[]}, [r4:32]
 0x1d 0x0f 0xa4 0xf4
-# CHECK: vld4.8	{d0[], d1[], d2[], d3[]}, [r4, :32]!
+# CHECK: vld4.8	{d0[], d1[], d2[], d3[]}, [r4:32]!
 0x35 0x0f 0xa4 0xf4
-# CHECK: vld4.8	{d0[], d2[], d4[], d6[]}, [r4, :32], r5
+# CHECK: vld4.8	{d0[], d2[], d4[], d6[]}, [r4:32], r5
 0x4f 0x0f 0xa4 0xf4
 # CHECK: vld4.16	{d0[], d1[], d2[], d3[]}, [r4]
 0x7f 0x0f 0xa4 0xf4
-# CHECK: vld4.16	{d0[], d2[], d4[], d6[]}, [r4, :64]
+# CHECK: vld4.16	{d0[], d2[], d4[], d6[]}, [r4:64]
 0x5d 0x0f 0xa4 0xf4
-# CHECK: vld4.16	{d0[], d1[], d2[], d3[]}, [r4, :64]!
+# CHECK: vld4.16	{d0[], d1[], d2[], d3[]}, [r4:64]!
 0x75 0x0f 0xa4 0xf4
-# CHECK: vld4.16	{d0[], d2[], d4[], d6[]}, [r4, :64], r5
+# CHECK: vld4.16	{d0[], d2[], d4[], d6[]}, [r4:64], r5
 0x8f 0x0f 0xa4 0xf4
 # CHECK: vld4.32	{d0[], d1[], d2[], d3[]}, [r4]
 0xbf 0x0f 0xa4 0xf4
-# CHECK: vld4.32	{d0[], d2[], d4[], d6[]}, [r4, :64]
+# CHECK: vld4.32	{d0[], d2[], d4[], d6[]}, [r4:64]
 0xdd 0x0f 0xa4 0xf4
-# CHECK: vld4.32	{d0[], d1[], d2[], d3[]}, [r4, :128]!
+# CHECK: vld4.32	{d0[], d1[], d2[], d3[]}, [r4:128]!
 0xf5 0x0f 0xa4 0xf4
-# CHECK: vld4.32	{d0[], d2[], d4[], d6[]}, [r4, :128], r5
+# CHECK: vld4.32	{d0[], d2[], d4[], d6[]}, [r4:128], r5
 
 
 0x1f 0x07 0x40 0xf4
-# CHECK: vst1.8	{d16}, [r0, :64]
+# CHECK: vst1.8	{d16}, [r0:64]
 0x4f 0x07 0x40 0xf4
 # CHECK: vst1.16	{d16}, [r0]
 0x8f 0x07 0x40 0xf4
@@ -1799,37 +1799,37 @@
 0xcf 0x07 0x40 0xf4
 # CHECK: vst1.64	{d16}, [r0]
 0x1f 0x0a 0x40 0xf4
-# CHECK: vst1.8	{d16, d17}, [r0, :64]
+# CHECK: vst1.8	{d16, d17}, [r0:64]
 0x6f 0x0a 0x40 0xf4
-# CHECK: vst1.16	{d16, d17}, [r0, :128]
+# CHECK: vst1.16	{d16, d17}, [r0:128]
 0x8f 0x0a 0x40 0xf4
 # CHECK: vst1.32	{d16, d17}, [r0]
 0xcf 0x0a 0x40 0xf4
 # CHECK: vst1.64	{d16, d17}, [r0]
 
 0x1f 0x08 0x40 0xf4
-# CHECK: vst2.8	{d16, d17}, [r0, :64]
+# CHECK: vst2.8	{d16, d17}, [r0:64]
 0x6f 0x08 0x40 0xf4
-# CHECK: vst2.16	{d16, d17}, [r0, :128]
+# CHECK: vst2.16	{d16, d17}, [r0:128]
 0x8f 0x08 0x40 0xf4
 # CHECK: vst2.32	{d16, d17}, [r0]
 0x1f 0x03 0x40 0xf4
-# CHECK: vst2.8	{d16, d17, d18, d19}, [r0, :64]
+# CHECK: vst2.8	{d16, d17, d18, d19}, [r0:64]
 0x6f 0x03 0x40 0xf4
-# CHECK: vst2.16	{d16, d17, d18, d19}, [r0, :128]
+# CHECK: vst2.16	{d16, d17, d18, d19}, [r0:128]
 0xbf 0x03 0x40 0xf4
-# CHECK: vst2.32	{d16, d17, d18, d19}, [r0, :256]
+# CHECK: vst2.32	{d16, d17, d18, d19}, [r0:256]
 
 0x1f 0x04 0x40 0xf4
-# CHECK: vst3.8	{d16, d17, d18}, [r0, :64]
+# CHECK: vst3.8	{d16, d17, d18}, [r0:64]
 0x4f 0x04 0x40 0xf4
 # CHECK: vst3.16	{d16, d17, d18}, [r0]
 0x8f 0x04 0x40 0xf4
 # CHECK: vst3.32	{d16, d17, d18}, [r0]
 0x1d 0x05 0x40 0xf4
-# CHECK: vst3.8	{d16, d18, d20}, [r0, :64]!
+# CHECK: vst3.8	{d16, d18, d20}, [r0:64]!
 0x1d 0x15 0x40 0xf4
-# CHECK: vst3.8	{d17, d19, d21}, [r0, :64]!
+# CHECK: vst3.8	{d17, d19, d21}, [r0:64]!
 0x4d 0x05 0x40 0xf4
 # CHECK: vst3.16	{d16, d18, d20}, [r0]!
 0x4d 0x15 0x40 0xf4
@@ -1840,13 +1840,13 @@
 # CHECK: vst3.32	{d17, d19, d21}, [r0]!
 
 0x1f 0x00 0x40 0xf4
-# CHECK: vst4.8	{d16, d17, d18, d19}, [r0, :64]
+# CHECK: vst4.8	{d16, d17, d18, d19}, [r0:64]
 0x6f 0x00 0x40 0xf4
-# CHECK: vst4.16	{d16, d17, d18, d19}, [r0, :128]
+# CHECK: vst4.16	{d16, d17, d18, d19}, [r0:128]
 0x3d 0x01 0x40 0xf4
-# CHECK: vst4.8	{d16, d18, d20, d22}, [r0, :256]!
+# CHECK: vst4.8	{d16, d18, d20, d22}, [r0:256]!
 0x3d 0x11 0x40 0xf4
-# CHECK: vst4.8	{d17, d19, d21, d23}, [r0, :256]!
+# CHECK: vst4.8	{d17, d19, d21, d23}, [r0:256]!
 0x4d 0x01 0x40 0xf4
 # CHECK: vst4.16	{d16, d18, d20, d22}, [r0]!
 0x4d 0x11 0x40 0xf4
@@ -1857,15 +1857,15 @@
 # CHECK: vst4.32	{d17, d19, d21, d23}, [r0]!
 
 0x3f 0x01 0xc0 0xf4
-# CHECK: vst2.8	{d16[1], d17[1]}, [r0, :16]
+# CHECK: vst2.8	{d16[1], d17[1]}, [r0:16]
 0x5f 0x05 0xc0 0xf4
-# CHECK: vst2.16	{d16[1], d17[1]}, [r0, :32]
+# CHECK: vst2.16	{d16[1], d17[1]}, [r0:32]
 0x8f 0x09 0xc0 0xf4
 # CHECK: vst2.32	{d16[1], d17[1]}, [r0]
 0x6f 0x15 0xc0 0xf4
 # CHECK: vst2.16	{d17[1], d19[1]}, [r0]
 0x5f 0x19 0xc0 0xf4
-# CHECK: vst2.32	{d17[0], d19[0]}, [r0, :64]
+# CHECK: vst2.32	{d17[0], d19[0]}, [r0:64]
 
 0x2f 0x02 0xc0 0xf4
 # CHECK: vst3.8	{d16[1], d17[1], d18[1]}, [r0]
@@ -1879,13 +1879,13 @@
 # CHECK: vst3.32	{d16[0], d18[0], d20[0]}, [r0]
 
 0x3f 0x03 0xc0 0xf4
-# CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+# CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0:32]
 0x4f 0x07 0xc0 0xf4
 # CHECK: vst4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0]
 0xaf 0x0b 0xc0 0xf4
-# CHECK: vst4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
+# CHECK: vst4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0:128]
 0xff 0x17 0xc0 0xf4
-# CHECK: vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
+# CHECK: vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0:64]
 0x4f 0x1b 0xc0 0xf4
 # CHECK: vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
 
@@ -1920,11 +1920,11 @@
 # CHECK: vcvttmi.f32.f16	s2, s19
 
 0x1d 0x76 0x66 0xf4
-# CHECK: vld1.8	{d23, d24, d25}, [r6, :64]!
+# CHECK: vld1.8	{d23, d24, d25}, [r6:64]!
 0x9d 0x62 0x6f 0xf4
-# CHECK: vld1.32	{d22, d23, d24, d25}, [pc, :64]!
+# CHECK: vld1.32	{d22, d23, d24, d25}, [pc:64]!
 0x9d 0xaa 0x41 0xf4
-# CHECK: vst1.32	{d26, d27}, [r1, :64]!
+# CHECK: vst1.32	{d26, d27}, [r1:64]!
 
 0x10 0x0f 0x83 0xf2
 0x50 0x0f 0x83 0xf2
diff --git a/test/MC/Disassembler/ARM/neont-VLD-reencoding.txt b/test/MC/Disassembler/ARM/neont-VLD-reencoding.txt
index e53739e..6506143 100644
--- a/test/MC/Disassembler/ARM/neont-VLD-reencoding.txt
+++ b/test/MC/Disassembler/ARM/neont-VLD-reencoding.txt
@@ -28,13 +28,13 @@
 0xa0 0xf9 0xd0 0x04
 
 # CHECK: vld1.16 {d0[0]}, [r0], r0      @ encoding: [0xa0,0xf9,0x00,0x04]
-# CHECK: vld1.16 {d0[0]}, [r0, :16], r0 @ encoding: [0xa0,0xf9,0x10,0x04]
+# CHECK: vld1.16 {d0[0]}, [r0:16], r0 @ encoding: [0xa0,0xf9,0x10,0x04]
 # CHECK: vld1.16 {d0[1]}, [r0], r0      @ encoding: [0xa0,0xf9,0x40,0x04]
-# CHECK: vld1.16 {d0[1]}, [r0, :16], r0 @ encoding: [0xa0,0xf9,0x50,0x04]
+# CHECK: vld1.16 {d0[1]}, [r0:16], r0 @ encoding: [0xa0,0xf9,0x50,0x04]
 # CHECK: vld1.16 {d0[2]}, [r0], r0      @ encoding: [0xa0,0xf9,0x80,0x04]
-# CHECK: vld1.16 {d0[2]}, [r0, :16], r0 @ encoding: [0xa0,0xf9,0x90,0x04]
+# CHECK: vld1.16 {d0[2]}, [r0:16], r0 @ encoding: [0xa0,0xf9,0x90,0x04]
 # CHECK: vld1.16 {d0[3]}, [r0], r0      @ encoding: [0xa0,0xf9,0xc0,0x04]
-# CHECK: vld1.16 {d0[3]}, [r0, :16], r0 @ encoding: [0xa0,0xf9,0xd0,0x04]
+# CHECK: vld1.16 {d0[3]}, [r0:16], r0 @ encoding: [0xa0,0xf9,0xd0,0x04]
 
 0xa0 0xf9 0x00 0x08
 0xa0 0xf9 0x30 0x08
@@ -42,20 +42,20 @@
 0xa0 0xf9 0xb0 0x08
 
 # CHECK: vld1.32 {d0[0]}, [r0], r0      @ encoding: [0xa0,0xf9,0x00,0x08]
-# CHECK: vld1.32 {d0[0]}, [r0, :32], r0 @ encoding: [0xa0,0xf9,0x30,0x08]
+# CHECK: vld1.32 {d0[0]}, [r0:32], r0 @ encoding: [0xa0,0xf9,0x30,0x08]
 # CHECK: vld1.32 {d0[1]}, [r0], r0      @ encoding: [0xa0,0xf9,0x80,0x08]
-# CHECK: vld1.32 {d0[1]}, [r0, :32], r0 @ encoding: [0xa0,0xf9,0xb0,0x08]
+# CHECK: vld1.32 {d0[1]}, [r0:32], r0 @ encoding: [0xa0,0xf9,0xb0,0x08]
 
 0xa0 0xf9 0x1f 0x04
 0xa0 0xf9 0x8f 0x00
 
-# CHECK: vld1.16 {d0[0]}, [r0, :16] @ encoding: [0xa0,0xf9,0x1f,0x04]
+# CHECK: vld1.16 {d0[0]}, [r0:16] @ encoding: [0xa0,0xf9,0x1f,0x04]
 # CHECK: vld1.8  {d0[4]}, [r0]      @ encoding: [0xa0,0xf9,0x8f,0x00]
 
 0xa0 0xf9 0x1d 0x04
 0xa0 0xf9 0x8d 0x00
 
-# CHECK: vld1.16 {d0[0]}, [r0, :16]! @ encoding: [0xa0,0xf9,0x1d,0x04]
+# CHECK: vld1.16 {d0[0]}, [r0:16]! @ encoding: [0xa0,0xf9,0x1d,0x04]
 # CHECK: vld1.8  {d0[4]}, [r0]!      @ encoding: [0xa0,0xf9,0x8d,0x00]
 
 0xa5 0xf9 0x10 0x04
@@ -63,15 +63,15 @@
 0xae 0xf9 0x1a 0x04
 0xa5 0xf9 0x1a 0x94
 
-# CHECK: vld1.16 {d0[0]}, [r5, :16], r0  @ encoding: [0xa5,0xf9,0x10,0x04]
-# CHECK: vld1.16 {d0[0]}, [r5, :16], r10 @ encoding: [0xa5,0xf9,0x1a,0x04]
-# CHECK: vld1.16 {d0[0]}, [lr, :16], r10 @ encoding: [0xae,0xf9,0x1a,0x04]
-# CHECK: vld1.16 {d9[0]}, [r5, :16], r10 @ encoding: [0xa5,0xf9,0x1a,0x94]
+# CHECK: vld1.16 {d0[0]}, [r5:16], r0  @ encoding: [0xa5,0xf9,0x10,0x04]
+# CHECK: vld1.16 {d0[0]}, [r5:16], r10 @ encoding: [0xa5,0xf9,0x1a,0x04]
+# CHECK: vld1.16 {d0[0]}, [lr:16], r10 @ encoding: [0xae,0xf9,0x1a,0x04]
+# CHECK: vld1.16 {d9[0]}, [r5:16], r10 @ encoding: [0xa5,0xf9,0x1a,0x94]
 
 0xa0 0xf9 0x20 0x0b
 0xa0 0xf9 0x20 0x07
 0xa0 0xf9 0x20 0x03
 
-# CHECK: vld4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0, :128], r0 @ encoding: [0xa0,0xf9,0x20,0x0b]
+# CHECK: vld4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0:128], r0 @ encoding: [0xa0,0xf9,0x20,0x0b]
 # CHECK: vld4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r0       @ encoding: [0xa0,0xf9,0x20,0x07]
 # CHECK: vld4.8  {d0[1], d1[1], d2[1], d3[1]}, [r0], r0       @ encoding: [0xa0,0xf9,0x20,0x03]
diff --git a/test/MC/Disassembler/ARM/neont-VST-reencoding.txt b/test/MC/Disassembler/ARM/neont-VST-reencoding.txt
index eb3722c..5119d92 100644
--- a/test/MC/Disassembler/ARM/neont-VST-reencoding.txt
+++ b/test/MC/Disassembler/ARM/neont-VST-reencoding.txt
@@ -28,13 +28,13 @@
 0xc9 0xf9 0xd9 0x94
 
 # CHECK: vst1.16 {d0[0]},  [r0], r0      @ encoding: [0x80,0xf9,0x00,0x04]
-# CHECK: vst1.16 {d16[0]}, [r3, :16], r3 @ encoding: [0xc3,0xf9,0x13,0x04]
+# CHECK: vst1.16 {d16[0]}, [r3:16], r3 @ encoding: [0xc3,0xf9,0x13,0x04]
 # CHECK: vst1.16 {d16[1]}, [r4], r3      @ encoding: [0xc4,0xf9,0x43,0x04]
-# CHECK: vst1.16 {d16[1]}, [r5, :16], r5 @ encoding: [0xc5,0xf9,0x55,0x04]
+# CHECK: vst1.16 {d16[1]}, [r5:16], r5 @ encoding: [0xc5,0xf9,0x55,0x04]
 # CHECK: vst1.16 {d16[2]}, [r6], r5      @ encoding: [0xc6,0xf9,0x85,0x04]
-# CHECK: vst1.16 {d23[2]}, [r7, :16], r5 @ encoding: [0xc7,0xf9,0x95,0x74]
+# CHECK: vst1.16 {d23[2]}, [r7:16], r5 @ encoding: [0xc7,0xf9,0x95,0x74]
 # CHECK: vst1.16 {d24[3]}, [r8], r7      @ encoding: [0xc8,0xf9,0xc7,0x84]
-# CHECK: vst1.16 {d25[3]}, [r9, :16], r9 @ encoding: [0xc9,0xf9,0xd9,0x94]
+# CHECK: vst1.16 {d25[3]}, [r9:16], r9 @ encoding: [0xc9,0xf9,0xd9,0x94]
 
 0x8a 0xf9 0x01 0xa8
 0xcb 0xf9 0x32 0x18
@@ -42,20 +42,20 @@
 0xcd 0xf9 0xb4 0x28
 
 # CHECK: vst1.32 {d10[0]}, [r10], r1      @ encoding: [0x8a,0xf9,0x01,0xa8]
-# CHECK: vst1.32 {d17[0]}, [r11, :32], r2 @ encoding: [0xcb,0xf9,0x32,0x18]
+# CHECK: vst1.32 {d17[0]}, [r11:32], r2 @ encoding: [0xcb,0xf9,0x32,0x18]
 # CHECK: vst1.32 {d11[1]}, [r12], r3      @ encoding: [0x8c,0xf9,0x83,0xb8]
-# CHECK: vst1.32 {d18[1]}, [sp, :32], r4  @ encoding: [0xcd,0xf9,0xb4,0x28]
+# CHECK: vst1.32 {d18[1]}, [sp:32], r4  @ encoding: [0xcd,0xf9,0xb4,0x28]
 
 0x81 0xf9 0x1f 0x44
 0x82 0xf9 0x8f 0x30
 
-# CHECK: vst1.16 {d4[0]}, [r1, :16] @ encoding: [0x81,0xf9,0x1f,0x44]
+# CHECK: vst1.16 {d4[0]}, [r1:16] @ encoding: [0x81,0xf9,0x1f,0x44]
 # CHECK: vst1.8  {d3[4]}, [r2]      @ encoding: [0x82,0xf9,0x8f,0x30]
 
 0x83 0xf9 0x1d 0x24
 0x84 0xf9 0x8d 0x10
 
-# CHECK: vst1.16 {d2[0]}, [r3, :16]! @ encoding: [0x83,0xf9,0x1d,0x24]
+# CHECK: vst1.16 {d2[0]}, [r3:16]! @ encoding: [0x83,0xf9,0x1d,0x24]
 # CHECK: vst1.8  {d1[4]}, [r4]!      @ encoding: [0x84,0xf9,0x8d,0x10]
 
 0x85 0xf9 0x10 0x04
@@ -63,15 +63,15 @@
 0x8e 0xf9 0x1a 0x84
 0x85 0xf9 0x1a 0x94
 
-# CHECK: vst1.16 {d0[0]}, [r5, :16], r0  @ encoding: [0x85,0xf9,0x10,0x04]
-# CHECK: vst1.16 {d7[0]}, [r5, :16], r10 @ encoding: [0x85,0xf9,0x1a,0x74]
-# CHECK: vst1.16 {d8[0]}, [lr, :16], r10 @ encoding: [0x8e,0xf9,0x1a,0x84]
-# CHECK: vst1.16 {d9[0]}, [r5, :16], r10 @ encoding: [0x85,0xf9,0x1a,0x94]
+# CHECK: vst1.16 {d0[0]}, [r5:16], r0  @ encoding: [0x85,0xf9,0x10,0x04]
+# CHECK: vst1.16 {d7[0]}, [r5:16], r10 @ encoding: [0x85,0xf9,0x1a,0x74]
+# CHECK: vst1.16 {d8[0]}, [lr:16], r10 @ encoding: [0x8e,0xf9,0x1a,0x84]
+# CHECK: vst1.16 {d9[0]}, [r5:16], r10 @ encoding: [0x85,0xf9,0x1a,0x94]
 
 0x81 0xf9 0x24 0x0b
 0x82 0xf9 0x25 0x07
 0x83 0xf9 0x26 0x03
 
-# CHECK: vst4.32 {d0[0], d1[0], d2[0], d3[0]}, [r1, :128], r4 @ encoding: [0x81,0xf9,0x24,0x0b]
+# CHECK: vst4.32 {d0[0], d1[0], d2[0], d3[0]}, [r1:128], r4 @ encoding: [0x81,0xf9,0x24,0x0b]
 # CHECK: vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r2], r5       @ encoding: [0x82,0xf9,0x25,0x07]
 # CHECK: vst4.8  {d0[1], d1[1], d2[1], d3[1]}, [r3], r6       @ encoding: [0x83,0xf9,0x26,0x03]
diff --git a/test/MC/Disassembler/ARM/neont2.txt b/test/MC/Disassembler/ARM/neont2.txt
index 7d7010f..3374578 100644
--- a/test/MC/Disassembler/ARM/neont2.txt
+++ b/test/MC/Disassembler/ARM/neont2.txt
@@ -1379,7 +1379,7 @@
 # CHECK: vtbx.8	d20, {d16, d17, d18, d19}, d21
 
 0x60 0xf9 0x1f 0x07
-# CHECK: vld1.8	{d16}, [r0, :64]
+# CHECK: vld1.8	{d16}, [r0:64]
 0x60 0xf9 0x4f 0x07
 # CHECK: vld1.16	{d16}, [r0]
 0x60 0xf9 0x8f 0x07
@@ -1387,37 +1387,37 @@
 0x60 0xf9 0xcf 0x07
 # CHECK: vld1.64	{d16}, [r0]
 0x60 0xf9 0x1f 0x0a
-# CHECK: vld1.8	{d16, d17}, [r0, :64]
+# CHECK: vld1.8	{d16, d17}, [r0:64]
 0x60 0xf9 0x6f 0x0a
-# CHECK: vld1.16	{d16, d17}, [r0, :128]
+# CHECK: vld1.16	{d16, d17}, [r0:128]
 0x60 0xf9 0x8f 0x0a
 # CHECK: vld1.32	{d16, d17}, [r0]
 0x60 0xf9 0xcf 0x0a
 # CHECK: vld1.64	{d16, d17}, [r0]
 
 0x60 0xf9 0x1f 0x08
-# CHECK: vld2.8	{d16, d17}, [r0, :64]
+# CHECK: vld2.8	{d16, d17}, [r0:64]
 0x60 0xf9 0x6f 0x08
-# CHECK: vld2.16	{d16, d17}, [r0, :128]
+# CHECK: vld2.16	{d16, d17}, [r0:128]
 0x60 0xf9 0x8f 0x08
 # CHECK: vld2.32	{d16, d17}, [r0]
 0x60 0xf9 0x1f 0x03
-# CHECK: vld2.8	{d16, d17, d18, d19}, [r0, :64]
+# CHECK: vld2.8	{d16, d17, d18, d19}, [r0:64]
 0x60 0xf9 0x6f 0x03
-# CHECK: vld2.16	{d16, d17, d18, d19}, [r0, :128]
+# CHECK: vld2.16	{d16, d17, d18, d19}, [r0:128]
 0x60 0xf9 0xbf 0x03
-# CHECK: vld2.32	{d16, d17, d18, d19}, [r0, :256]
+# CHECK: vld2.32	{d16, d17, d18, d19}, [r0:256]
 
 0x60 0xf9 0x1f 0x04
-# CHECK: vld3.8	{d16, d17, d18}, [r0, :64]
+# CHECK: vld3.8	{d16, d17, d18}, [r0:64]
 0x60 0xf9 0x4f 0x04
 # CHECK: vld3.16	{d16, d17, d18}, [r0]
 0x60 0xf9 0x8f 0x04
 # CHECK: vld3.32	{d16, d17, d18}, [r0]
 0x60 0xf9 0x1d 0x05
-# CHECK: vld3.8	{d16, d18, d20}, [r0, :64]!
+# CHECK: vld3.8	{d16, d18, d20}, [r0:64]!
 0x60 0xf9 0x1d 0x15
-# CHECK: vld3.8	{d17, d19, d21}, [r0, :64]!
+# CHECK: vld3.8	{d17, d19, d21}, [r0:64]!
 0x60 0xf9 0x4d 0x05
 # CHECK: vld3.16	{d16, d18, d20}, [r0]!
 0x60 0xf9 0x4d 0x15
@@ -1428,15 +1428,15 @@
 # CHECK: vld3.32	{d17, d19, d21}, [r0]!
 
 0x60 0xf9 0x1f 0x00
-# CHECK: vld4.8	{d16, d17, d18, d19}, [r0, :64]
+# CHECK: vld4.8	{d16, d17, d18, d19}, [r0:64]
 0x60 0xf9 0x6f 0x00
-# CHECK: vld4.16	{d16, d17, d18, d19}, [r0, :128]
+# CHECK: vld4.16	{d16, d17, d18, d19}, [r0:128]
 0x60 0xf9 0xbf 0x00
-# CHECK: vld4.32	{d16, d17, d18, d19}, [r0, :256]
+# CHECK: vld4.32	{d16, d17, d18, d19}, [r0:256]
 0x60 0xf9 0x3d 0x01
-# CHECK: vld4.8	{d16, d18, d20, d22}, [r0, :256]!
+# CHECK: vld4.8	{d16, d18, d20, d22}, [r0:256]!
 0x60 0xf9 0x3d 0x11
-# CHECK: vld4.8	{d17, d19, d21, d23}, [r0, :256]!
+# CHECK: vld4.8	{d17, d19, d21, d23}, [r0:256]!
 0x60 0xf9 0x4d 0x01
 # CHECK: vld4.16	{d16, d18, d20, d22}, [r0]!
 0x60 0xf9 0x4d 0x11
@@ -1449,20 +1449,20 @@
 0xe0 0xf9 0x6f 0x00
 # CHECK: vld1.8	{d16[3]}, [r0]
 0xe0 0xf9 0x9f 0x04
-# CHECK: vld1.16	{d16[2]}, [r0, :16]
+# CHECK: vld1.16	{d16[2]}, [r0:16]
 0xe0 0xf9 0xbf 0x08
-# CHECK: vld1.32	{d16[1]}, [r0, :32]
+# CHECK: vld1.32	{d16[1]}, [r0:32]
 
 0xe0 0xf9 0x3f 0x01
-# CHECK: vld2.8	{d16[1], d17[1]}, [r0, :16]
+# CHECK: vld2.8	{d16[1], d17[1]}, [r0:16]
 0xe0 0xf9 0x5f 0x05
-# CHECK: vld2.16	{d16[1], d17[1]}, [r0, :32]
+# CHECK: vld2.16	{d16[1], d17[1]}, [r0:32]
 0xe0 0xf9 0x8f 0x09
 # CHECK: vld2.32	{d16[1], d17[1]}, [r0]
 0xe0 0xf9 0x6f 0x15
 # CHECK: vld2.16	{d17[1], d19[1]}, [r0]
 0xe0 0xf9 0x5f 0x19
-# CHECK: vld2.32	{d17[0], d19[0]}, [r0, :64]
+# CHECK: vld2.32	{d17[0], d19[0]}, [r0:64]
 
 0xe0 0xf9 0x2f 0x02
 # CHECK: vld3.8	{d16[1], d17[1], d18[1]}, [r0]
@@ -1495,43 +1495,43 @@
 # CHECK: vld3.32	{d0[], d2[], d4[]}, [r4], r5
 
 0xe0 0xf9 0x3f 0x03
-# CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+# CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0:32]
 0xe0 0xf9 0x4f 0x07
 # CHECK: vld4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0]
 0xe0 0xf9 0xaf 0x0b
-# CHECK: vld4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
+# CHECK: vld4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0:128]
 0xe0 0xf9 0x7f 0x07
-# CHECK: vld4.16	{d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
+# CHECK: vld4.16	{d16[1], d18[1], d20[1], d22[1]}, [r0:64]
 0xe0 0xf9 0x4f 0x1b
 # CHECK: vld4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
 
 0xa4 0xf9 0x0f 0x0f
 # CHECK: vld4.8	{d0[], d1[], d2[], d3[]}, [r4] 
 0xa4 0xf9 0x3f 0x0f
-# CHECK: vld4.8	{d0[], d2[], d4[], d6[]}, [r4, :32] 
+# CHECK: vld4.8	{d0[], d2[], d4[], d6[]}, [r4:32] 
 0xa4 0xf9 0x1d 0x0f
-# CHECK: vld4.8	{d0[], d1[], d2[], d3[]}, [r4, :32]! 
+# CHECK: vld4.8	{d0[], d1[], d2[], d3[]}, [r4:32]! 
 0xa4 0xf9 0x35 0x0f
-# CHECK: vld4.8	{d0[], d2[], d4[], d6[]}, [r4, :32], r5 
+# CHECK: vld4.8	{d0[], d2[], d4[], d6[]}, [r4:32], r5 
 0xa4 0xf9 0x4f 0x0f
 # CHECK: vld4.16	{d0[], d1[], d2[], d3[]}, [r4] 
 0xa4 0xf9 0x7f 0x0f
-# CHECK: vld4.16	{d0[], d2[], d4[], d6[]}, [r4, :64] 
+# CHECK: vld4.16	{d0[], d2[], d4[], d6[]}, [r4:64] 
 0xa4 0xf9 0x5d 0x0f
-# CHECK: vld4.16	{d0[], d1[], d2[], d3[]}, [r4, :64]! 
+# CHECK: vld4.16	{d0[], d1[], d2[], d3[]}, [r4:64]! 
 0xa4 0xf9 0x75 0x0f
-# CHECK: vld4.16	{d0[], d2[], d4[], d6[]}, [r4, :64], r5 
+# CHECK: vld4.16	{d0[], d2[], d4[], d6[]}, [r4:64], r5 
 0xa4 0xf9 0x8f 0x0f
 # CHECK: vld4.32	{d0[], d1[], d2[], d3[]}, [r4] 
 0xa4 0xf9 0xbf 0x0f
-# CHECK: vld4.32	{d0[], d2[], d4[], d6[]}, [r4, :64] 
+# CHECK: vld4.32	{d0[], d2[], d4[], d6[]}, [r4:64] 
 0xa4 0xf9 0xdd 0x0f
-# CHECK: vld4.32	{d0[], d1[], d2[], d3[]}, [r4, :128]! 
+# CHECK: vld4.32	{d0[], d1[], d2[], d3[]}, [r4:128]! 
 0xa4 0xf9 0xf5 0x0f
-# CHECK: vld4.32	{d0[], d2[], d4[], d6[]}, [r4, :128], r5 
+# CHECK: vld4.32	{d0[], d2[], d4[], d6[]}, [r4:128], r5 
 
 0x40 0xf9 0x1f 0x07
-# CHECK: vst1.8	{d16}, [r0, :64]
+# CHECK: vst1.8	{d16}, [r0:64]
 0x40 0xf9 0x4f 0x07
 # CHECK: vst1.16	{d16}, [r0]
 0x40 0xf9 0x8f 0x07
@@ -1539,37 +1539,37 @@
 0x40 0xf9 0xcf 0x07
 # CHECK: vst1.64	{d16}, [r0]
 0x40 0xf9 0x1f 0x0a
-# CHECK: vst1.8	{d16, d17}, [r0, :64]
+# CHECK: vst1.8	{d16, d17}, [r0:64]
 0x40 0xf9 0x6f 0x0a
-# CHECK: vst1.16	{d16, d17}, [r0, :128]
+# CHECK: vst1.16	{d16, d17}, [r0:128]
 0x40 0xf9 0x8f 0x0a
 # CHECK: vst1.32	{d16, d17}, [r0]
 0x40 0xf9 0xcf 0x0a
 # CHECK: vst1.64	{d16, d17}, [r0]
 
 0x40 0xf9 0x1f 0x08
-# CHECK: vst2.8	{d16, d17}, [r0, :64]
+# CHECK: vst2.8	{d16, d17}, [r0:64]
 0x40 0xf9 0x6f 0x08
-# CHECK: vst2.16	{d16, d17}, [r0, :128]
+# CHECK: vst2.16	{d16, d17}, [r0:128]
 0x40 0xf9 0x8f 0x08
 # CHECK: vst2.32	{d16, d17}, [r0]
 0x40 0xf9 0x1f 0x03
-# CHECK: vst2.8	{d16, d17, d18, d19}, [r0, :64]
+# CHECK: vst2.8	{d16, d17, d18, d19}, [r0:64]
 0x40 0xf9 0x6f 0x03
-# CHECK: vst2.16	{d16, d17, d18, d19}, [r0, :128]
+# CHECK: vst2.16	{d16, d17, d18, d19}, [r0:128]
 0x40 0xf9 0xbf 0x03
-# CHECK: vst2.32	{d16, d17, d18, d19}, [r0, :256]
+# CHECK: vst2.32	{d16, d17, d18, d19}, [r0:256]
 
 0x40 0xf9 0x1f 0x04
-# CHECK: vst3.8	{d16, d17, d18}, [r0, :64]
+# CHECK: vst3.8	{d16, d17, d18}, [r0:64]
 0x40 0xf9 0x4f 0x04
 # CHECK: vst3.16	{d16, d17, d18}, [r0]
 0x40 0xf9 0x8f 0x04
 # CHECK: vst3.32	{d16, d17, d18}, [r0]
 0x40 0xf9 0x1d 0x05
-# CHECK: vst3.8	{d16, d18, d20}, [r0, :64]!
+# CHECK: vst3.8	{d16, d18, d20}, [r0:64]!
 0x40 0xf9 0x1d 0x15
-# CHECK: vst3.8	{d17, d19, d21}, [r0, :64]!
+# CHECK: vst3.8	{d17, d19, d21}, [r0:64]!
 0x40 0xf9 0x4d 0x05
 # CHECK: vst3.16	{d16, d18, d20}, [r0]!
 0x40 0xf9 0x4d 0x15
@@ -1580,13 +1580,13 @@
 # CHECK: vst3.32	{d17, d19, d21}, [r0]!
 
 0x40 0xf9 0x1f 0x00
-# CHECK: vst4.8	{d16, d17, d18, d19}, [r0, :64]
+# CHECK: vst4.8	{d16, d17, d18, d19}, [r0:64]
 0x40 0xf9 0x6f 0x00
-# CHECK: vst4.16	{d16, d17, d18, d19}, [r0, :128]
+# CHECK: vst4.16	{d16, d17, d18, d19}, [r0:128]
 0x40 0xf9 0x3d 0x01
-# CHECK: vst4.8	{d16, d18, d20, d22}, [r0, :256]!
+# CHECK: vst4.8	{d16, d18, d20, d22}, [r0:256]!
 0x40 0xf9 0x3d 0x11
-# CHECK: vst4.8	{d17, d19, d21, d23}, [r0, :256]!
+# CHECK: vst4.8	{d17, d19, d21, d23}, [r0:256]!
 0x40 0xf9 0x4d 0x01
 # CHECK: vst4.16	{d16, d18, d20, d22}, [r0]!
 0x40 0xf9 0x4d 0x11
@@ -1597,15 +1597,15 @@
 # CHECK: vst4.32	{d17, d19, d21, d23}, [r0]!
 
 0xc0 0xf9 0x3f 0x01
-# CHECK: vst2.8	{d16[1], d17[1]}, [r0, :16]
+# CHECK: vst2.8	{d16[1], d17[1]}, [r0:16]
 0xc0 0xf9 0x5f 0x05
-# CHECK: vst2.16	{d16[1], d17[1]}, [r0, :32]
+# CHECK: vst2.16	{d16[1], d17[1]}, [r0:32]
 0xc0 0xf9 0x8f 0x09
 # CHECK: vst2.32	{d16[1], d17[1]}, [r0]
 0xc0 0xf9 0x6f 0x15
 # CHECK: vst2.16	{d17[1], d19[1]}, [r0]
 0xc0 0xf9 0x5f 0x19
-# CHECK: vst2.32	{d17[0], d19[0]}, [r0, :64]
+# CHECK: vst2.32	{d17[0], d19[0]}, [r0:64]
 
 0xc0 0xf9 0x2f 0x02
 # CHECK: vst3.8	{d16[1], d17[1], d18[1]}, [r0]
@@ -1619,26 +1619,26 @@
 # CHECK: vst3.32	{d16[0], d18[0], d20[0]}, [r0]
 
 0xc0 0xf9 0x3f 0x03
-# CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+# CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0:32]
 0xc0 0xf9 0x4f 0x07
 # CHECK: vst4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0]
 0xc0 0xf9 0xaf 0x0b
-# CHECK: vst4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
+# CHECK: vst4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0:128]
 0xc0 0xf9 0xff 0x17
-# CHECK: vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
+# CHECK: vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0:64]
 0xc0 0xf9 0x4f 0x1b
 # CHECK: vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
 
 0x63 0xf9 0x37 0xc9
-# CHECK: vld2.8	{d28, d30}, [r3, :256], r7
+# CHECK: vld2.8	{d28, d30}, [r3:256], r7
 
 # rdar://10798451
 0xe7 0xf9 0x32 0x1d
-# CHECK vld2.8	{d17[], d19[]}, [r7, :16], r2
+# CHECK: vld2.8	{d17[], d19[]}, [r7:16], r2
 0xe7 0xf9 0x3d 0x1d
-# CHECK vld2.8	{d17[], d19[]}, [r7, :16]!
+# CHECK: vld2.8	{d17[], d19[]}, [r7:16]!
 0xe7 0xf9 0x3f 0x1d
+# CHECK: vld2.8	{d17[], d19[]}, [r7:16]
+# CHECK vld2.8	{d17[], d19[]}, [r7:16]
 
 # rdar://11034702
 0x04 0xf9 0x0d 0x87
@@ -2046,9 +2046,9 @@
 
 # rdar://10798451
 0xe7 0xf9 0x32 0x1d
-# CHECK: vld2.8	{d17[], d19[]}, [r7, :16], r2
+# CHECK: vld2.8	{d17[], d19[]}, [r7:16], r2
 0xe7 0xf9 0x3d 0x1d
-# CHECK: vld2.8	{d17[], d19[]}, [r7, :16]!
+# CHECK: vld2.8	{d17[], d19[]}, [r7:16]!
 0xe7 0xf9 0x3f 0x1d
-# CHECK: vld2.8	{d17[], d19[]}, [r7, :16]
+# CHECK: vld2.8	{d17[], d19[]}, [r7:16]
 
diff --git a/test/MC/Disassembler/Mips/mips32.txt b/test/MC/Disassembler/Mips/mips32.txt
index a193319..7022486 100644
--- a/test/MC/Disassembler/Mips/mips32.txt
+++ b/test/MC/Disassembler/Mips/mips32.txt
@@ -404,3 +404,9 @@
 
 # CHECK: xori  $9,  $6, 17767
 0x38 0xc9 0x45 0x67
+
+# CHECK: .set    push
+# CHECK: .set    mips32r2
+# CHECK: rdhwr   $5, $29
+# CHECK: .set    pop
+0x7c 0x05 0xe8 0x3b
diff --git a/test/MC/Disassembler/Mips/mips32_le.txt b/test/MC/Disassembler/Mips/mips32_le.txt
index 08b3672..48fa8e2 100644
--- a/test/MC/Disassembler/Mips/mips32_le.txt
+++ b/test/MC/Disassembler/Mips/mips32_le.txt
@@ -404,3 +404,9 @@
 
 # CHECK: xori  $9,  $6, 17767
 0x67 0x45 0xc9 0x38
+
+# CHECK: .set    push
+# CHECK: .set    mips32r2
+# CHECK: rdhwr   $5, $29
+# CHECK: .set    pop
+0x3b 0xe8 0x05 0x7c
diff --git a/test/MC/Disassembler/X86/intel-syntax-32.txt b/test/MC/Disassembler/X86/intel-syntax-32.txt
new file mode 100644
index 0000000..08bae6e
--- /dev/null
+++ b/test/MC/Disassembler/X86/intel-syntax-32.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 --output-asm-variant=1 | FileCheck %s
+
+# CHECK: sgdt
+0x0f 0x01 0x00
+
+# CHECK: sidt
+0x0f 0x01 0x08
+
+# CHECK: lgdt
+0x0f 0x01 0x10
+
+# CHECK: lidt
+0x0f 0x01 0x18
diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt
index 99d49943..76d67d3 100644
--- a/test/MC/Disassembler/X86/x86-32.txt
+++ b/test/MC/Disassembler/X86/x86-32.txt
@@ -630,3 +630,21 @@
 
 # CHECK: movntss %xmm0, (%edi)
 0xf3 0x0f 0x2b 0x07
+
+# CHECK: prefetch (%eax)
+0x0f 0x0d 0x00
+
+# CHECK: prefetchw (%eax)
+0x0f 0x0d 0x08
+
+# CHECK: adcxl %eax, %eax
+0x66 0x0f 0x38 0xf6 0xc0
+
+# CHECK: adcxl (%eax), %eax
+0x66 0x0f 0x38 0xf6 0x00
+
+# CHECK: adoxl %eax, %eax
+0xf3 0x0f 0x38 0xf6 0xc0
+
+# CHECK: adoxl (%eax), %eax
+0xf3 0x0f 0x38 0xf6 0x00
diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt
index df449a4..1345741 100644
--- a/test/MC/Disassembler/X86/x86-64.txt
+++ b/test/MC/Disassembler/X86/x86-64.txt
@@ -2,64 +2,64 @@
 
 # Coverage
 
-# CHECK: vcmptrue_usps 
+# CHECK: vcmptrue_usps
 0xc5 0x04 0xc2 0xc7 0x1f
 
-# CHECK: vcmptrue_uspd 
+# CHECK: vcmptrue_uspd
 0xc5 0x05 0xc2 0xc7 0x1f
 
-# CHECK: vcmptrue_usss 
+# CHECK: vcmptrue_usss
 0xc5 0x06 0xc2 0xc7 0x1f
 
-# CHECK: vcmptrue_ussd 
+# CHECK: vcmptrue_ussd
 0xc5 0x07 0xc2 0xc7 0x1f
 
-# CHECK: vcmpeq_uqps 
+# CHECK: vcmpeq_uqps
 0xc5 0x04 0xc2 0xc7 0x08
 
-# CHECK: vcmpeq_uqpd 
+# CHECK: vcmpeq_uqpd
 0xc5 0x05 0xc2 0xc7 0x08
 
-# CHECK: vcmpeq_uqss 
+# CHECK: vcmpeq_uqss
 0xc5 0x06 0xc2 0xc7 0x08
 
-# CHECK: vcmpeq_uqsd 
+# CHECK: vcmpeq_uqsd
 0xc5 0x07 0xc2 0xc7 0x08
 
-# CHECK: vcmpeqps 
+# CHECK: vcmpeqps
 0xc5 0x04 0xc2 0xc7 0x00
 
-# CHECK: vcmpeqpd 
+# CHECK: vcmpeqpd
 0xc5 0x05 0xc2 0xc7 0x00
 
-# CHECK: vcmpeqss 
+# CHECK: vcmpeqss
 0xc5 0x06 0xc2 0xc7 0x00
 
-# CHECK: vcmpeqsd 
+# CHECK: vcmpeqsd
 0xc5 0x07 0xc2 0xc7 0x00
 
-# CHECK: cmpeqps 
+# CHECK: cmpeqps
 0x0f 0xc2 0xc7 0x00
 
-# CHECK: cmpeqpd 
+# CHECK: cmpeqpd
 0x66 0x0f 0xc2 0xc7 0x00
 
-# CHECK: cmpeqss 
+# CHECK: cmpeqss
 0xf3 0x0f 0xc2 0xc7 0x00
 
-# CHECK: cmpeqsd 
+# CHECK: cmpeqsd
 0xf2 0x0f 0xc2 0xc7 0x00
 
-# CHECK: cmpordps 
+# CHECK: cmpordps
 0x0f 0xc2 0xc7 0x07
 
-# CHECK: cmpordpd 
+# CHECK: cmpordpd
 0x66 0x0f 0xc2 0xc7 0x07
 
-# CHECK: cmpordss 
+# CHECK: cmpordss
 0xf3 0x0f 0xc2 0xc7 0x07
 
-# CHECK: cmpordsd 
+# CHECK: cmpordsd
 0xf2 0x0f 0xc2 0xc7 0x07
 
 # CHECK: extrq  $2, $3, %xmm0
@@ -79,3 +79,27 @@
 
 # CHECK: movntss %xmm0, (%rdi)
 0xf3 0x0f 0x2b 0x07
+
+# CHECK: adcxl %eax, %eax
+0x66 0x0f 0x38 0xf6 0xc0
+
+# CHECK: adcxl (%rax), %eax
+0x66 0x0f 0x38 0xf6 0x00
+
+# CHECK: adcxq %rax, %rax
+0x66 0x48 0x0f 0x38 0xf6 0xc0
+
+# CHECK: adcxq (%rax), %rax
+0x66 0x48 0x0f 0x38 0xf6 0x00
+
+# CHECK: adoxl %eax, %eax
+0xf3 0x0f 0x38 0xf6 0xc0
+
+# CHECK: adoxl (%rax), %eax
+0xf3 0x0f 0x38 0xf6 0x00
+
+# CHECK: adoxq %rax, %rax
+0xf3 0x48 0x0f 0x38 0xf6 0xc0
+
+# CHECK: adoxq (%rax), %rax
+0xf3 0x48 0x0f 0x38 0xf6 0x00
diff --git a/test/MC/Disassembler/XCore/xcore.txt b/test/MC/Disassembler/XCore/xcore.txt
index f6b9c90..8ad7588 100644
--- a/test/MC/Disassembler/XCore/xcore.txt
+++ b/test/MC/Disassembler/XCore/xcore.txt
@@ -21,6 +21,57 @@
 # CHECK: waiteu
 0xec 0x07
 
+# CHECK: dcall
+0xfc 0x07
+
+# CHECK: dentsp
+0xec 0x17
+
+# CHECK: drestsp
+0xed 0x17
+
+# CHECK: dret
+0xfe 0x07
+
+# CHECK: freet
+0xef 0x07
+
+# CHECK: get r11, kep
+0xef 0x17
+
+# CHECK: get r11, ksp
+0xfc 0x17
+
+# CHECK: kret
+0xfd 0x07
+
+# CHECK: ldw et, sp[4]
+0xfe 0x17
+
+# CHECK: ldw sed, sp[3]
+0xfd 0x17
+
+# CHECK: ldw spc, sp[1]
+0xec 0x0f
+
+# CHECK: ldw ssr, sp[2]
+0xee 0x0f
+
+# CHECK: set kep, r11
+0xff 0x07
+
+# CHECK: stw et, sp[4]
+0xfd 0x0f
+
+# CHECK: stw sed, sp[3]
+0xfc 0x0f
+
+# CHECK: stw spc, sp[1]
+0xed 0x0f
+
+# CHECK: stw ssr, sp[2]
+0xef 0x0f
+
 # 1r instructions
 
 # CHECK: msync res[r0]
@@ -59,6 +110,33 @@
 # CHECK: eeu res[r11]
 0xfb 0x07
 
+# CHECK: set dp, r5
+0xe5 0x37
+
+# CHECK: set cp, r0
+0xf0 0x37
+
+# CHECK: dgetreg r11
+0xeb 0x3f
+
+# CHECK: edu res[r8]
+0xe8 0x07
+
+# CHECK: kcall r2
+0xe2 0x47
+
+# CHECK: waitef r10
+0xfa 0x0f
+
+# CHECK: waitet r7
+0xe7 0x0f
+
+# CHECK: start t[r4]
+0xe4 0x1f
+
+# CHECK: clrpt res[r9]
+0xe9 0x87
+
 # 2r instructions
 
 # CHECK: not r1, r8
@@ -139,6 +217,15 @@
 # CHECK: sext r9, r1
 0x45 0x37
 
+# CHECK: tsetmr r7, r3
+0x1f 0x1f
+
+# CHECK: eef r1, res[r6]
+0x96 0x2f
+
+# CHECK: eet r11, res[r0]
+0x5c 0x27
+
 # rus instructions
 
 # CHECK: chkct res[r1], 8
@@ -196,3 +283,359 @@
 
 # CHECK: settw res[r7], r2
 0x9b 0xff 0xec 0x27
+
+# CHECK: getd r8, res[r3]
+0x53 0xff 0xec 0x1f
+
+# CHECK: getn r10, res[r11]
+0xbb 0xff 0xec 0x37
+
+# CHECK: testlcl r2, res[r0]
+0xc8 0xfe 0xec 0x27
+
+# CHECK: setn res[r9], r7
+0x6d 0xff 0xec 0x37
+
+# 3r instructions
+
+# CHECK: add r1, r2, r3
+0x1b 0x10
+
+# CHECK: and r11, r10, r9
+0xb9 0x3e
+
+# CHECK: eq r6, r1, r2
+0x66 0x30
+
+# CHECK: ld16s r8, r3[r4]
+0xcc 0x82
+
+# CHECK: ld8u r9, r1[r10]
+0x16 0x8d
+
+# CHECK: ldw r9, r4[r5]
+0x91 0x4b
+
+# CHECK: lss r7, r3, r0
+0x7c 0xc0
+
+# CHECK: lsu r5, r8, r6
+0x12 0xcc
+
+# CHECK: or r1, r3, r2
+0x1e 0x40
+
+# CHECK: shl r8, r2, r4
+0xc8 0x22
+
+# CHECK: shr r9, r7, r1
+0x5d 0x29
+
+# CHECK: sub r4, r2, r5
+0x89 0x1a
+
+# CHECK: set t[r0]:r1, r2
+0x18 0xb8
+
+# 2rus instructions
+
+# CHECK: add r10, r2, 5
+0xe9 0x92
+
+# CHECK: eq r2, r1, 0
+0x24 0xb0
+
+# CHECK: ldw r5, r6[1]
+0x19 0x09
+
+# CHECK: shl r6, r5, 24
+0xa6 0xa5
+
+# CHECK: shr r3, r8, 5
+0xf1 0xab
+
+# CHECK: stw r3, r2[0]
+0x38 0x00
+
+# CHECK: sub r2, r4, 11
+0x63 0x9d
+
+# l3r instructions
+
+# CHECK: ashr r5, r1, r11
+0xd7 0xfc 0xec 0x17
+
+# CHECK: crc32 r5, r6, r1
+0x19 0xf9 0xec 0xaf
+
+# CHECK: divu r9, r1, r3
+0x97 0xf8 0xec 0x4f
+
+# CHECK: divs r6, r7, r2
+0x2e 0xf9 0xec 0x47
+
+# CHECK: lda16 r11, r2[r1]
+0xb9 0xf8 0xec 0x2f
+
+# CHECK: lda16 r9, r3[-r11]
+0x1f 0xfd 0xec 0x37
+
+# CHECK: ldaw r9, r1[r2]
+0x96 0xf8 0xec 0x1f
+
+# CHECK: ldaw r8, r7[r11]
+0xcf 0xfd 0xec 0x1f
+
+# CHECK: mul r0, r4, r2
+0xc2 0xf8 0xec 0x3f
+
+# CHECK: remu r1, r2, r3
+0x1b 0xf8 0xec 0xcf
+
+# CHECK: rems r11, r10, r9
+0xb9 0xfe 0xec 0xc7
+
+# CHECK: st16 r5, r3[r8]
+0xdc 0xfc 0xec 0x87
+
+# CHECK: stw r7, r10[r1]
+0xf9 0xf9 0xec 0x07
+
+# CHECK: xor r4, r3, r9
+0xcd 0xfc 0xec 0x0f
+
+# l2rus instructions
+
+# CHECK: ashr r5, r1, 3
+0x57 0xf8 0xec 0x97
+
+# CHECK: ldaw r11, r10[6]
+0x7a 0xfc 0xec 0x9f
+
+# CHECK: ldaw r8, r2[-9]
+0x09 0xfd 0xec 0xa7
+
+# CHECK: inpw r6, res[r1], 8
+0xe4 0xfc 0xee 0x97
+
+# CHECK: outpw res[r3], r0, 2
+0x0e 0xf8 0xed 0x97
+
+# ru6 / lru6 instructions
+
+# CHECK: bt r6, -5
+0x85 0x75
+
+# CHECK: bt r10, -451
+0x07 0xf0 0x83 0x76
+
+# CHECK: bt r8, 10
+0x0a 0x72
+
+# CHECK: bt r1, 6451
+0x64 0xf0 0x73 0x70
+
+# CHECK: bf r5, 8
+0x48 0x79
+
+# CHECK: bf r6, 65
+0x01 0xf0 0x81 0x79
+
+# CHECK: bf r1, 53
+0x75 0x78
+
+# CHECK: bf r10, 101
+0x01 0xf0 0xa5 0x7a
+
+# CHECK: ldaw r11, dp[63]
+0xff 0x62
+
+# CHECK: ldaw r1, dp[456]
+0x07 0xf0 0x48 0x60
+
+# CHECK: ldaw r3, sp[2]
+0xc2 0x64
+
+# CHECK: ldaw r8, sp[65535]
+0xff 0xf3 0x3f 0x66
+
+# CHECK: ldc r3, 30
+0xde 0x68
+
+# CHECK: ldc r11, 1000
+0x0f 0xf0 0xe8 0x6a
+
+# CHECK: ldw r0, cp[4]
+0x04 0x6c
+
+# CHECK: ldw r1, cp[32345]
+0xf9 0xf1 0x59 0x6c
+
+# CHECK: ldw r10, dp[16]
+0x90 0x5a
+
+# CHECK: ldw r10, dp[76]
+0x01 0xf0 0x8c 0x5a
+
+# CHECK: ldw r8, sp[51]
+0x33 0x5e
+
+# CHECK: ldw r8, sp[1225]
+0x13 0xf0 0x09 0x5e
+
+# CHECK: setc res[r5], 36
+0x64 0xe9
+
+# CHECK: setc res[r2], 40312
+0x75 0xf2 0xb8 0xe8
+
+# CHECK: stw r8, dp[14]
+0x0e 0x52
+
+# CHECK: stw r9, dp[654]
+0x0a 0xf0 0x4e 0x52
+
+# CHECK: stw r1, sp[32]
+0x60 0x54
+
+# CHECK: stw r0, sp[8761]
+0x88 0xf0 0x39 0x54
+
+# u6 / lu6 instructions
+
+# CHECK: bu -20
+0x14 0x77
+
+# CHECK: bu -1000
+0x0f 0xf0 0x28 0x77
+
+# CHECK: bu 24
+0x18 0x73
+
+# CHECK: bu 2231
+0x22 0xf0 0x37 0x73
+
+# CHECK: extsp 9
+0x89 0x77
+
+# CHECK: extsp 5721
+0x59 0xf0 0x99 0x77
+
+# CHECK: clrsr 60
+0x3c 0x7b
+
+# CHECK: clrsr 64391
+0xee 0xf3 0x07 0x7b
+
+# CHECK: entsp 1
+0x41 0x77
+
+# CHECK: entsp 70
+0x01 0xf0 0x46 0x77
+
+# CHECK: ldaw r11, cp[5]
+0x45 0x7f
+
+# CHECK: ldaw r11, cp[33000]
+0x03 0xf2 0x68 0x7f
+
+# CHECK: retsp 40
+0xe8 0x77
+
+# CHECK: retsp 52010
+0x2c 0xf3 0xea 0x77
+
+# CHECK: setsr 42
+0x6a 0x7b
+
+# CHECK: setsr 21863
+0x55 0xf1 0x67 0x7b
+
+# CHECK: extdp 4
+0x84 0x73
+
+# CHECK: extdp 554
+0x08 0xf0 0xaa 0x73
+
+# CHECK: blat 9
+0x49 0x73
+
+# CHECK: blat 61212
+0xbc 0xf3 0x5c 0x73
+
+# CHECK: getsr r11, 54
+0x36 0x7f
+
+# CHECK: getsr r11, 442
+0x06 0xf0 0x3a 0x7f
+
+# CHECK: kcall 11
+0xcb 0x73
+
+# CHECK: kcall 4001
+0x3e 0xf0 0xe1 0x73
+
+# CHECK: kentsp 22
+0x96 0x7b
+
+# CHECK: kentsp 8793
+0x89 0xf0 0x99 0x7b
+
+# CHECK: krestsp 0
+0xc0 0x7b
+
+# CHECK: krestsp 55312
+0x60 0xf3 0xd0 0x7b
+
+# u10 / lu10 instructions
+
+# CHECK: ldap r11, 40
+0x28 0xd8
+
+# CHECK: ldap r11, 53112
+0x33 0xf0 0x78 0xdb
+
+# CHECK: bl 8
+0x08 0xd0
+
+# CHECK: bl 38631
+0x25 0xf0 0xe7 0xd2
+
+# CHECK: bla cp[500]
+0xf4 0xe1
+
+# CHECK: bla cp[413742]
+0x94 0xf1 0x2e 0xe0
+
+# CHECK: ldw r11, cp[132]
+0x84 0xe4
+
+# CHECK: ldw r11, cp[3444]
+0x35 0xf0 0xf4 0x6e
+
+# l6r instructions
+
+# CHECK: lmul r11, r0, r2, r5, r8, r10
+0xf9 0xfa 0x02 0x06
+
+# l5r instructions
+
+# CHECK: ladd r10, r2, r5, r1, r7
+0xe5 0xf8 0xfb 0x06
+
+# CHECK: ldivu r5, r6, r3, r9, r8
+0x54 0xfe 0x0b 0x07
+
+# CHECK: lsub r1, r8, r7, r11, r5
+0xcf 0xfd 0x85 0x0f
+
+# l4r instructions
+
+# CHECK: crc8 r6, r3, r4, r11
+0x73 0xfd 0xe6 0x07
+
+# CHECK: maccs r11, r8, r2, r4
+0xf8 0xfa 0xe8 0x0f
+
+# CHECK: maccu r0, r2, r5, r8
+0x44 0xfd 0xf2 0x07
diff --git a/test/MC/ELF/comp-dir.s b/test/MC/ELF/comp-dir.s
index 50d10eb..59e3d7d 100644
--- a/test/MC/ELF/comp-dir.s
+++ b/test/MC/ELF/comp-dir.s
@@ -1,5 +1,5 @@
 // RUN: llvm-mc -triple=x86_64-linux-unknown -g -fdebug-compilation-dir=/test/comp/dir %s -filetype=obj -o %t.o
-// RUN: llvm-dwarfdump %t.o | FileCheck %s
+// RUN: llvm-dwarfdump -debug-dump=info %t.o | FileCheck %s
 
 // CHECK: DW_AT_comp_dir [DW_FORM_string] ("{{([A-Za-z]:.*)?}}/test/comp/dir")
 
diff --git a/test/MC/MachO/bad-dollar.s b/test/MC/MachO/bad-dollar.s
new file mode 100644
index 0000000..fd72ed0
--- /dev/null
+++ b/test/MC/MachO/bad-dollar.s
@@ -0,0 +1,5 @@
+// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err > %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s
+
+.long $1
+// CHECK-ERROR: 4:7: error: invalid token in expression
diff --git a/test/MC/MachO/bad-macro.s b/test/MC/MachO/bad-macro.s
new file mode 100644
index 0000000..0aaba09
--- /dev/null
+++ b/test/MC/MachO/bad-macro.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err > %t
+// RUN: FileCheck --check-prefix=CHECK-OUTPUT < %t %s
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s
+
+.macro test_macro reg1, reg2
+mov $1, %eax
+mov $2, %eax
+.endmacro
+test_macro %ebx, %ecx
+
+// CHECK-ERROR: 5:1: warning: macro defined with named parameters which are not used in macro body, possible positional parameter found in body which will have no effect
+
+// CHECK-OUTPUT: movl	$1, %eax
+// CHECK-OUTPUT: movl	$2, %eax
diff --git a/test/MC/MachO/gen-dwarf-cpp.s b/test/MC/MachO/gen-dwarf-cpp.s
index cb749f4..e42a63a 100644
--- a/test/MC/MachO/gen-dwarf-cpp.s
+++ b/test/MC/MachO/gen-dwarf-cpp.s
@@ -1,5 +1,5 @@
 // RUN: llvm-mc -g -triple i386-apple-darwin10 %s -filetype=obj -o %t
-// RUN: llvm-dwarfdump %t | FileCheck %s
+// RUN: llvm-dwarfdump -debug-dump=line %t | FileCheck %s
 
 # 100 "t.s" 1
 .globl _bar
diff --git a/test/MC/MachO/gen-dwarf-macro-cpp.s b/test/MC/MachO/gen-dwarf-macro-cpp.s
index 05a449b..6177814 100644
--- a/test/MC/MachO/gen-dwarf-macro-cpp.s
+++ b/test/MC/MachO/gen-dwarf-macro-cpp.s
@@ -1,5 +1,5 @@
 // RUN: llvm-mc -g -triple i386-apple-darwin10 %s -filetype=obj -o %t
-// RUN: llvm-dwarfdump %t | FileCheck %s
+// RUN: llvm-dwarfdump -debug-dump=line %t | FileCheck %s
 
 # 1 "foo.S" 2
 .macro switcher
diff --git a/test/MC/MachO/gen-dwarf-producer.s b/test/MC/MachO/gen-dwarf-producer.s
new file mode 100644
index 0000000..f7388db
--- /dev/null
+++ b/test/MC/MachO/gen-dwarf-producer.s
@@ -0,0 +1,8 @@
+// RUN: env DEBUG_PRODUCER="my producer" llvm-mc -g -triple i386-apple-darwin10 %s -filetype=obj -o %t
+// RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+
+.globl _bar
+_bar:
+	ret
+
+// CHECK:    DW_AT_producer [DW_FORM_string]	("my producer")
diff --git a/test/MC/MachO/gen-dwarf.s b/test/MC/MachO/gen-dwarf.s
index cf2d1db..d763dd1 100644
--- a/test/MC/MachO/gen-dwarf.s
+++ b/test/MC/MachO/gen-dwarf.s
@@ -1,5 +1,5 @@
 // RUN: llvm-mc -g -triple i386-apple-darwin10 %s -filetype=obj -o %t
-// RUN: llvm-dwarfdump %t | FileCheck %s
+// RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck %s
 
 .globl _bar
 _bar:
diff --git a/test/MC/MachO/linker-option-1.s b/test/MC/MachO/linker-option-1.s
new file mode 100644
index 0000000..a01cab7
--- /dev/null
+++ b/test/MC/MachO/linker-option-1.s
@@ -0,0 +1,21 @@
+// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err > %t
+// RUN: FileCheck --check-prefix=CHECK-OUTPUT < %t %s
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s
+        
+// CHECK-OUTPUT: .linker_option "a"
+.linker_option "a"
+// CHECK-OUTPUT: .linker_option "a", "b"
+.linker_option "a", "b"
+// CHECK-OUTPUT-NOT: .linker_option
+// CHECK-ERROR: expected string in '.linker_option' directive
+// CHECK-ERROR: .linker_option 10
+// CHECK-ERROR:                ^
+.linker_option 10
+// CHECK-ERROR: expected string in '.linker_option' directive
+// CHECK-ERROR: .linker_option "a",
+// CHECK-ERROR:                    ^
+.linker_option "a",
+// CHECK-ERROR: unexpected token in '.linker_option' directive
+// CHECK-ERROR: .linker_option "a" "b"
+// CHECK-ERROR:                    ^
+.linker_option "a" "b"
diff --git a/test/MC/MachO/linker-option-2.s b/test/MC/MachO/linker-option-2.s
new file mode 100644
index 0000000..bb5966b
--- /dev/null
+++ b/test/MC/MachO/linker-option-2.s
@@ -0,0 +1,25 @@
+// RUN: llvm-mc -n -triple x86_64-apple-darwin10 %s -filetype=obj | macho-dump | FileCheck %s
+
+// CHECK: ('load_commands_size', 104)
+// CHECK: ('load_commands', [
+// CHECK:   # Load Command 1
+// CHECK:  (('command', 45)
+// CHECK:   ('size', 16)
+// CHECK:   ('count', 1)
+// CHECK:   ('_strings', [
+// CHECK: 	"a",
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 2
+// CHECK:  (('command', 45)
+// CHECK:   ('size', 16)
+// CHECK:   ('count', 2)
+// CHECK:   ('_strings', [
+// CHECK: 	"a",
+// CHECK: 	"b",
+// CHECK:   ])
+// CHECK:  ),
+// CHECK: ])
+
+.linker_option "a"
+.linker_option "a", "b"
diff --git a/test/MC/MachO/linker-options.ll b/test/MC/MachO/linker-options.ll
new file mode 100644
index 0000000..827adfd
--- /dev/null
+++ b/test/MC/MachO/linker-options.ll
@@ -0,0 +1,43 @@
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin -o - %s > %t
+; RUN: FileCheck --check-prefix=CHECK-ASM < %t %s
+
+; CHECK-ASM: .linker_option "-lz"
+; CHECK-ASM-NEXT: .linker_option "-framework", "Cocoa"
+
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | macho-dump > %t
+; RUN: FileCheck --check-prefix=CHECK-OBJ < %t %s
+
+; CHECK-OBJ: ('load_commands', [
+; CHECK-OBJ:   # Load Command 1
+; CHECK-OBJ:  (('command', 45)
+; CHECK-OBJ:   ('size', 16)
+; CHECK-OBJ:   ('count', 1)
+; CHECK-OBJ:   ('_strings', [
+; CHECK-OBJ: 	"-lz",
+; CHECK-OBJ:   ])
+; CHECK-OBJ:  ),
+; CHECK-OBJ:   # Load Command 2
+; CHECK-OBJ:  (('command', 45)
+; CHECK-OBJ:   ('size', 32)
+; CHECK-OBJ:   ('count', 2)
+; CHECK-OBJ:   ('_strings', [
+; CHECK-OBJ: 	"-framework",
+; CHECK-OBJ: 	"Cocoa",
+; CHECK-OBJ:   ])
+; CHECK-OBJ:   # Load Command 3
+; CHECK-OBJ:  (('command', 45)
+; CHECK-OBJ:   ('size', 24)
+; CHECK-OBJ:   ('count', 1)
+; CHECK-OBJ:   ('_strings', [
+; CHECK-OBJ: 	"-lmath",
+; CHECK-OBJ:   ])
+; CHECK-OBJ:  ),
+; CHECK-OBJ: ])
+
+!0 = metadata !{ i32 6, metadata !"Linker Options", 
+   metadata !{
+      metadata !{ metadata !"-lz" },
+      metadata !{ metadata !"-framework", metadata !"Cocoa" },
+      metadata !{ metadata !"-lmath" } } }
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/MC/Mips/elf-gprel-32-64.ll b/test/MC/Mips/elf-gprel-32-64.ll
new file mode 100644
index 0000000..b946822
--- /dev/null
+++ b/test/MC/Mips/elf-gprel-32-64.ll
@@ -0,0 +1,37 @@
+; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64 %s -o - \
+; RUN: | elf-dump --dump-section-data \
+; RUN: | FileCheck %s
+
+define i32 @test(i32 %c) nounwind {
+entry:
+  switch i32 %c, label %sw.default [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb2
+    i32 2, label %sw.bb5
+    i32 3, label %sw.bb8
+  ]
+
+sw.bb:
+  br label %return
+sw.bb2:
+  br label %return
+sw.bb5:
+  br label %return
+sw.bb8:
+  br label %return
+sw.default:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ -1, %sw.default ], [ 7, %sw.bb8 ], [ 2, %sw.bb5 ], [ 3, %sw.bb2 ], [ 1, %sw.bb ]
+  ret i32 %retval.0
+}
+
+; Check that the appropriate relocations were created.
+
+; R_MIPS_GPREL32/R_MIPS_64/R_MIPS_NONE
+; CHECK: (('sh_name', 0x{{[a-z0-9]+}}) # '.rela.rodata'
+; CHECK:      ('r_type3', 0x00)
+; CHECK-NEXT: ('r_type2', 0x12)
+; CHECK-NEXT: ('r_type', 0x0c)
+
diff --git a/test/MC/Mips/elf-reginfo.ll b/test/MC/Mips/elf-reginfo.ll
new file mode 100644
index 0000000..1d7a188
--- /dev/null
+++ b/test/MC/Mips/elf-reginfo.ll
@@ -0,0 +1,31 @@
+ ; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64 %s -o - \
+ ; RUN: | elf-dump --dump-section-data  | FileCheck --check-prefix=CHECK_64 %s
+ ; RUN: llc -filetype=obj -march=mipsel -mcpu=mips32 %s -o - \
+ ; RUN: | elf-dump --dump-section-data  | FileCheck --check-prefix=CHECK_32 %s
+
+; Check for register information sections.
+;
+
+@str = private unnamed_addr constant [12 x i8] c"hello world\00"
+
+define i32 @main() nounwind {
+entry:
+; Check that the appropriate register information sections were created.
+
+; check for .MIPS.options
+; CHECK_64:      (('sh_name', 0x{{[0-9|a-f]+}}) # '.MIPS.options'
+; CHECK_64-NEXT: ('sh_type', 0x7000000d)
+; CHECK_64-NEXT: ('sh_flags', 0x0000000008000002)
+
+; check for .reginfo
+; CHECK_32:      (('sh_name', 0x{{[0-9|a-f]+}}) # '.reginfo'
+; CHECK_32-NEXT: ('sh_type', 0x70000006)
+; CHECK_32-NEXT: ('sh_flags', 0x00000002)
+
+
+  %puts = tail call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @str, i64 0, i64 0))
+  ret i32 0
+
+}
+declare i32 @puts(i8* nocapture) nounwind
+  
diff --git a/test/MC/Mips/elf_eflags.ll b/test/MC/Mips/elf_eflags.ll
new file mode 100644
index 0000000..315cb81
--- /dev/null
+++ b/test/MC/Mips/elf_eflags.ll
@@ -0,0 +1,66 @@
+; This tests the ELF EFLAGS (e_flags) setting with direct object emission.
+; When the assembler is ready, a .s file for it will
+; be created.
+
+; Non-shared (static) is the absence of PIC and/or CPIC.
+
+; EF_MIPS_NOREORDER (0x00000001) is always on by default currently
+; EF_MIPS_PIC (0x00000002)
+; EF_MIPS_CPIC (0x00000004) - not tested yet
+; EF_MIPS_ABI2 (0x00000020) - n32 not tested yet
+; EF_MIPS_ARCH_32 (0x50000000)
+; EF_MIPS_ARCH_64 (0x60000000)
+; EF_MIPS_ARCH_32R2 (0x70000000)
+; EF_MIPS_ARCH_64R2 (0x80000000)
+
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32 %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32_PIC %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -relocation-model=static %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32R2 %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32R2_PIC %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips -relocation-model=static %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS_PIC %s
+
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE64 %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64 %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE64_PIC %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64r2 -relocation-model=static %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE64R2 %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64r2 %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE64R2_PIC %s
+
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+mips16 -relocation-model=pic %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-LE32R2-MIPS16 %s
+ 
+; 32(R1) bit with NO_REORDER and static
+; CHECK-BE32: ('e_flags', 0x50001001)
+;
+; 32(R1) bit with NO_REORDER and PIC
+; CHECK-BE32_PIC: ('e_flags', 0x50001003)
+;
+; 32R2 bit with NO_REORDER and static
+; CHECK-BE32R2: ('e_flags', 0x70001001)
+;
+; 32R2 bit with NO_REORDER and PIC
+; CHECK-BE32R2_PIC: ('e_flags', 0x70001003)
+;
+; 32R2 bit MICROMIPS with NO_REORDER and static
+; CHECK-BE32R2-MICROMIPS: ('e_flags', 0x72001001)
+;
+; 32R2 bit MICROMIPS with NO_REORDER and PIC
+;CHECK-BE32R2-MICROMIPS_PIC:  ('e_flags', 0x72001003)
+;
+; 64(R1) bit with NO_REORDER and static
+; CHECK-BE64: ('e_flags', 0x60000001)
+;
+; 64(R1) bit with NO_REORDER and PIC
+; CHECK-BE64_PIC: ('e_flags', 0x60000003)
+;
+; 64R2 bit with NO_REORDER and static
+; CHECK-BE64R2: ('e_flags', 0x80000001)
+;
+; 64R2 bit with NO_REORDER and PIC
+; CHECK-BE64R2_PIC: ('e_flags', 0x80000003)
+;
+; 32R2 bit MIPS16 with PIC
+; CHECK-LE32R2-MIPS16: ('e_flags', 0x74001002)
+ 
+define i32 @main() nounwind {
+entry:
+  ret i32 0
+}
diff --git a/test/MC/Mips/elf_st_other.ll b/test/MC/Mips/elf_st_other.ll
new file mode 100644
index 0000000..f188ce7
--- /dev/null
+++ b/test/MC/Mips/elf_st_other.ll
@@ -0,0 +1,13 @@
+; This tests the value of the ELF st_other field for function symbol table entries.
+; For microMIPS the value should be equal to STO_MIPS_MICROMIPS.
+
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | elf-dump --dump-section-data  | FileCheck %s
+
+define i32 @main() nounwind {
+entry:
+  ret i32 0
+}
+
+; CHECK:  'main'
+; CHECK:  ('st_other', 0x80)
+
diff --git a/test/MC/Mips/hilo-addressing.s b/test/MC/Mips/hilo-addressing.s
new file mode 100644
index 0000000..28459c2
--- /dev/null
+++ b/test/MC/Mips/hilo-addressing.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -show-encoding -triple mips-unknown-unknown %s | FileCheck %s
+
+  .ent hilo_test
+     .equ    addr, 0xdeadbeef
+# CHECK: # encoding: [0x3c,0x04,0xde,0xae]
+    lui $4,%hi(addr)
+# CHECK: # encoding: [0x03,0xe0,0x00,0x08]
+    jr  $31
+# CHECK: # encoding: [0x80,0x82,0xbe,0xef]
+    lb  $2,%lo(addr)($4)
+    .end hilo_test
diff --git a/test/MC/Mips/mips-alu-instructions.s b/test/MC/Mips/mips-alu-instructions.s
index 2997782..816138e 100644
--- a/test/MC/Mips/mips-alu-instructions.s
+++ b/test/MC/Mips/mips-alu-instructions.s
@@ -31,7 +31,7 @@
 # CHECK:  xori    $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x38]
 # CHECK:  xori   $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x38]
 # CHECK:  wsbh   $6, $7          # encoding: [0xa0,0x30,0x07,0x7c]
-# CHECK:  nor    $7, $8, $zero   # encoding: [0x27,0x38,0x00,0x01]
+# CHECK:  not    $7, $8          # encoding: [0x27,0x38,0x00,0x01]
      and    $9,  $6, $7
      and    $9,  $6, 17767
      andi   $9,  $6, 17767
@@ -78,9 +78,13 @@
 # CHECK:  multu  $3, $5          # encoding: [0x19,0x00,0x65,0x00]
 # CHECK:  sub    $9, $6, $7      # encoding: [0x22,0x48,0xc7,0x00]
 # CHECK:  subu   $4, $3, $5      # encoding: [0x23,0x20,0x65,0x00]
-# CHECK:  sub     $6, $zero, $7  # encoding: [0x22,0x30,0x07,0x00]
-# CHECK:  subu    $6, $zero, $7  # encoding: [0x23,0x30,0x07,0x00]
-# CHECK:  add     $7, $8, $zero  # encoding: [0x20,0x38,0x00,0x01]
+# CHECK:  neg     $6, $7         # encoding: [0x22,0x30,0x07,0x00]
+# CHECK:  negu    $6, $7         # encoding: [0x23,0x30,0x07,0x00]
+# CHECK:  move    $7, $8         # encoding: [0x21,0x38,0x00,0x01]
+# CHECK:  .set    push
+# CHECK:  .set    mips32r2
+# CHECK:  rdhwr   $5, $29
+# CHECK:  .set    pop            # encoding: [0x3b,0xe8,0x05,0x7c]
     add    $9,$6,$7
     add    $9,$6,17767
     addu   $9,$6,-15001
@@ -98,3 +102,4 @@
     neg    $6,$7
     negu   $6,$7
     move   $7,$8
+    rdhwr   $5, $29
diff --git a/test/MC/Mips/mips-coprocessor-encodings.s b/test/MC/Mips/mips-coprocessor-encodings.s
index bad9163..3d638c3 100644
--- a/test/MC/Mips/mips-coprocessor-encodings.s
+++ b/test/MC/Mips/mips-coprocessor-encodings.s
@@ -1,4 +1,5 @@
-# RUN: llvm-mc %s -triple=mips64-unknown-freebsd -show-encoding | FileCheck --check-prefix=MIPS64 %s
+# RUN: llvm-mc %s -triple=mips64-unknown-freebsd -show-encoding \
+# RUN:| FileCheck --check-prefix=MIPS64 %s
 
 # MIPS64:	dmtc0	$12, $16, 2             # encoding: [0x40,0xac,0x80,0x02]
 # MIPS64:	dmtc0	$12, $16, 0             # encoding: [0x40,0xac,0x80,0x00]
diff --git a/test/MC/Mips/mips-jump-instructions.s b/test/MC/Mips/mips-jump-instructions.s
index 58250f3..bc2d720 100644
--- a/test/MC/Mips/mips-jump-instructions.s
+++ b/test/MC/Mips/mips-jump-instructions.s
@@ -56,6 +56,10 @@ end_of_code:
 # CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
 # CHECK:   jalr $6              # encoding: [0x09,0xf8,0xc0,0x00]
 # CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK:   jalr $25             # encoding: [0x09,0xf8,0x20,0x03]
+# CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK:   jalr $10, $11        # encoding: [0x09,0x50,0x60,0x01]
+# CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
 # CHECK:   jr $7                # encoding: [0x08,0x00,0xe0,0x00]
 # CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
 # CHECK:   jr $7                # encoding: [0x08,0x00,0xe0,0x00]
@@ -67,6 +71,10 @@ end_of_code:
    nop
    jalr $6
    nop
+   jalr $31, $25
+   nop
+   jalr $10, $11
+   nop
    jr $7
    nop
    j $7
diff --git a/test/MC/Mips/mips64-alu-instructions.s b/test/MC/Mips/mips64-alu-instructions.s
new file mode 100644
index 0000000..1b4ebdf
--- /dev/null
+++ b/test/MC/Mips/mips64-alu-instructions.s
@@ -0,0 +1,100 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s
+# Check that the assembler can handle the documented syntax
+# for arithmetic and logical instructions.
+# CHECK: .section __TEXT,__text,regular,pure_instructions
+#------------------------------------------------------------------------------
+# Logical instructions
+#------------------------------------------------------------------------------
+# CHECK:  and    $9, $6, $7      # encoding: [0x24,0x48,0xc7,0x00]
+# CHECK:  andi   $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x30]
+# CHECK:  andi   $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x30]
+# CHECK:  clo    $6, $7          # encoding: [0x21,0x30,0xe6,0x70]
+# CHECK:  clz    $6, $7          # encoding: [0x20,0x30,0xe6,0x70]
+# CHECK:  ins    $19, $9, 6, 7   # encoding: [0x84,0x61,0x33,0x7d]
+# CHECK:  nor    $9, $6, $7      # encoding: [0x27,0x48,0xc7,0x00]
+# CHECK:  or     $3, $3, $5      # encoding: [0x25,0x18,0x65,0x00]
+# CHECK:  ori    $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x34]
+# CHECK:  rotr   $9, $6, 7       # encoding: [0xc2,0x49,0x26,0x00]
+# CHECK:  rotrv  $9, $6, $7      # encoding: [0x46,0x48,0xe6,0x00]
+# CHECK:  sll    $4, $3, 7       # encoding: [0xc0,0x21,0x03,0x00]
+# CHECK:  sllv   $2, $3, $5      # encoding: [0x04,0x10,0xa3,0x00]
+# CHECK:  slt    $3, $3, $5      # encoding: [0x2a,0x18,0x65,0x00]
+# CHECK:  slti   $3, $3, 103     # encoding: [0x67,0x00,0x63,0x28]
+# CHECK:  slti   $3, $3, 103     # encoding: [0x67,0x00,0x63,0x28]
+# CHECK:  sltiu  $3, $3, 103     # encoding: [0x67,0x00,0x63,0x2c]
+# CHECK:  sltu   $3, $3, $5      # encoding: [0x2b,0x18,0x65,0x00]
+# CHECK:  sra    $4, $3, 7       # encoding: [0xc3,0x21,0x03,0x00]
+# CHECK:  srav   $2, $3, $5      # encoding: [0x07,0x10,0xa3,0x00]
+# CHECK:  srl    $4, $3, 7       # encoding: [0xc2,0x21,0x03,0x00]
+# CHECK:  srlv   $2, $3, $5      # encoding: [0x06,0x10,0xa3,0x00]
+# CHECK:  xor    $3, $3, $5      # encoding: [0x26,0x18,0x65,0x00]
+# CHECK:  xori    $9, $6, 17767  # encoding: [0x67,0x45,0xc9,0x38]
+# CHECK:  xori   $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x38]
+# CHECK:  wsbh   $6, $7          # encoding: [0xa0,0x30,0x07,0x7c]
+# CHECK:  not    $7, $8          # encoding: [0x27,0x38,0x00,0x01]
+     and    $9,  $6, $7
+     and    $9,  $6, 17767
+     andi   $9,  $6, 17767
+     clo    $6,  $7
+     clz    $6,  $7
+     ins    $19, $9, 6,7
+     nor    $9,  $6, $7
+     or     $3,  $3, $5
+     ori    $9,  $6, 17767
+     rotr   $9,  $6, 7
+     rotrv  $9,  $6, $7
+     sll    $4,  $3, 7
+     sllv   $2,  $3, $5
+     slt    $3,  $3, $5
+     slt    $3,  $3, 103
+     slti   $3,  $3, 103
+     sltiu  $3,  $3, 103
+     sltu   $3,  $3, $5
+     sra    $4,  $3, 7
+     srav   $2,  $3, $5
+     srl    $4,  $3, 7
+     srlv   $2,  $3, $5
+     xor    $3,  $3, $5
+     xor    $9,  $6, 17767
+     xori   $9,  $6, 17767
+     wsbh   $6,  $7
+     not    $7  ,$8
+
+#------------------------------------------------------------------------------
+# Arithmetic instructions
+#------------------------------------------------------------------------------
+
+# CHECK:  dadd    $9, $6, $7      # encoding: [0x2c,0x48,0xc7,0x00]
+# CHECK:  daddi   $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x60]
+# CHECK:  daddiu  $9, $6, -15001  # encoding: [0x67,0xc5,0xc9,0x64]
+# CHECK:  daddi   $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x60]
+# CHECK:  daddiu  $9, $6, -15001  # encoding: [0x67,0xc5,0xc9,0x64]
+# CHECK:  daddu   $9, $6, $7      # encoding: [0x2d,0x48,0xc7,0x00]
+# CHECK:  madd   $6, $7          # encoding: [0x00,0x00,0xc7,0x70]
+# CHECK:  maddu  $6, $7          # encoding: [0x01,0x00,0xc7,0x70]
+# CHECK:  msub   $6, $7          # encoding: [0x04,0x00,0xc7,0x70]
+# CHECK:  msubu  $6, $7          # encoding: [0x05,0x00,0xc7,0x70]
+# CHECK:  mult   $3, $5          # encoding: [0x18,0x00,0x65,0x00]
+# CHECK:  multu  $3, $5          # encoding: [0x19,0x00,0x65,0x00]
+# CHECK:  dsubu   $4, $3, $5     # encoding: [0x2f,0x20,0x65,0x00]
+# CHECK:  move    $7, $8         # encoding: [0x2d,0x38,0x00,0x01]
+# CHECK:  .set    push
+# CHECK:  .set    mips32r2
+# CHECK:  rdhwr   $5, $29
+# CHECK:  .set    pop            # encoding: [0x3b,0xe8,0x05,0x7c]
+
+    dadd    $9,$6,$7
+    dadd    $9,$6,17767
+    daddu   $9,$6,-15001
+    daddi   $9,$6,17767
+    daddiu  $9,$6,-15001
+    daddu   $9,$6,$7
+    madd   $6,$7
+    maddu  $6,$7
+    msub   $6,$7
+    msubu  $6,$7
+    mult   $3,$5
+    multu  $3,$5
+    dsubu   $4,$3,$5
+    move   $7,$8
+    rdhwr   $5, $29
diff --git a/test/MC/Mips/mips_directives.s b/test/MC/Mips/mips_directives.s
index e2f75a8..65d584d 100644
--- a/test/MC/Mips/mips_directives.s
+++ b/test/MC/Mips/mips_directives.s
@@ -1,16 +1,19 @@
-# RUN: llvm-mc -triple mips-unknown-unknown %s
-#this test produces no output so there isS no FileCheck call
+# RUN: llvm-mc -show-encoding -triple mips-unknown-unknown %s | FileCheck %s
+#
 $BB0_2:
   .ent directives_test
-	.frame	$sp,0,$ra
-	.mask 	0x00000000,0
-	.fmask	0x00000000,0
-	.set	noreorder
-	.set	nomacro
-	.set	noat
+    .frame    $sp,0,$ra
+    .mask     0x00000000,0
+    .fmask    0x00000000,0
+    .set    noreorder
+    .set    nomacro
+    .set    noat
 $JTI0_0:
-	.gpword	($BB0_2)
-	.set  at=$12
-	.set macro
-	.set reorder
-	.end directives_test
+    .gpword    ($BB0_2)
+    .word 0x77fffffc
+# CHECK: $JTI0_0:
+# CHECK-NEXT:     .4byte    2013265916
+    .set  at=$12
+    .set macro
+    .set reorder
+    .set  at=$a0
diff --git a/test/MC/Mips/nabi-regs.s b/test/MC/Mips/nabi-regs.s
new file mode 100644
index 0000000..9371208
--- /dev/null
+++ b/test/MC/Mips/nabi-regs.s
@@ -0,0 +1,36 @@
+# OABI (o32, o64) have a different symbolic register
+# set for the A and T registers because the NABI allows
+# for 4 more register parameters (A registers) offsetting
+# the T registers.
+#
+# For now just check N64
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding \
+# RUN: -mcpu=mips64r2 -arch=mips64 | \
+# RUN: FileCheck %s
+
+# CHECK: .section    __TEXT,__text,regular,pure_instructions
+    .text
+foo:
+
+# CHECK: add    $16, $16, $4            # encoding: [0x02,0x04,0x80,0x20]
+    add $s0,$s0,$a0
+# CHECK: add    $16, $16, $6            # encoding: [0x02,0x06,0x80,0x20]
+    add $s0,$s0,$a2
+# CHECK: add    $16, $16, $7            # encoding: [0x02,0x07,0x80,0x20]
+    add $s0,$s0,$a3
+# CHECK: add    $16, $16, $8            # encoding: [0x02,0x08,0x80,0x20]
+    add $s0,$s0,$a4
+# CHECK: add    $16, $16, $9            # encoding: [0x02,0x09,0x80,0x20]
+    add $s0,$s0,$a5
+# CHECK: add    $16, $16, $10           # encoding: [0x02,0x0a,0x80,0x20]
+    add $s0,$s0,$a6
+# CHECK: add    $16, $16, $11           # encoding: [0x02,0x0b,0x80,0x20]
+    add $s0,$s0,$a7
+# CHECK: add    $16, $16, $12           # encoding: [0x02,0x0c,0x80,0x20]
+    add $s0,$s0,$t0
+# CHECK: add    $16, $16, $13           # encoding: [0x02,0x0d,0x80,0x20]
+    add $s0,$s0,$t1
+# CHECK: add    $16, $16, $14           # encoding: [0x02,0x0e,0x80,0x20]
+    add $s0,$s0,$t2
+# CHECK: add    $16, $16, $15           # encoding: [0x02,0x0f,0x80,0x20]
+    add $s0,$s0,$t3
diff --git a/test/MC/Mips/set-at-directive.s b/test/MC/Mips/set-at-directive.s
new file mode 100644
index 0000000..98a3a35
--- /dev/null
+++ b/test/MC/Mips/set-at-directive.s
@@ -0,0 +1,132 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | \
+# RUN: FileCheck %s
+# Check that the assembler can handle the documented syntax
+# for ".set at" and set the correct value.
+
+# CHECK: .section __TEXT,__text,regular,pure_instructions
+    .text
+foo:
+# CHECK:   jr    $1                      # encoding: [0x08,0x00,0x20,0x00]
+    .set    at=$1
+    jr    $at
+    nop
+# CHECK:   jr    $2                      # encoding: [0x08,0x00,0x40,0x00]
+    .set    at=$2
+    jr    $at
+    nop
+# CHECK:   jr    $3                      # encoding: [0x08,0x00,0x60,0x00]
+    .set    at=$3
+    jr    $at
+    nop
+# CHECK:   jr    $4                      # encoding: [0x08,0x00,0x80,0x00]
+    .set    at=$a0
+    jr    $at
+    nop
+# CHECK:   jr    $5                      # encoding: [0x08,0x00,0xa0,0x00]
+    .set    at=$a1
+    jr    $at
+    nop
+# CHECK:   jr    $6                      # encoding: [0x08,0x00,0xc0,0x00]
+    .set    at=$a2
+    jr    $at
+    nop
+# CHECK:   jr $7                # encoding: [0x08,0x00,0xe0,0x00]
+    .set    at=$a3
+    jr    $at
+    nop
+# CHECK:   jr    $8                      # encoding: [0x08,0x00,0x00,0x01]
+    .set    at=$8
+    jr    $at
+    nop
+# CHECK:   jr    $9                      # encoding: [0x08,0x00,0x20,0x01]
+    .set    at=$9
+    jr    $at
+    nop
+# CHECK:   jr    $10                     # encoding: [0x08,0x00,0x40,0x01]
+    .set    at=$10
+    jr    $at
+    nop
+# CHECK:   jr    $11                     # encoding: [0x08,0x00,0x60,0x01]
+    .set    at=$11
+    jr    $at
+    nop
+# CHECK:   jr    $12                     # encoding: [0x08,0x00,0x80,0x01]
+    .set    at=$12
+    jr    $at
+    nop
+# CHECK:   jr    $13                     # encoding: [0x08,0x00,0xa0,0x01]
+    .set    at=$13
+    jr    $at
+    nop
+# CHECK:   jr    $14                     # encoding: [0x08,0x00,0xc0,0x01]
+    .set    at=$14
+    jr    $at
+    nop
+# CHECK:   jr    $15                     # encoding: [0x08,0x00,0xe0,0x01]
+    .set    at=$15
+    jr    $at
+    nop
+# CHECK:   jr    $16                     # encoding: [0x08,0x00,0x00,0x02]
+    .set    at=$s0
+    jr    $at
+    nop
+# CHECK:   jr    $17                     # encoding: [0x08,0x00,0x20,0x02]
+    .set    at=$s1
+    jr    $at
+    nop
+# CHECK:   jr    $18                     # encoding: [0x08,0x00,0x40,0x02]
+    .set    at=$s2
+    jr    $at
+    nop
+# CHECK:   jr    $19                     # encoding: [0x08,0x00,0x60,0x02]
+    .set    at=$s3
+    jr    $at
+    nop
+# CHECK:   jr    $20                     # encoding: [0x08,0x00,0x80,0x02]
+    .set    at=$s4
+    jr    $at
+    nop
+# CHECK:   jr    $21                     # encoding: [0x08,0x00,0xa0,0x02]
+    .set    at=$s5
+    jr    $at
+    nop
+# CHECK:   jr    $22                     # encoding: [0x08,0x00,0xc0,0x02]
+    .set    at=$s6
+    jr    $at
+    nop
+# CHECK:   jr    $23                     # encoding: [0x08,0x00,0xe0,0x02]
+    .set    at=$s7
+    jr    $at
+    nop
+# CHECK:   jr    $24                     # encoding: [0x08,0x00,0x00,0x03]
+    .set    at=$24
+    jr    $at
+    nop
+# CHECK:   jr    $25                     # encoding: [0x08,0x00,0x20,0x03]
+    .set    at=$25
+    jr    $at
+    nop
+# CHECK:   jr    $26                     # encoding: [0x08,0x00,0x40,0x03]
+    .set    at=$26
+    jr    $at
+    nop
+# CHECK:   jr    $27                     # encoding: [0x08,0x00,0x60,0x03]
+    .set    at=$27
+    jr    $at
+    nop
+# CHECK:   jr    $gp                     # encoding: [0x08,0x00,0x80,0x03]
+    .set    at=$gp
+    jr    $at
+    nop
+# CHECK:   jr    $fp                     # encoding: [0x08,0x00,0xc0,0x03]
+    .set    at=$fp
+    jr    $at
+    nop
+# CHECK:   jr    $sp                     # encoding: [0x08,0x00,0xa0,0x03]
+    .set    at=$sp
+    jr    $at
+    nop
+# CHECK:   jr    $ra                     # encoding: [0x08,0x00,0xe0,0x03]
+    .set    at=$ra
+    jr    $at
+    nop
diff --git a/test/MC/PowerPC/ppc64-initial-cfa.ll b/test/MC/PowerPC/ppc64-initial-cfa.ll
index 0e36fb7..16236c9 100644
--- a/test/MC/PowerPC/ppc64-initial-cfa.ll
+++ b/test/MC/PowerPC/ppc64-initial-cfa.ll
@@ -20,7 +20,7 @@ entry:
 ; STATIC-NEXT: ('sh_info', 0x00000000)
 ; STATIC-NEXT: ('sh_addralign', 0x0000000000000008)
 ; STATIC-NEXT: ('sh_entsize', 0x0000000000000000)
-; STATIC-NEXT: ('_section_data', '00000010 00000000 017a5200 01784101 0b0c0100 00000010 00000018 00000000 00000010 00000000')
+; STATIC-NEXT: ('_section_data', '00000010 00000000 017a5200 01784101 1b0c0100 00000010 00000018 00000000 00000010 00000000')
 
 ; STATIC:      ('sh_name', 0x{{.*}}) # '.rela.eh_frame'
 ; STATIC-NEXT: ('sh_type', 0x00000004)
@@ -34,11 +34,11 @@ entry:
 ; STATIC-NEXT: ('sh_entsize', 0x0000000000000018)
 ; STATIC-NEXT: ('_relocations', [
 
-; Static build should create R_PPC64_ADDR32 relocations
+; Static build should create R_PPC64_REL32 relocations
 ; STATIC-NEXT:  # Relocation 0
 ; STATIC-NEXT:  (('r_offset', 0x000000000000001c)
 ; STATIC-NEXT:   ('r_sym', 0x{{.*}})
-; STATIC-NEXT:   ('r_type', 0x00000001)
+; STATIC-NEXT:   ('r_type', 0x0000001a)
 ; STATIC-NEXT:   ('r_addend', 0x0000000000000000)
 ; STATIC-NEXT:  ),
 ; STATIC-NEXT: ])
diff --git a/test/MC/X86/AlignedBundling/autogen-inst-offset-align-to-end.s b/test/MC/X86/AlignedBundling/autogen-inst-offset-align-to-end.s
index 7fbb71b..fbf5b52 100644
--- a/test/MC/X86/AlignedBundling/autogen-inst-offset-align-to-end.s
+++ b/test/MC/X86/AlignedBundling/autogen-inst-offset-align-to-end.s
@@ -354,6 +354,7 @@ INSTRLEN_2_OFFSET_15:
   .endr
   .bundle_unlock
 # CHECK: 3ef: nop
+# CHECK: 3f0: nop
 # CHECK: 3fe: incl
 
   .align 32, 0x90
@@ -517,6 +518,7 @@ INSTRLEN_3_OFFSET_14:
   .endr
   .bundle_unlock
 # CHECK: 5ce: nop
+# CHECK: 5d0: nop
 # CHECK: 5dd: incl
 
   .align 32, 0x90
@@ -528,6 +530,7 @@ INSTRLEN_3_OFFSET_15:
   .endr
   .bundle_unlock
 # CHECK: 5ef: nop
+# CHECK: 5f0: nop
 # CHECK: 5fd: incl
 
   .align 32, 0x90
@@ -680,6 +683,7 @@ INSTRLEN_4_OFFSET_13:
   .endr
   .bundle_unlock
 # CHECK: 7ad: nop
+# CHECK: 7b0: nop
 # CHECK: 7bc: incl
 
   .align 32, 0x90
@@ -691,6 +695,7 @@ INSTRLEN_4_OFFSET_14:
   .endr
   .bundle_unlock
 # CHECK: 7ce: nop
+# CHECK: 7d0: nop
 # CHECK: 7dc: incl
 
   .align 32, 0x90
@@ -702,6 +707,7 @@ INSTRLEN_4_OFFSET_15:
   .endr
   .bundle_unlock
 # CHECK: 7ef: nop
+# CHECK: 7f0: nop
 # CHECK: 7fc: incl
 
   .align 32, 0x90
@@ -843,6 +849,7 @@ INSTRLEN_5_OFFSET_12:
   .endr
   .bundle_unlock
 # CHECK: 98c: nop
+# CHECK: 990: nop
 # CHECK: 99b: incl
 
   .align 32, 0x90
@@ -854,6 +861,7 @@ INSTRLEN_5_OFFSET_13:
   .endr
   .bundle_unlock
 # CHECK: 9ad: nop
+# CHECK: 9b0: nop
 # CHECK: 9bb: incl
 
   .align 32, 0x90
@@ -865,6 +873,7 @@ INSTRLEN_5_OFFSET_14:
   .endr
   .bundle_unlock
 # CHECK: 9ce: nop
+# CHECK: 9d0: nop
 # CHECK: 9db: incl
 
   .align 32, 0x90
@@ -876,6 +885,7 @@ INSTRLEN_5_OFFSET_15:
   .endr
   .bundle_unlock
 # CHECK: 9ef: nop
+# CHECK: 9f0: nop
 # CHECK: 9fb: incl
 
   .align 32, 0x90
@@ -1006,6 +1016,7 @@ INSTRLEN_6_OFFSET_11:
   .endr
   .bundle_unlock
 # CHECK: b6b: nop
+# CHECK: b70: nop
 # CHECK: b7a: incl
 
   .align 32, 0x90
@@ -1017,6 +1028,7 @@ INSTRLEN_6_OFFSET_12:
   .endr
   .bundle_unlock
 # CHECK: b8c: nop
+# CHECK: b90: nop
 # CHECK: b9a: incl
 
   .align 32, 0x90
@@ -1028,6 +1040,7 @@ INSTRLEN_6_OFFSET_13:
   .endr
   .bundle_unlock
 # CHECK: bad: nop
+# CHECK: bb0: nop
 # CHECK: bba: incl
 
   .align 32, 0x90
@@ -1039,6 +1052,7 @@ INSTRLEN_6_OFFSET_14:
   .endr
   .bundle_unlock
 # CHECK: bce: nop
+# CHECK: bd0: nop
 # CHECK: bda: incl
 
   .align 32, 0x90
@@ -1050,6 +1064,7 @@ INSTRLEN_6_OFFSET_15:
   .endr
   .bundle_unlock
 # CHECK: bef: nop
+# CHECK: bf0: nop
 # CHECK: bfa: incl
 
   .align 32, 0x90
@@ -1169,6 +1184,7 @@ INSTRLEN_7_OFFSET_10:
   .endr
   .bundle_unlock
 # CHECK: d4a: nop
+# CHECK: d50: nop
 # CHECK: d59: incl
 
   .align 32, 0x90
@@ -1180,6 +1196,7 @@ INSTRLEN_7_OFFSET_11:
   .endr
   .bundle_unlock
 # CHECK: d6b: nop
+# CHECK: d70: nop
 # CHECK: d79: incl
 
   .align 32, 0x90
@@ -1191,6 +1208,7 @@ INSTRLEN_7_OFFSET_12:
   .endr
   .bundle_unlock
 # CHECK: d8c: nop
+# CHECK: d90: nop
 # CHECK: d99: incl
 
   .align 32, 0x90
@@ -1202,6 +1220,7 @@ INSTRLEN_7_OFFSET_13:
   .endr
   .bundle_unlock
 # CHECK: dad: nop
+# CHECK: db0: nop
 # CHECK: db9: incl
 
   .align 32, 0x90
@@ -1213,6 +1232,7 @@ INSTRLEN_7_OFFSET_14:
   .endr
   .bundle_unlock
 # CHECK: dce: nop
+# CHECK: dd0: nop
 # CHECK: dd9: incl
 
   .align 32, 0x90
@@ -1224,6 +1244,7 @@ INSTRLEN_7_OFFSET_15:
   .endr
   .bundle_unlock
 # CHECK: def: nop
+# CHECK: df0: nop
 # CHECK: df9: incl
 
   .align 32, 0x90
@@ -1332,6 +1353,7 @@ INSTRLEN_8_OFFSET_9:
   .endr
   .bundle_unlock
 # CHECK: f29: nop
+# CHECK: f30: nop
 # CHECK: f38: incl
 
   .align 32, 0x90
@@ -1343,6 +1365,7 @@ INSTRLEN_8_OFFSET_10:
   .endr
   .bundle_unlock
 # CHECK: f4a: nop
+# CHECK: f50: nop
 # CHECK: f58: incl
 
   .align 32, 0x90
@@ -1354,6 +1377,7 @@ INSTRLEN_8_OFFSET_11:
   .endr
   .bundle_unlock
 # CHECK: f6b: nop
+# CHECK: f70: nop
 # CHECK: f78: incl
 
   .align 32, 0x90
@@ -1365,6 +1389,7 @@ INSTRLEN_8_OFFSET_12:
   .endr
   .bundle_unlock
 # CHECK: f8c: nop
+# CHECK: f90: nop
 # CHECK: f98: incl
 
   .align 32, 0x90
@@ -1376,6 +1401,7 @@ INSTRLEN_8_OFFSET_13:
   .endr
   .bundle_unlock
 # CHECK: fad: nop
+# CHECK: fb0: nop
 # CHECK: fb8: incl
 
   .align 32, 0x90
@@ -1387,6 +1413,7 @@ INSTRLEN_8_OFFSET_14:
   .endr
   .bundle_unlock
 # CHECK: fce: nop
+# CHECK: fd0: nop
 # CHECK: fd8: incl
 
   .align 32, 0x90
@@ -1398,6 +1425,7 @@ INSTRLEN_8_OFFSET_15:
   .endr
   .bundle_unlock
 # CHECK: fef: nop
+# CHECK: ff0: nop
 # CHECK: ff8: incl
 
   .align 32, 0x90
@@ -1495,6 +1523,7 @@ INSTRLEN_9_OFFSET_8:
   .endr
   .bundle_unlock
 # CHECK: 1108: nop
+# CHECK: 1110: nop
 # CHECK: 1117: incl
 
   .align 32, 0x90
@@ -1506,6 +1535,7 @@ INSTRLEN_9_OFFSET_9:
   .endr
   .bundle_unlock
 # CHECK: 1129: nop
+# CHECK: 1130: nop
 # CHECK: 1137: incl
 
   .align 32, 0x90
@@ -1517,6 +1547,7 @@ INSTRLEN_9_OFFSET_10:
   .endr
   .bundle_unlock
 # CHECK: 114a: nop
+# CHECK: 1150: nop
 # CHECK: 1157: incl
 
   .align 32, 0x90
@@ -1528,6 +1559,7 @@ INSTRLEN_9_OFFSET_11:
   .endr
   .bundle_unlock
 # CHECK: 116b: nop
+# CHECK: 1170: nop
 # CHECK: 1177: incl
 
   .align 32, 0x90
@@ -1539,6 +1571,7 @@ INSTRLEN_9_OFFSET_12:
   .endr
   .bundle_unlock
 # CHECK: 118c: nop
+# CHECK: 1190: nop
 # CHECK: 1197: incl
 
   .align 32, 0x90
@@ -1550,6 +1583,7 @@ INSTRLEN_9_OFFSET_13:
   .endr
   .bundle_unlock
 # CHECK: 11ad: nop
+# CHECK: 11b0: nop
 # CHECK: 11b7: incl
 
   .align 32, 0x90
@@ -1561,6 +1595,7 @@ INSTRLEN_9_OFFSET_14:
   .endr
   .bundle_unlock
 # CHECK: 11ce: nop
+# CHECK: 11d0: nop
 # CHECK: 11d7: incl
 
   .align 32, 0x90
@@ -1572,6 +1607,7 @@ INSTRLEN_9_OFFSET_15:
   .endr
   .bundle_unlock
 # CHECK: 11ef: nop
+# CHECK: 11f0: nop
 # CHECK: 11f7: incl
 
   .align 32, 0x90
@@ -1658,6 +1694,7 @@ INSTRLEN_10_OFFSET_7:
   .endr
   .bundle_unlock
 # CHECK: 12e7: nop
+# CHECK: 12f0: nop
 # CHECK: 12f6: incl
 
   .align 32, 0x90
@@ -1669,6 +1706,7 @@ INSTRLEN_10_OFFSET_8:
   .endr
   .bundle_unlock
 # CHECK: 1308: nop
+# CHECK: 1310: nop
 # CHECK: 1316: incl
 
   .align 32, 0x90
@@ -1680,6 +1718,7 @@ INSTRLEN_10_OFFSET_9:
   .endr
   .bundle_unlock
 # CHECK: 1329: nop
+# CHECK: 1330: nop
 # CHECK: 1336: incl
 
   .align 32, 0x90
@@ -1691,6 +1730,7 @@ INSTRLEN_10_OFFSET_10:
   .endr
   .bundle_unlock
 # CHECK: 134a: nop
+# CHECK: 1350: nop
 # CHECK: 1356: incl
 
   .align 32, 0x90
@@ -1702,6 +1742,7 @@ INSTRLEN_10_OFFSET_11:
   .endr
   .bundle_unlock
 # CHECK: 136b: nop
+# CHECK: 1370: nop
 # CHECK: 1376: incl
 
   .align 32, 0x90
@@ -1713,6 +1754,7 @@ INSTRLEN_10_OFFSET_12:
   .endr
   .bundle_unlock
 # CHECK: 138c: nop
+# CHECK: 1390: nop
 # CHECK: 1396: incl
 
   .align 32, 0x90
@@ -1724,6 +1766,7 @@ INSTRLEN_10_OFFSET_13:
   .endr
   .bundle_unlock
 # CHECK: 13ad: nop
+# CHECK: 13b0: nop
 # CHECK: 13b6: incl
 
   .align 32, 0x90
@@ -1735,6 +1778,7 @@ INSTRLEN_10_OFFSET_14:
   .endr
   .bundle_unlock
 # CHECK: 13ce: nop
+# CHECK: 13d0: nop
 # CHECK: 13d6: incl
 
   .align 32, 0x90
@@ -1746,6 +1790,7 @@ INSTRLEN_10_OFFSET_15:
   .endr
   .bundle_unlock
 # CHECK: 13ef: nop
+# CHECK: 13f0: nop
 # CHECK: 13f6: incl
 
   .align 32, 0x90
@@ -1821,6 +1866,7 @@ INSTRLEN_11_OFFSET_6:
   .endr
   .bundle_unlock
 # CHECK: 14c6: nop
+# CHECK: 14d0: nop
 # CHECK: 14d5: incl
 
   .align 32, 0x90
@@ -1832,6 +1878,7 @@ INSTRLEN_11_OFFSET_7:
   .endr
   .bundle_unlock
 # CHECK: 14e7: nop
+# CHECK: 14f0: nop
 # CHECK: 14f5: incl
 
   .align 32, 0x90
@@ -1843,6 +1890,7 @@ INSTRLEN_11_OFFSET_8:
   .endr
   .bundle_unlock
 # CHECK: 1508: nop
+# CHECK: 1510: nop
 # CHECK: 1515: incl
 
   .align 32, 0x90
@@ -1854,6 +1902,7 @@ INSTRLEN_11_OFFSET_9:
   .endr
   .bundle_unlock
 # CHECK: 1529: nop
+# CHECK: 1530: nop
 # CHECK: 1535: incl
 
   .align 32, 0x90
@@ -1865,6 +1914,7 @@ INSTRLEN_11_OFFSET_10:
   .endr
   .bundle_unlock
 # CHECK: 154a: nop
+# CHECK: 1550: nop
 # CHECK: 1555: incl
 
   .align 32, 0x90
@@ -1876,6 +1926,7 @@ INSTRLEN_11_OFFSET_11:
   .endr
   .bundle_unlock
 # CHECK: 156b: nop
+# CHECK: 1570: nop
 # CHECK: 1575: incl
 
   .align 32, 0x90
@@ -1887,6 +1938,7 @@ INSTRLEN_11_OFFSET_12:
   .endr
   .bundle_unlock
 # CHECK: 158c: nop
+# CHECK: 1590: nop
 # CHECK: 1595: incl
 
   .align 32, 0x90
@@ -1898,6 +1950,7 @@ INSTRLEN_11_OFFSET_13:
   .endr
   .bundle_unlock
 # CHECK: 15ad: nop
+# CHECK: 15b0: nop
 # CHECK: 15b5: incl
 
   .align 32, 0x90
@@ -1909,6 +1962,7 @@ INSTRLEN_11_OFFSET_14:
   .endr
   .bundle_unlock
 # CHECK: 15ce: nop
+# CHECK: 15d0: nop
 # CHECK: 15d5: incl
 
   .align 32, 0x90
@@ -1920,6 +1974,7 @@ INSTRLEN_11_OFFSET_15:
   .endr
   .bundle_unlock
 # CHECK: 15ef: nop
+# CHECK: 15f0: nop
 # CHECK: 15f5: incl
 
   .align 32, 0x90
@@ -1984,6 +2039,7 @@ INSTRLEN_12_OFFSET_5:
   .endr
   .bundle_unlock
 # CHECK: 16a5: nop
+# CHECK: 16b0: nop
 # CHECK: 16b4: incl
 
   .align 32, 0x90
@@ -1995,6 +2051,7 @@ INSTRLEN_12_OFFSET_6:
   .endr
   .bundle_unlock
 # CHECK: 16c6: nop
+# CHECK: 16d0: nop
 # CHECK: 16d4: incl
 
   .align 32, 0x90
@@ -2006,6 +2063,7 @@ INSTRLEN_12_OFFSET_7:
   .endr
   .bundle_unlock
 # CHECK: 16e7: nop
+# CHECK: 16f0: nop
 # CHECK: 16f4: incl
 
   .align 32, 0x90
@@ -2017,6 +2075,7 @@ INSTRLEN_12_OFFSET_8:
   .endr
   .bundle_unlock
 # CHECK: 1708: nop
+# CHECK: 1710: nop
 # CHECK: 1714: incl
 
   .align 32, 0x90
@@ -2028,6 +2087,7 @@ INSTRLEN_12_OFFSET_9:
   .endr
   .bundle_unlock
 # CHECK: 1729: nop
+# CHECK: 1730: nop
 # CHECK: 1734: incl
 
   .align 32, 0x90
@@ -2039,6 +2099,7 @@ INSTRLEN_12_OFFSET_10:
   .endr
   .bundle_unlock
 # CHECK: 174a: nop
+# CHECK: 1750: nop
 # CHECK: 1754: incl
 
   .align 32, 0x90
@@ -2050,6 +2111,7 @@ INSTRLEN_12_OFFSET_11:
   .endr
   .bundle_unlock
 # CHECK: 176b: nop
+# CHECK: 1770: nop
 # CHECK: 1774: incl
 
   .align 32, 0x90
@@ -2061,6 +2123,7 @@ INSTRLEN_12_OFFSET_12:
   .endr
   .bundle_unlock
 # CHECK: 178c: nop
+# CHECK: 1790: nop
 # CHECK: 1794: incl
 
   .align 32, 0x90
@@ -2072,6 +2135,7 @@ INSTRLEN_12_OFFSET_13:
   .endr
   .bundle_unlock
 # CHECK: 17ad: nop
+# CHECK: 17b0: nop
 # CHECK: 17b4: incl
 
   .align 32, 0x90
@@ -2083,6 +2147,7 @@ INSTRLEN_12_OFFSET_14:
   .endr
   .bundle_unlock
 # CHECK: 17ce: nop
+# CHECK: 17d0: nop
 # CHECK: 17d4: incl
 
   .align 32, 0x90
@@ -2094,6 +2159,7 @@ INSTRLEN_12_OFFSET_15:
   .endr
   .bundle_unlock
 # CHECK: 17ef: nop
+# CHECK: 17f0: nop
 # CHECK: 17f4: incl
 
   .align 32, 0x90
@@ -2147,6 +2213,7 @@ INSTRLEN_13_OFFSET_4:
   .endr
   .bundle_unlock
 # CHECK: 1884: nop
+# CHECK: 1890: nop
 # CHECK: 1893: incl
 
   .align 32, 0x90
@@ -2158,6 +2225,7 @@ INSTRLEN_13_OFFSET_5:
   .endr
   .bundle_unlock
 # CHECK: 18a5: nop
+# CHECK: 18b0: nop
 # CHECK: 18b3: incl
 
   .align 32, 0x90
@@ -2169,6 +2237,7 @@ INSTRLEN_13_OFFSET_6:
   .endr
   .bundle_unlock
 # CHECK: 18c6: nop
+# CHECK: 18d0: nop
 # CHECK: 18d3: incl
 
   .align 32, 0x90
@@ -2180,6 +2249,7 @@ INSTRLEN_13_OFFSET_7:
   .endr
   .bundle_unlock
 # CHECK: 18e7: nop
+# CHECK: 18f0: nop
 # CHECK: 18f3: incl
 
   .align 32, 0x90
@@ -2191,6 +2261,7 @@ INSTRLEN_13_OFFSET_8:
   .endr
   .bundle_unlock
 # CHECK: 1908: nop
+# CHECK: 1910: nop
 # CHECK: 1913: incl
 
   .align 32, 0x90
@@ -2202,6 +2273,7 @@ INSTRLEN_13_OFFSET_9:
   .endr
   .bundle_unlock
 # CHECK: 1929: nop
+# CHECK: 1930: nop
 # CHECK: 1933: incl
 
   .align 32, 0x90
@@ -2213,6 +2285,7 @@ INSTRLEN_13_OFFSET_10:
   .endr
   .bundle_unlock
 # CHECK: 194a: nop
+# CHECK: 1950: nop
 # CHECK: 1953: incl
 
   .align 32, 0x90
@@ -2224,6 +2297,7 @@ INSTRLEN_13_OFFSET_11:
   .endr
   .bundle_unlock
 # CHECK: 196b: nop
+# CHECK: 1970: nop
 # CHECK: 1973: incl
 
   .align 32, 0x90
@@ -2235,6 +2309,7 @@ INSTRLEN_13_OFFSET_12:
   .endr
   .bundle_unlock
 # CHECK: 198c: nop
+# CHECK: 1990: nop
 # CHECK: 1993: incl
 
   .align 32, 0x90
@@ -2246,6 +2321,7 @@ INSTRLEN_13_OFFSET_13:
   .endr
   .bundle_unlock
 # CHECK: 19ad: nop
+# CHECK: 19b0: nop
 # CHECK: 19b3: incl
 
   .align 32, 0x90
@@ -2257,6 +2333,7 @@ INSTRLEN_13_OFFSET_14:
   .endr
   .bundle_unlock
 # CHECK: 19ce: nop
+# CHECK: 19d0: nop
 # CHECK: 19d3: incl
 
   .align 32, 0x90
@@ -2268,6 +2345,7 @@ INSTRLEN_13_OFFSET_15:
   .endr
   .bundle_unlock
 # CHECK: 19ef: nop
+# CHECK: 19f0: nop
 # CHECK: 19f3: incl
 
   .align 32, 0x90
@@ -2310,6 +2388,7 @@ INSTRLEN_14_OFFSET_3:
   .endr
   .bundle_unlock
 # CHECK: 1a63: nop
+# CHECK: 1a70: nop
 # CHECK: 1a72: incl
 
   .align 32, 0x90
@@ -2321,6 +2400,7 @@ INSTRLEN_14_OFFSET_4:
   .endr
   .bundle_unlock
 # CHECK: 1a84: nop
+# CHECK: 1a90: nop
 # CHECK: 1a92: incl
 
   .align 32, 0x90
@@ -2332,6 +2412,7 @@ INSTRLEN_14_OFFSET_5:
   .endr
   .bundle_unlock
 # CHECK: 1aa5: nop
+# CHECK: 1ab0: nop
 # CHECK: 1ab2: incl
 
   .align 32, 0x90
@@ -2343,6 +2424,7 @@ INSTRLEN_14_OFFSET_6:
   .endr
   .bundle_unlock
 # CHECK: 1ac6: nop
+# CHECK: 1ad0: nop
 # CHECK: 1ad2: incl
 
   .align 32, 0x90
@@ -2354,6 +2436,7 @@ INSTRLEN_14_OFFSET_7:
   .endr
   .bundle_unlock
 # CHECK: 1ae7: nop
+# CHECK: 1af0: nop
 # CHECK: 1af2: incl
 
   .align 32, 0x90
@@ -2365,6 +2448,7 @@ INSTRLEN_14_OFFSET_8:
   .endr
   .bundle_unlock
 # CHECK: 1b08: nop
+# CHECK: 1b10: nop
 # CHECK: 1b12: incl
 
   .align 32, 0x90
@@ -2376,6 +2460,7 @@ INSTRLEN_14_OFFSET_9:
   .endr
   .bundle_unlock
 # CHECK: 1b29: nop
+# CHECK: 1b30: nop
 # CHECK: 1b32: incl
 
   .align 32, 0x90
@@ -2387,6 +2472,7 @@ INSTRLEN_14_OFFSET_10:
   .endr
   .bundle_unlock
 # CHECK: 1b4a: nop
+# CHECK: 1b50: nop
 # CHECK: 1b52: incl
 
   .align 32, 0x90
@@ -2398,6 +2484,7 @@ INSTRLEN_14_OFFSET_11:
   .endr
   .bundle_unlock
 # CHECK: 1b6b: nop
+# CHECK: 1b70: nop
 # CHECK: 1b72: incl
 
   .align 32, 0x90
@@ -2409,6 +2496,7 @@ INSTRLEN_14_OFFSET_12:
   .endr
   .bundle_unlock
 # CHECK: 1b8c: nop
+# CHECK: 1b90: nop
 # CHECK: 1b92: incl
 
   .align 32, 0x90
@@ -2420,6 +2508,7 @@ INSTRLEN_14_OFFSET_13:
   .endr
   .bundle_unlock
 # CHECK: 1bad: nop
+# CHECK: 1bb0: nop
 # CHECK: 1bb2: incl
 
   .align 32, 0x90
@@ -2431,6 +2520,7 @@ INSTRLEN_14_OFFSET_14:
   .endr
   .bundle_unlock
 # CHECK: 1bce: nop
+# CHECK: 1bd0: nop
 # CHECK: 1bd2: incl
 
   .align 32, 0x90
@@ -2442,6 +2532,7 @@ INSTRLEN_14_OFFSET_15:
   .endr
   .bundle_unlock
 # CHECK: 1bef: nop
+# CHECK: 1bf0: nop
 # CHECK: 1bf2: incl
 
   .align 32, 0x90
@@ -2473,6 +2564,7 @@ INSTRLEN_15_OFFSET_2:
   .endr
   .bundle_unlock
 # CHECK: 1c42: nop
+# CHECK: 1c50: nop
 # CHECK: 1c51: incl
 
   .align 32, 0x90
@@ -2484,6 +2576,7 @@ INSTRLEN_15_OFFSET_3:
   .endr
   .bundle_unlock
 # CHECK: 1c63: nop
+# CHECK: 1c70: nop
 # CHECK: 1c71: incl
 
   .align 32, 0x90
@@ -2495,6 +2588,7 @@ INSTRLEN_15_OFFSET_4:
   .endr
   .bundle_unlock
 # CHECK: 1c84: nop
+# CHECK: 1c90: nop
 # CHECK: 1c91: incl
 
   .align 32, 0x90
@@ -2506,6 +2600,7 @@ INSTRLEN_15_OFFSET_5:
   .endr
   .bundle_unlock
 # CHECK: 1ca5: nop
+# CHECK: 1cb0: nop
 # CHECK: 1cb1: incl
 
   .align 32, 0x90
@@ -2517,6 +2612,7 @@ INSTRLEN_15_OFFSET_6:
   .endr
   .bundle_unlock
 # CHECK: 1cc6: nop
+# CHECK: 1cd0: nop
 # CHECK: 1cd1: incl
 
   .align 32, 0x90
@@ -2528,6 +2624,7 @@ INSTRLEN_15_OFFSET_7:
   .endr
   .bundle_unlock
 # CHECK: 1ce7: nop
+# CHECK: 1cf0: nop
 # CHECK: 1cf1: incl
 
   .align 32, 0x90
@@ -2539,6 +2636,7 @@ INSTRLEN_15_OFFSET_8:
   .endr
   .bundle_unlock
 # CHECK: 1d08: nop
+# CHECK: 1d10: nop
 # CHECK: 1d11: incl
 
   .align 32, 0x90
@@ -2550,6 +2648,7 @@ INSTRLEN_15_OFFSET_9:
   .endr
   .bundle_unlock
 # CHECK: 1d29: nop
+# CHECK: 1d30: nop
 # CHECK: 1d31: incl
 
   .align 32, 0x90
@@ -2561,6 +2660,7 @@ INSTRLEN_15_OFFSET_10:
   .endr
   .bundle_unlock
 # CHECK: 1d4a: nop
+# CHECK: 1d50: nop
 # CHECK: 1d51: incl
 
   .align 32, 0x90
@@ -2572,6 +2672,7 @@ INSTRLEN_15_OFFSET_11:
   .endr
   .bundle_unlock
 # CHECK: 1d6b: nop
+# CHECK: 1d70: nop
 # CHECK: 1d71: incl
 
   .align 32, 0x90
@@ -2583,6 +2684,7 @@ INSTRLEN_15_OFFSET_12:
   .endr
   .bundle_unlock
 # CHECK: 1d8c: nop
+# CHECK: 1d90: nop
 # CHECK: 1d91: incl
 
   .align 32, 0x90
@@ -2594,6 +2696,7 @@ INSTRLEN_15_OFFSET_13:
   .endr
   .bundle_unlock
 # CHECK: 1dad: nop
+# CHECK: 1db0: nop
 # CHECK: 1db1: incl
 
   .align 32, 0x90
@@ -2605,6 +2708,7 @@ INSTRLEN_15_OFFSET_14:
   .endr
   .bundle_unlock
 # CHECK: 1dce: nop
+# CHECK: 1dd0: nop
 # CHECK: 1dd1: incl
 
   .align 32, 0x90
@@ -2616,6 +2720,7 @@ INSTRLEN_15_OFFSET_15:
   .endr
   .bundle_unlock
 # CHECK: 1def: nop
+# CHECK: 1df0: nop
 # CHECK: 1df1: incl
 
   .align 32, 0x90
diff --git a/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s b/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s
index 7cb6d06..6ca4046 100644
--- a/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s
+++ b/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s
@@ -26,7 +26,8 @@ foo:
 # Here we have to pad until the end of the *next* boundary because
 # otherwise the group crosses a boundary.
 # CHECK:      1a: nop
+# The nop sequence may be implemented as one instruction or many, but if
+# it's one instruction, that instruction cannot itself cross the boundary.
+# CHECK:      20: nop
 # CHECK-NEXT: 26: callq
 # CHECK-NEXT: 2b: callq
-
-
diff --git a/test/MC/X86/AlignedBundling/relax-at-bundle-end.s b/test/MC/X86/AlignedBundling/relax-at-bundle-end.s
new file mode 100644
index 0000000..ab4affb
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/relax-at-bundle-end.s
@@ -0,0 +1,16 @@
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
+# RUN:   | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+
+# Test that an instruction near a bundle end gets properly padded
+# after it is relaxed.
+.text
+foo:
+        .bundle_align_mode 5
+        .rept 29
+        push %rax
+        .endr
+# CHECK: 1c: push
+# CHECK: 1d: nop
+# CHECK: 20: jne
+        jne 0x100
+
diff --git a/test/MC/X86/gnux32-dwarf-gen.s b/test/MC/X86/gnux32-dwarf-gen.s
new file mode 100644
index 0000000..6603125
--- /dev/null
+++ b/test/MC/X86/gnux32-dwarf-gen.s
@@ -0,0 +1,24 @@
+# RUN: llvm-mc -g -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t.64
+# RUN: llvm-dwarfdump -debug-dump=info %t.64 | FileCheck -check-prefix=DEFAULTABI %s
+
+# RUN: llvm-mc -g -filetype=obj -triple x86_64-pc-linux-gnux32 %s -o %t.32
+# RUN: llvm-dwarfdump -debug-dump=info %t.32 | FileCheck -check-prefix=X32ABI %s
+
+# This test checks the dwarf info section emitted to the output object by the
+# assembler, looking at the difference between the x32 ABI and default x86-64
+# ABI.
+
+# DEFAULTABI: addr_size = 0x08
+# X32ABI: addr_size = 0x04
+
+.globl _bar
+_bar:
+        movl    $0, %eax
+L1:     leave
+        ret
+_foo:
+_baz:
+        nop
+.data
+_x:     .long 1
+
diff --git a/test/MC/X86/intel-syntax-hex.s b/test/MC/X86/intel-syntax-hex.s
new file mode 100644
index 0000000..b3a19fb
--- /dev/null
+++ b/test/MC/X86/intel-syntax-hex.s
@@ -0,0 +1,26 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s
+// rdar://12470373
+
+// Checks to make sure we parse the hexadecimal suffix properly.
+// CHECK: movl $10, %eax
+  mov eax, 10
+// CHECK: movl $16, %eax
+  mov eax, 10h
+// CHECK: movl $16, %eax
+  mov eax, 10H
+// CHECK: movl $4294967295, %eax
+  mov eax, 0ffffffffh
+// CHECK: movl $4294967295, %eax
+  mov eax, 0xffffffff
+// CHECK: movl $4294967295, %eax
+  mov eax, 0xffffffffh
+// CHECK: movl $15, %eax
+  mov eax, 0fh
+// CHECK: movl $162, %eax
+  mov eax, 0a2h
+// CHECK: movl $162, %eax
+  mov eax, 0xa2
+// CHECK: movl $162, %eax
+  mov eax, 0xa2h
+// CHECK: movl $674, %eax
+  mov eax, 2a2h
diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s
index 7edd26a..8bfa58a 100644
--- a/test/MC/X86/intel-syntax.s
+++ b/test/MC/X86/intel-syntax.s
@@ -56,13 +56,195 @@ _main:
 // CHECK:	fld	%st(0)
 	fld	ST(0)
 // CHECK:	movl	%fs:(%rdi), %eax
-        mov     EAX, DWORD PTR FS:[RDI]
-// CHECK:	leal	(,%rdi,4), %r8d
-        lea     R8D, DWORD PTR [4*RDI]
-// CHECK:        movl    _fnan(,%ecx,4), %ecx
-        mov     ECX, DWORD PTR [4*ECX + _fnan]
-// CHECK:       movq    %fs:320, %rax
-        mov     RAX, QWORD PTR FS:[320]
-// CHECK:       vpgatherdd %xmm8, (%r15,%xmm9,2), %xmm1
-        vpgatherdd XMM10, DWORD PTR [R15 + 2*XMM9], XMM8
+    mov EAX, DWORD PTR FS:[RDI]
+// CHECK: leal (,%rdi,4), %r8d
+    lea R8D, DWORD PTR [4*RDI]
+// CHECK: movl _fnan(,%ecx,4), %ecx
+    mov ECX, DWORD PTR [4*ECX + _fnan]
+// CHECK: movq %fs:320, %rax
+    mov RAX, QWORD PTR FS:[320]
+// CHECK: vpgatherdd %xmm8, (%r15,%xmm9,2), %xmm10
+    vpgatherdd XMM10, DWORD PTR [R15 + 2*XMM9], XMM8
+// CHECK: movsd	-8, %xmm5
+    movsd   XMM5, QWORD PTR [-8]
+// CHECK: movl %ecx, (%eax)
+    mov [eax], ecx
+// CHECK: movl %ecx, (,%ebx,4)
+    mov [4*ebx], ecx
+// CHECK: movl %ecx, (,%ebx,4)
+    mov [ebx*4], ecx
+// CHECK: movl %ecx, 1024
+    mov [1024], ecx
+// CHECK: movl %ecx, 4132
+    mov [0x1024], ecx
+// CHECK: movl %ecx, 32
+    mov [16 + 16], ecx
+// CHECK: movl %ecx, 0
+    mov [16 - 16], ecx
+// CHECK: movl %ecx, 32
+    mov [16][16], ecx
+// CHECK: movl %ecx, (%eax,%ebx,4)
+    mov [eax + 4*ebx], ecx
+// CHECK: movl %ecx, (%eax,%ebx,4)
+    mov [eax + ebx*4], ecx
+// CHECK: movl %ecx, (%eax,%ebx,4)
+    mov [4*ebx + eax], ecx
+// CHECK: movl %ecx, (%eax,%ebx,4)
+    mov [ebx*4 + eax], ecx
+// CHECK: movl %ecx, (%eax,%ebx,4)
+    mov [eax][4*ebx], ecx
+// CHECK: movl %ecx, (%eax,%ebx,4)
+    mov [eax][ebx*4], ecx
+// CHECK: movl %ecx, (%eax,%ebx,4)
+    mov [4*ebx][eax], ecx
+// CHECK: movl %ecx, (%eax,%ebx,4)
+    mov [ebx*4][eax], ecx
+// CHECK: movl %ecx, 12(%eax)
+    mov [eax + 12], ecx
+// CHECK: movl %ecx, 12(%eax)
+    mov [12 + eax], ecx
+// CHECK: movl %ecx, 32(%eax)
+    mov [eax + 16 + 16], ecx
+// CHECK: movl %ecx, 32(%eax)
+    mov [16 + eax + 16], ecx
+// CHECK: movl %ecx, 32(%eax)
+    mov [16 + 16 + eax], ecx
+// CHECK: movl %ecx, 12(%eax)
+    mov [eax][12], ecx
+// CHECK: movl %ecx, 12(%eax)
+    mov [12][eax], ecx
+// CHECK: movl %ecx, 32(%eax)
+    mov [eax][16 + 16], ecx
+// CHECK: movl %ecx, 32(%eax)
+    mov [eax + 16][16], ecx
+// CHECK: movl %ecx, 32(%eax)
+    mov [eax][16][16], ecx
+// CHECK: movl %ecx, 32(%eax)
+    mov [16][eax + 16], ecx
+// CHECK: movl %ecx, 32(%eax)
+    mov [16 + eax][16], ecx
+// CHECK: movl %ecx, 32(%eax)
+    mov [16][16 + eax], ecx
+// CHECK: movl %ecx, 32(%eax)
+    mov [16 + 16][eax], ecx
+// CHECK: movl %ecx, 32(%eax)
+    mov [eax][16][16], ecx
+// CHECK: movl %ecx, 32(%eax)
+    mov [16][eax][16], ecx
+// CHECK: movl %ecx, 32(%eax)
+    mov [16][16][eax], ecx
+// CHECK: movl %ecx, 16(,%ebx,4)
+    mov [4*ebx + 16], ecx
+// CHECK: movl %ecx, 16(,%ebx,4)
+    mov [ebx*4 + 16], ecx
+// CHECK: movl %ecx, 16(,%ebx,4)
+    mov [4*ebx][16], ecx
+// CHECK: movl %ecx, 16(,%ebx,4)
+    mov [ebx*4][16], ecx
+// CHECK: movl %ecx, 16(,%ebx,4)
+    mov [16 + 4*ebx], ecx
+// CHECK: movl %ecx, 16(,%ebx,4)
+    mov [16 + ebx*4], ecx
+// CHECK: movl %ecx, 16(,%ebx,4)
+    mov [16][4*ebx], ecx
+// CHECK: movl %ecx, 16(,%ebx,4)
+    mov [16][ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [eax + 4*ebx + 16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [eax + 16 + 4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [4*ebx + eax + 16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [4*ebx + 16 + eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [16 + eax + 4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [16 + 4*ebx + eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [eax][4*ebx + 16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [eax][16 + 4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [4*ebx][eax + 16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [4*ebx][16 + eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [16][eax + 4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [16][4*ebx + eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [eax + 4*ebx][16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [eax + 16][4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [4*ebx + eax][16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [4*ebx + 16][eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [16 + eax][4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [16 + 4*ebx][eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [eax][4*ebx][16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [eax][16][4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [4*ebx][eax][16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [4*ebx][16][eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [16][eax][4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [16][4*ebx][eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [eax + ebx*4 + 16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [eax + 16 + ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [ebx*4 + eax + 16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [ebx*4 + 16 + eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [16 + eax + ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [16 + ebx*4 + eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [eax][ebx*4 + 16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [eax][16 + ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [ebx*4][eax + 16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [ebx*4][16 + eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [16][eax + ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [16][ebx*4 + eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [eax + ebx*4][16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [eax + 16][ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [ebx*4 + eax][16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [ebx*4 + 16][eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [16 + eax][ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [16 + ebx*4][eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [eax][ebx*4][16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [eax][16][ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [ebx*4][eax][16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [ebx*4][16][eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [16][eax][ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+    mov [16][ebx*4][eax], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax][ebx*4 - 16], ecx
 	ret
diff --git a/test/MC/X86/shuffle-comments.s b/test/MC/X86/shuffle-comments.s
new file mode 100644
index 0000000..20fd4eb
--- /dev/null
+++ b/test/MC/X86/shuffle-comments.s
@@ -0,0 +1,271 @@
+# RUN: llvm-mc %s -triple=x86_64-unknown-unknown | FileCheck %s
+
+palignr $8, %xmm0, %xmm1
+# CHECK: xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+palignr $8, (%rax), %xmm1
+# CHECK: xmm1 = mem[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+
+palignr $16, %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+palignr $16, (%rax), %xmm1
+# CHECK: xmm1 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+
+palignr $0, %xmm0, %xmm1
+# CHECK: xmm1 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+palignr $0, (%rax), %xmm1
+# CHECK: xmm1 = mem[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+
+vpalignr $8, %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+vpalignr $8, (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = mem[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+
+vpalignr $16, %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+vpalignr $16, (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+
+vpalignr $0, %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+vpalignr $0, (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = mem[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+
+vpalignr $8, %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm0[8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7],ymm0[24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23]
+vpalignr $8, (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = mem[8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7],mem[24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23]
+
+vpalignr $16, %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+vpalignr $16, (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+
+vpalignr $0, %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+vpalignr $0, (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = mem[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+
+pshufd $27, %xmm0, %xmm1
+# CHECK: xmm1 = xmm0[3,2,1,0]
+pshufd $27, (%rax), %xmm1
+# CHECK: xmm1 = mem[3,2,1,0]
+
+vpshufd $27, %xmm0, %xmm1
+# CHECK: xmm1 = xmm0[3,2,1,0]
+vpshufd $27, (%rax), %xmm1
+# CHECK: xmm1 = mem[3,2,1,0]
+
+vpshufd $27, %ymm0, %ymm1
+# CHECK: ymm1 = ymm0[3,2,1,0,7,6,5,4]
+vpshufd $27, (%rax), %ymm1
+# CHECK: ymm1 = mem[3,2,1,0,7,6,5,4]
+
+punpcklbw %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+punpcklbw (%rax), %xmm1
+# CHECK: xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3],xmm1[4],mem[4],xmm1[5],mem[5],xmm1[6],mem[6],xmm1[7],mem[7]
+
+vpunpcklbw %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+vpunpcklbw (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3],xmm1[4],mem[4],xmm1[5],mem[5],xmm1[6],mem[6],xmm1[7],mem[7]
+
+vpunpcklbw %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
+vpunpcklbw (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],mem[0],ymm1[1],mem[1],ymm1[2],mem[2],ymm1[3],mem[3],ymm1[4],mem[4],ymm1[5],mem[5],ymm1[6],mem[6],ymm1[7],mem[7],ymm1[16],mem[16],ymm1[17],mem[17],ymm1[18],mem[18],ymm1[19],mem[19],ymm1[20],mem[20],ymm1[21],mem[21],ymm1[22],mem[22],ymm1[23],mem[23]
+
+punpckhbw %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+punpckhbw (%rax), %xmm1
+# CHECK: xmm1 = xmm1[8],mem[8],xmm1[9],mem[9],xmm1[10],mem[10],xmm1[11],mem[11],xmm1[12],mem[12],xmm1[13],mem[13],xmm1[14],mem[14],xmm1[15],mem[15]
+
+vpunpckhbw %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+vpunpckhbw (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[8],mem[8],xmm1[9],mem[9],xmm1[10],mem[10],xmm1[11],mem[11],xmm1[12],mem[12],xmm1[13],mem[13],xmm1[14],mem[14],xmm1[15],mem[15]
+
+vpunpckhbw %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
+vpunpckhbw (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[8],mem[8],ymm1[9],mem[9],ymm1[10],mem[10],ymm1[11],mem[11],ymm1[12],mem[12],ymm1[13],mem[13],ymm1[14],mem[14],ymm1[15],mem[15],ymm1[24],mem[24],ymm1[25],mem[25],ymm1[26],mem[26],ymm1[27],mem[27],ymm1[28],mem[28],ymm1[29],mem[29],ymm1[30],mem[30],ymm1[31],mem[31]
+
+punpcklwd %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+punpcklwd (%rax), %xmm1
+# CHECK: xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+
+vpunpcklwd %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+vpunpcklwd (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+
+vpunpcklwd %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
+vpunpcklwd (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],mem[0],ymm1[1],mem[1],ymm1[2],mem[2],ymm1[3],mem[3],ymm1[8],mem[8],ymm1[9],mem[9],ymm1[10],mem[10],ymm1[11],mem[11]
+
+punpckhwd %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+punpckhwd (%rax), %xmm1
+# CHECK: xmm1 = xmm1[4],mem[4],xmm1[5],mem[5],xmm1[6],mem[6],xmm1[7],mem[7]
+
+vpunpckhwd %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+vpunpckhwd (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[4],mem[4],xmm1[5],mem[5],xmm1[6],mem[6],xmm1[7],mem[7]
+
+vpunpckhwd %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
+vpunpckhwd (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[4],mem[4],ymm1[5],mem[5],ymm1[6],mem[6],ymm1[7],mem[7],ymm1[12],mem[12],ymm1[13],mem[13],ymm1[14],mem[14],ymm1[15],mem[15]
+
+punpckldq %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+punpckldq (%rax), %xmm1
+# CHECK: xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+
+vpunpckldq %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+vpunpckldq (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],mem[0],xmm1[1],mem[1]
+
+vpunpckldq %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
+vpunpckldq (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],mem[0],ymm1[1],mem[1],ymm1[4],mem[4],ymm1[5],mem[5]
+
+punpckhdq %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+punpckhdq (%rax), %xmm1
+# CHECK: xmm1 = xmm1[2],mem[2],xmm1[3],mem[3]
+
+vpunpckhdq %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+vpunpckhdq (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[2],mem[2],xmm1[3],mem[3]
+
+vpunpckhdq %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
+vpunpckhdq (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[2],mem[2],ymm1[3],mem[3],ymm1[6],mem[6],ymm1[7],mem[7]
+
+punpcklqdq %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[0],xmm0[0]
+punpcklqdq (%rax), %xmm1
+# CHECK: xmm1 = xmm1[0],mem[0]
+
+vpunpcklqdq %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],xmm0[0]
+vpunpcklqdq (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],mem[0]
+
+vpunpcklqdq %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+vpunpcklqdq (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],mem[0],ymm1[2],mem[2]
+
+punpckhqdq %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[1],xmm0[1]
+punpckhqdq (%rax), %xmm1
+# CHECK: xmm1 = xmm1[1],mem[1]
+
+vpunpckhqdq %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[1],xmm0[1]
+vpunpckhqdq (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[1],mem[1]
+
+vpunpckhqdq %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
+vpunpckhqdq (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[1],mem[1],ymm1[3],mem[3]
+
+unpcklps %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+unpcklps (%rax), %xmm1
+# CHECK: xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+
+vunpcklps %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+vunpcklps (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],mem[0],xmm1[1],mem[1]
+
+vunpcklps %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
+vunpcklps (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],mem[0],ymm1[1],mem[1],ymm1[4],mem[4],ymm1[5],mem[5]
+
+unpckhps %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+unpckhps (%rax), %xmm1
+# CHECK: xmm1 = xmm1[2],mem[2],xmm1[3],mem[3]
+
+vunpckhps %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+vunpckhps (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[2],mem[2],xmm1[3],mem[3]
+
+vunpckhps %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
+vunpckhps (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[2],mem[2],ymm1[3],mem[3],ymm1[6],mem[6],ymm1[7],mem[7]
+
+unpcklpd %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[0],xmm0[0]
+unpcklpd (%rax), %xmm1
+# CHECK: xmm1 = xmm1[0],mem[0]
+
+vunpcklpd %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],xmm0[0]
+vunpcklpd (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],mem[0]
+
+vunpcklpd %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+vunpcklpd (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],mem[0],ymm1[2],mem[2]
+
+unpckhpd %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[1],xmm0[1]
+unpckhpd (%rax), %xmm1
+# CHECK: xmm1 = xmm1[1],mem[1]
+
+vunpckhpd %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[1],xmm0[1]
+vunpckhpd (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[1],mem[1]
+
+vunpckhpd %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
+vunpckhpd (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[1],mem[1],ymm1[3],mem[3]
+
+shufps $27, %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[3,2],xmm0[1,0]
+shufps $27, (%rax), %xmm1
+# CHECK: xmm1 = xmm1[3,2],mem[1,0]
+
+vshufps $27, %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[3,2],xmm0[1,0]
+vshufps $27, (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[3,2],mem[1,0]
+
+vshufps $27, %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[3,2],ymm0[1,0],ymm1[7,6],ymm0[5,4]
+vshufps $27, (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[3,2],mem[1,0],ymm1[7,6],mem[5,4]
+
+shufpd $3, %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[1],xmm0[1]
+shufpd $3, (%rax), %xmm1
+# CHECK: xmm1 = xmm1[1],mem[1]
+
+vshufpd $3, %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[1],xmm0[1]
+vshufpd $3, (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[1],mem[1]
+
+vshufpd $11, %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
+vshufpd $11, (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[1],mem[1],ymm1[2],mem[3]
diff --git a/test/MC/X86/x86-32-ms-inline-asm.s b/test/MC/X86/x86-32-ms-inline-asm.s
index 73d5878..5524c70 100644
--- a/test/MC/X86/x86-32-ms-inline-asm.s
+++ b/test/MC/X86/x86-32-ms-inline-asm.s
@@ -57,4 +57,17 @@ _t21:                                   ## @t21
 // CHECK: movl 4(%esi,%eax,2), %eax
 // CHECK: # encoding: [0x8b,0x44,0x46,0x04]
 
+    pusha
+// CHECK: pushal
+// CHECK: # encoding: [0x60]
+    popa
+// CHECK: popal
+// CHECK: # encoding: [0x61]
+    pushad
+// CHECK: pushal
+// CHECK: # encoding: [0x60]
+    popad
+// CHECK: popal
+// CHECK: # encoding: [0x61]
+
 	ret
diff --git a/test/MC/X86/x86_64-fma4-encoding.s b/test/MC/X86/x86_64-fma4-encoding.s
index 805fc23..f7ee351 100644
--- a/test/MC/X86/x86_64-fma4-encoding.s
+++ b/test/MC/X86/x86_64-fma4-encoding.s
@@ -73,6 +73,67 @@
 // CHECK: encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
           vfmaddpd   %ymm2, %ymm1, %ymm0, %ymm0
 
+// PR15040
+// CHECK: vfmaddss  foo(%rip), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6a,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+          vfmaddss  foo(%rip), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddss   %xmm1, foo(%rip), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6a,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+          vfmaddss   %xmm1, foo(%rip),%xmm0, %xmm0
+
+// CHECK: vfmaddsd  foo(%rip), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+          vfmaddsd  foo(%rip), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsd   %xmm1, foo(%rip), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6b,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+          vfmaddsd   %xmm1, foo(%rip),%xmm0, %xmm0
+
+// CHECK: vfmaddps  foo(%rip), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x68,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+          vfmaddps  foo(%rip), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddps   %xmm1, foo(%rip), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x68,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+          vfmaddps   %xmm1, foo(%rip),%xmm0, %xmm0
+
+// CHECK: vfmaddpd  foo(%rip), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x69,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+          vfmaddpd  foo(%rip), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddpd   %xmm1, foo(%rip), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x69,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+          vfmaddpd   %xmm1, foo(%rip),%xmm0, %xmm0
+
+// CHECK: vfmaddps  foo(%rip), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x68,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+          vfmaddps  foo(%rip), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddps   %ymm1, foo(%rip), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x68,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+          vfmaddps   %ymm1, foo(%rip),%ymm0, %ymm0
+
+// CHECK: vfmaddpd  foo(%rip), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x69,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+          vfmaddpd  foo(%rip), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddpd   %ymm1, foo(%rip), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x69,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+          vfmaddpd   %ymm1, foo(%rip),%ymm0, %ymm0
+
 // vfmsub
 // CHECK: vfmsubss  (%rcx), %xmm1, %xmm0, %xmm0
 // CHECK: encoding: [0xc4,0xe3,0xf9,0x6e,0x01,0x10]
diff --git a/test/Makefile b/test/Makefile
index 4e690cc..fc8ec08 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -87,14 +87,14 @@ endif # SunOS
 
 check-local:: lit.site.cfg Unit/lit.site.cfg
 	( $(ULIMIT) \
-	  $(LLVM_SRC_ROOT)/utils/lit/lit.py $(LIT_ARGS) $(LIT_TESTSUITE) )
+	  $(PYTHON) $(LLVM_SRC_ROOT)/utils/lit/lit.py $(LIT_ARGS) $(LIT_TESTSUITE) )
 
 # This is a legacy alias dating from when both DejaGNU and lit were in use.
 check-local-lit:: check-local
 
 check-local-all:: lit.site.cfg Unit/lit.site.cfg extra-site-cfgs
 	( $(ULIMIT) \
-	  $(LLVM_SRC_ROOT)/utils/lit/lit.py $(LIT_ARGS) $(LIT_ALL_TESTSUITES) )
+	  $(PYTHON) $(LLVM_SRC_ROOT)/utils/lit/lit.py $(LIT_ARGS) $(LIT_ALL_TESTSUITES) )
 
 clean::
 	$(RM) -rf `find $(LLVM_OBJ_ROOT)/test -name Output -type d -print`
@@ -131,13 +131,14 @@ endif
 
 lit.site.cfg: FORCE
 	@echo "Making LLVM 'lit.site.cfg' file..."
-	@$(ECHOPATH) s=@TARGET_TRIPLE@=$(TARGET_TRIPLE)=g > lit.tmp
+	@$(ECHOPATH) s=@LLVM_HOSTTRIPLE@=$(HOST_TRIPLE)=g > lit.tmp
+	@$(ECHOPATH) s=@TARGET_TRIPLE@=$(TARGET_TRIPLE)=g >> lit.tmp
 	@$(ECHOPATH) s=@LLVM_SOURCE_DIR@=$(LLVM_SRC_ROOT)=g >> lit.tmp
 	@$(ECHOPATH) s=@LLVM_BINARY_DIR@=$(LLVM_OBJ_ROOT)=g >> lit.tmp
 	@$(ECHOPATH) s=@LLVM_TOOLS_DIR@=$(ToolDir)=g >> lit.tmp
 	@$(ECHOPATH) s=@SHLIBDIR@=$(SharedLibDir)=g >> lit.tmp
 	@$(ECHOPATH) s=@SHLIBEXT@=$(SHLIBEXT)=g >> lit.tmp
-	@$(ECHOPATH) s=@PYTHON_EXECUTABLE@=python=g >> lit.tmp
+	@$(ECHOPATH) s=@PYTHON_EXECUTABLE@=$(PYTHON)=g >> lit.tmp
 	@$(ECHOPATH) s=@OCAMLOPT@=$(OCAMLOPT) -cc $(subst *,'\\\"',*$(subst =,"\\=",$(CXX_FOR_OCAMLOPT))*) -I $(LibDir)/ocaml=g >> lit.tmp
 	@$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp
 	@$(ECHOPATH) s=@ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp
diff --git a/test/Object/objdump-sectionheaders.test b/test/Object/objdump-sectionheaders.test
index a417d07..bc2478c 100644
--- a/test/Object/objdump-sectionheaders.test
+++ b/test/Object/objdump-sectionheaders.test
@@ -6,11 +6,11 @@
 
 ; CHECK: Sections:
 ; CHECK: Idx Name          Size      Address          Type
-; CHECK:   0               000000000 00000000000000000 
-; CHECK:   1 .text         000000026 00000000000000000 TEXT DATA 
-; CHECK:   2 .rodata.str1.1 00000000d 00000000000000026 DATA 
-; CHECK:   3 .note.GNU-stack 000000000 00000000000000033 
-; CHECK:   4 .rela.text    000000048 00000000000000038 
-; CHECK:   5 .symtab       0000000c0 00000000000000080 
-; CHECK:   6 .strtab       000000033 00000000000000140 
-; CHECK:   7 .shstrtab     00000004b 00000000000000173 
+; CHECK:   0               00000000 0000000000000000
+; CHECK:   1 .text         00000026 0000000000000000 TEXT DATA
+; CHECK:   2 .rodata.str1.1 0000000d 0000000000000026 DATA
+; CHECK:   3 .note.GNU-stack 00000000 0000000000000033
+; CHECK:   4 .rela.text    00000048 0000000000000038
+; CHECK:   5 .symtab       000000c0 0000000000000080
+; CHECK:   6 .strtab       00000033 0000000000000140
+; CHECK:   7 .shstrtab     0000004b 0000000000000173
diff --git a/test/Object/readobj-shared-object.test b/test/Object/readobj-shared-object.test
index 3065c6f..2c0b54d 100644
--- a/test/Object/readobj-shared-object.test
+++ b/test/Object/readobj-shared-object.test
@@ -71,6 +71,19 @@ ELF:  .symtab                     {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  r
 ELF:  .strtab                     {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  rodata
 ELF:  Total: 14
 
+ELF:Dynamic section contains 9 entries
+ELF:  Tag        Type                 Name/Value
+ELF: 00000001 (NEEDED)             Shared library: [libc.so.6]
+ELF: 00000001 (NEEDED)             Shared library: [libm.so.6]
+ELF: 0000000e (SONAME)             Library soname: [libfoo.so]
+ELF: 00000004 (HASH)               {{[0-9a-f]+}}
+ELF: 00000005 (STRTAB)             {{[0-9a-f]+}}
+ELF: 00000006 (SYMTAB)             {{[0-9a-f]+}}
+ELF: 0000000a (STRSZ)              {{[0-9]+}} (bytes)
+ELF: 0000000b (SYMENT)             {{[0-9]+}} (bytes)
+ELF: 00000000 (NULL)               0x0
+ELF:  Total: 9
+
 ELF:Libraries needed:
 ELF:  libc.so.6
 ELF:  libm.so.6
diff --git a/test/Object/readobj.test b/test/Object/readobj.test
new file mode 100644
index 0000000..e29f404
--- /dev/null
+++ b/test/Object/readobj.test
@@ -0,0 +1,2 @@
+// Don't crash while reading non-dynamic files.
+RUN: llvm-readobj %p/Inputs/trivial-object-test.elf-x86-64
diff --git a/test/Other/close-stderr.ll b/test/Other/close-stderr.ll
index 1d207c7..6e180cd 100644
--- a/test/Other/close-stderr.ll
+++ b/test/Other/close-stderr.ll
@@ -1,9 +1,16 @@
 ; RUN: sh -c 'opt --reject-this-option 2>&-; echo $?; opt -o /dev/null /dev/null 2>&-; echo $?;' \
 ; RUN:   | FileCheck %s
+
 ; CHECK: {{^1$}}
+; Under valgrind, we get 127 here instead.
+; XFAIL: valgrind
+
 ; CHECK: {{^0$}}
 ; XFAIL: vg_leak
 ; REQUIRES: shell
 
+; opt will fail to open /dev/null on native win32.
+; XFAIL: win32
+
 ; Test that the error handling when writing to stderr fails exits the
 ; program cleanly rather than aborting.
diff --git a/test/Other/constant-fold-gep.ll b/test/Other/constant-fold-gep.ll
index eafb16e..0224e9f 100644
--- a/test/Other/constant-fold-gep.ll
+++ b/test/Other/constant-fold-gep.ll
@@ -118,64 +118,64 @@
 ; Duplicate all of the above as function return values rather than
 ; global initializers.
 
-; PLAIN: define i8* @goo8() nounwind {
+; PLAIN: define i8* @goo8() #0 {
 ; PLAIN:   %t = bitcast i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -1) to i8*
 ; PLAIN:   ret i8* %t
 ; PLAIN: }
-; PLAIN: define i1* @goo1() nounwind {
+; PLAIN: define i1* @goo1() #0 {
 ; PLAIN:   %t = bitcast i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -1) to i1*
 ; PLAIN:   ret i1* %t
 ; PLAIN: }
-; PLAIN: define i8* @foo8() nounwind {
+; PLAIN: define i8* @foo8() #0 {
 ; PLAIN:   %t = bitcast i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -2) to i8*
 ; PLAIN:   ret i8* %t
 ; PLAIN: }
-; PLAIN: define i1* @foo1() nounwind {
+; PLAIN: define i1* @foo1() #0 {
 ; PLAIN:   %t = bitcast i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -2) to i1*
 ; PLAIN:   ret i1* %t
 ; PLAIN: }
-; PLAIN: define i8* @hoo8() nounwind {
+; PLAIN: define i8* @hoo8() #0 {
 ; PLAIN:   %t = bitcast i8* getelementptr (i8* null, i32 -1) to i8*
 ; PLAIN:   ret i8* %t
 ; PLAIN: }
-; PLAIN: define i1* @hoo1() nounwind {
+; PLAIN: define i1* @hoo1() #0 {
 ; PLAIN:   %t = bitcast i1* getelementptr (i1* null, i32 -1) to i1*
 ; PLAIN:   ret i1* %t
 ; PLAIN: }
-; OPT: define i8* @goo8() nounwind {
+; OPT: define i8* @goo8() #0 {
 ; OPT:   ret i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -1)
 ; OPT: }
-; OPT: define i1* @goo1() nounwind {
+; OPT: define i1* @goo1() #0 {
 ; OPT:   ret i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -1)
 ; OPT: }
-; OPT: define i8* @foo8() nounwind {
+; OPT: define i8* @foo8() #0 {
 ; OPT:   ret i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -2)
 ; OPT: }
-; OPT: define i1* @foo1() nounwind {
+; OPT: define i1* @foo1() #0 {
 ; OPT:   ret i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -2)
 ; OPT: }
-; OPT: define i8* @hoo8() nounwind {
+; OPT: define i8* @hoo8() #0 {
 ; OPT:   ret i8* getelementptr (i8* null, i32 -1)
 ; OPT: }
-; OPT: define i1* @hoo1() nounwind {
+; OPT: define i1* @hoo1() #0 {
 ; OPT:   ret i1* getelementptr (i1* null, i32 -1)
 ; OPT: }
-; TO: define i8* @goo8() nounwind {
+; TO: define i8* @goo8() #0 {
 ; TO:   ret i8* null
 ; TO: }
-; TO: define i1* @goo1() nounwind {
+; TO: define i1* @goo1() #0 {
 ; TO:   ret i1* null
 ; TO: }
-; TO: define i8* @foo8() nounwind {
+; TO: define i8* @foo8() #0 {
 ; TO:   ret i8* inttoptr (i64 -1 to i8*)
 ; TO: }
-; TO: define i1* @foo1() nounwind {
+; TO: define i1* @foo1() #0 {
 ; TO:   ret i1* inttoptr (i64 -1 to i1*)
 ; TO: }
-; TO: define i8* @hoo8() nounwind {
+; TO: define i8* @hoo8() #0 {
 ; TO:   ret i8* inttoptr (i64 -1 to i8*)
 ; TO: }
-; TO: define i1* @hoo1() nounwind {
+; TO: define i1* @hoo1() #0 {
 ; TO:   ret i1* inttoptr (i64 -1 to i1*)
 ; TO: }
 ; SCEV: Classifying expressions for: @goo8
@@ -220,94 +220,94 @@ define i1* @hoo1() nounwind {
   ret i1* %t
 }
 
-; PLAIN: define i64 @fa() nounwind {
+; PLAIN: define i64 @fa() #0 {
 ; PLAIN:   %t = bitcast i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310) to i64
 ; PLAIN:   ret i64 %t
 ; PLAIN: }
-; PLAIN: define i64 @fb() nounwind {
+; PLAIN: define i64 @fb() #0 {
 ; PLAIN:   %t = bitcast i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) to i64
 ; PLAIN:   ret i64 %t
 ; PLAIN: }
-; PLAIN: define i64 @fc() nounwind {
+; PLAIN: define i64 @fc() #0 {
 ; PLAIN:   %t = bitcast i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2) to i64
 ; PLAIN:   ret i64 %t
 ; PLAIN: }
-; PLAIN: define i64 @fd() nounwind {
+; PLAIN: define i64 @fd() #0 {
 ; PLAIN:   %t = bitcast i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 11) to i64
 ; PLAIN:   ret i64 %t
 ; PLAIN: }
-; PLAIN: define i64 @fe() nounwind {
+; PLAIN: define i64 @fe() #0 {
 ; PLAIN:   %t = bitcast i64 ptrtoint (double* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64) to i64
 ; PLAIN:   ret i64 %t
 ; PLAIN: }
-; PLAIN: define i64 @ff() nounwind {
+; PLAIN: define i64 @ff() #0 {
 ; PLAIN:   %t = bitcast i64 1 to i64
 ; PLAIN:   ret i64 %t
 ; PLAIN: }
-; PLAIN: define i64 @fg() nounwind {
+; PLAIN: define i64 @fg() #0 {
 ; PLAIN:   %t = bitcast i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) to i64
 ; PLAIN:   ret i64 %t
 ; PLAIN: }
-; PLAIN: define i64 @fh() nounwind {
+; PLAIN: define i64 @fh() #0 {
 ; PLAIN:   %t = bitcast i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64) to i64
 ; PLAIN:   ret i64 %t
 ; PLAIN: }
-; PLAIN: define i64 @fi() nounwind {
+; PLAIN: define i64 @fi() #0 {
 ; PLAIN:   %t = bitcast i64 ptrtoint (i1** getelementptr ({ i1, i1* }* null, i64 0, i32 1) to i64) to i64
 ; PLAIN:   ret i64 %t
 ; PLAIN: }
-; OPT: define i64 @fa() nounwind {
+; OPT: define i64 @fa() #0 {
 ; OPT:   ret i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310)
 ; OPT: }
-; OPT: define i64 @fb() nounwind {
+; OPT: define i64 @fb() #0 {
 ; OPT:   ret i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64)
 ; OPT: }
-; OPT: define i64 @fc() nounwind {
+; OPT: define i64 @fc() #0 {
 ; OPT:   ret i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2)
 ; OPT: }
-; OPT: define i64 @fd() nounwind {
+; OPT: define i64 @fd() #0 {
 ; OPT:   ret i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 11)
 ; OPT: }
-; OPT: define i64 @fe() nounwind {
+; OPT: define i64 @fe() #0 {
 ; OPT:   ret i64 ptrtoint (double* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64)
 ; OPT: }
-; OPT: define i64 @ff() nounwind {
+; OPT: define i64 @ff() #0 {
 ; OPT:   ret i64 1
 ; OPT: }
-; OPT: define i64 @fg() nounwind {
+; OPT: define i64 @fg() #0 {
 ; OPT:   ret i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64)
 ; OPT: }
-; OPT: define i64 @fh() nounwind {
+; OPT: define i64 @fh() #0 {
 ; OPT:   ret i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64)
 ; OPT: }
-; OPT: define i64 @fi() nounwind {
+; OPT: define i64 @fi() #0 {
 ; OPT:   ret i64 ptrtoint (i1** getelementptr ({ i1, i1* }* null, i64 0, i32 1) to i64)
 ; OPT: }
-; TO: define i64 @fa() nounwind {
+; TO: define i64 @fa() #0 {
 ; TO:   ret i64 18480
 ; TO: }
-; TO: define i64 @fb() nounwind {
+; TO: define i64 @fb() #0 {
 ; TO:   ret i64 8
 ; TO: }
-; TO: define i64 @fc() nounwind {
+; TO: define i64 @fc() #0 {
 ; TO:   ret i64 16
 ; TO: }
-; TO: define i64 @fd() nounwind {
+; TO: define i64 @fd() #0 {
 ; TO:   ret i64 88
 ; TO: }
-; TO: define i64 @fe() nounwind {
+; TO: define i64 @fe() #0 {
 ; TO:   ret i64 16
 ; TO: }
-; TO: define i64 @ff() nounwind {
+; TO: define i64 @ff() #0 {
 ; TO:   ret i64 1
 ; TO: }
-; TO: define i64 @fg() nounwind {
+; TO: define i64 @fg() #0 {
 ; TO:   ret i64 8
 ; TO: }
-; TO: define i64 @fh() nounwind {
+; TO: define i64 @fh() #0 {
 ; TO:   ret i64 8
 ; TO: }
-; TO: define i64 @fi() nounwind {
+; TO: define i64 @fi() #0 {
 ; TO:   ret i64 8
 ; TO: }
 ; SCEV: Classifying expressions for: @fa
@@ -375,34 +375,34 @@ define i64 @fi() nounwind {
   ret i64 %t
 }
 
-; PLAIN: define i64* @fM() nounwind {
+; PLAIN: define i64* @fM() #0 {
 ; PLAIN:   %t = bitcast i64* getelementptr (i64* null, i32 1) to i64*
 ; PLAIN:   ret i64* %t
 ; PLAIN: }
-; PLAIN: define i64* @fN() nounwind {
+; PLAIN: define i64* @fN() #0 {
 ; PLAIN:   %t = bitcast i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1) to i64*
 ; PLAIN:   ret i64* %t
 ; PLAIN: }
-; PLAIN: define i64* @fO() nounwind {
+; PLAIN: define i64* @fO() #0 {
 ; PLAIN:   %t = bitcast i64* getelementptr ([2 x i64]* null, i32 0, i32 1) to i64*
 ; PLAIN:   ret i64* %t
 ; PLAIN: }
-; OPT: define i64* @fM() nounwind {
+; OPT: define i64* @fM() #0 {
 ; OPT:   ret i64* getelementptr (i64* null, i32 1)
 ; OPT: }
-; OPT: define i64* @fN() nounwind {
+; OPT: define i64* @fN() #0 {
 ; OPT:   ret i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1)
 ; OPT: }
-; OPT: define i64* @fO() nounwind {
+; OPT: define i64* @fO() #0 {
 ; OPT:   ret i64* getelementptr ([2 x i64]* null, i32 0, i32 1)
 ; OPT: }
-; TO: define i64* @fM() nounwind {
+; TO: define i64* @fM() #0 {
 ; TO:   ret i64* inttoptr (i64 8 to i64*)
 ; TO: }
-; TO: define i64* @fN() nounwind {
+; TO: define i64* @fN() #0 {
 ; TO:   ret i64* inttoptr (i64 8 to i64*)
 ; TO: }
-; TO: define i64* @fO() nounwind {
+; TO: define i64* @fO() #0 {
 ; TO:   ret i64* inttoptr (i64 8 to i64*)
 ; TO: }
 ; SCEV: Classifying expressions for: @fM
@@ -428,14 +428,14 @@ define i64* @fO() nounwind {
   ret i64* %t
 }
 
-; PLAIN: define i32* @fZ() nounwind {
+; PLAIN: define i32* @fZ() #0 {
 ; PLAIN:   %t = bitcast i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1) to i32*
 ; PLAIN:   ret i32* %t
 ; PLAIN: }
-; OPT: define i32* @fZ() nounwind {
+; OPT: define i32* @fZ() #0 {
 ; OPT:   ret i32* getelementptr (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1)
 ; OPT: }
-; TO: define i32* @fZ() nounwind {
+; TO: define i32* @fZ() #0 {
 ; TO:   ret i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 1)
 ; TO: }
 ; SCEV: Classifying expressions for: @fZ
@@ -446,3 +446,5 @@ define i32* @fZ() nounwind {
   %t = bitcast i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1) to i32*
   ret i32* %t
 }
+
+; PLAIN: attributes #0 = { nounwind }
diff --git a/test/Scripts/coff-dump.py.bat b/test/Scripts/coff-dump.py.bat
index 56428e1..56428e1 100644..100755
--- a/test/Scripts/coff-dump.py.bat
+++ b/test/Scripts/coff-dump.py.bat
diff --git a/test/Scripts/elf-dump.bat b/test/Scripts/elf-dump.bat
index 9c70808..9c70808 100644..100755
--- a/test/Scripts/elf-dump.bat
+++ b/test/Scripts/elf-dump.bat
diff --git a/test/TableGen/Slice.td b/test/TableGen/Slice.td
index cec9fb6..7a35d31 100644
--- a/test/TableGen/Slice.td
+++ b/test/TableGen/Slice.td
@@ -1,5 +1,4 @@
-// RUN: llvm-tblgen %s | grep "\[(set" | count 2
-// RUN: llvm-tblgen %s | grep "\[\]" | count 2
+// RUN: llvm-tblgen %s | FileCheck %s
 
 class ValueType<int size, int value> {
   int Size = size;
@@ -85,3 +84,8 @@ multiclass myscalar<bits<8> opcode, string asmstr = "", list<list<dag>> patterns
   vscalar<opcode, asmstr, patterns>;
 
 defm NOT : myscalar<0x10, "not", [[], [(set FR32:$dst, (f32 (not FR32:$src)))]]>;
+
+// CHECK: Pattern = [(set FR32:$dst, (f32 (not FR32:$src)))];
+// CHECK: Pattern = [];
+// CHECK: Pattern = [(set FR32:$dst, (f32 (not FR32:$src)))];
+// CHECK: Pattern = [];
diff --git a/test/TableGen/math.td b/test/TableGen/math.td
new file mode 100644
index 0000000..bde267a
--- /dev/null
+++ b/test/TableGen/math.td
@@ -0,0 +1,18 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+
+class Int<int value> {
+  int Value = value;
+}
+
+def v1024   : Int<1024>;
+// CHECK: def v1024
+// CHECK: Value = 1024
+
+def v1025   : Int<!add(v1024.Value, 1)>;
+// CHECK: def v1025
+// CHECK: Value = 1025
+
+def v2048   : Int<!add(v1024.Value, v1024.Value)>;
+// CHECK: def v2048
+// CHECK: Value = 2048
+
diff --git a/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
index e740b29..1226b98 100644
--- a/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
+++ b/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
@@ -1,15 +1,19 @@
-; RUN: opt < %s -argpromotion -S | grep nounwind | count 2
+; RUN: opt < %s -argpromotion -S | FileCheck %s
 
+; CHECK: define internal i32 @deref(i32 %x.val) #0 {
 define internal i32 @deref(i32* %x) nounwind {
 entry:
-	%tmp2 = load i32* %x, align 4		; <i32> [#uses=1]
-	ret i32 %tmp2
+  %tmp2 = load i32* %x, align 4
+  ret i32 %tmp2
 }
 
 define i32 @f(i32 %x) {
 entry:
-	%x_addr = alloca i32		; <i32*> [#uses=2]
-	store i32 %x, i32* %x_addr, align 4
-	%tmp1 = call i32 @deref( i32* %x_addr ) nounwind 		; <i32> [#uses=1]
-	ret i32 %tmp1
+  %x_addr = alloca i32
+  store i32 %x, i32* %x_addr, align 4
+; CHECK: %tmp1 = call i32 @deref(i32 %x_addr.val) [[NUW:#[0-9]+]]
+  %tmp1 = call i32 @deref( i32* %x_addr ) nounwind
+  ret i32 %tmp1
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/BBVectorize/X86/pr15289.ll b/test/Transforms/BBVectorize/X86/pr15289.ll
new file mode 100644
index 0000000..07cc5d8
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/pr15289.ll
@@ -0,0 +1,98 @@
+; RUN: opt < %s -basicaa -bb-vectorize -disable-output
+; This is a bugpoint-reduced test case. It did not always assert, but it does reproduce the bug,
+; and running it under valgrind (or a similar tool) will catch the error.
+
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin12.2.0"
+
+%0 = type { [10 x { float, float }], [10 x { float, float }], [10 x { float, float }], [10 x { float, float }], [10 x { float, float }] }
+%1 = type { [10 x [8 x i8]] }
+%2 = type { i64, i64 }
+%3 = type { [10 x i64], i64, i64, i64, i64, i64 }
+%4 = type { i64, i64, i64, i64, i64, i64 }
+%5 = type { [10 x i64] }
+%6 = type { [10 x float], [10 x float], [10 x float], [10 x float] }
+%struct.__st_parameter_dt.1.3.5.7 = type { %struct.__st_parameter_common.0.2.4.6, i64, i64*, i64*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, [256 x i8], i32*, i64, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, [4 x i8] }
+%struct.__st_parameter_common.0.2.4.6 = type { i32, i32, i8*, i32, i32, i8*, i32* }
+
+@cctenso_ = external unnamed_addr global %0, align 32
+@ctenso_ = external unnamed_addr global %1, align 32
+@i_dim_ = external unnamed_addr global %2, align 16
+@itenso1_ = external unnamed_addr global %3, align 32
+@itenso2_ = external unnamed_addr global %4, align 32
+@ltenso_ = external unnamed_addr global %5, align 32
+@rtenso_ = external unnamed_addr global %6, align 32
+@.cst = external unnamed_addr constant [8 x i8], align 8
+@.cst1 = external unnamed_addr constant [3 x i8], align 8
+@.cst2 = external unnamed_addr constant [29 x i8], align 8
+@.cst3 = external unnamed_addr constant [32 x i8], align 64
+
+define void @cart_to_dc2y_(double* noalias nocapture %xx, double* noalias nocapture %yy, double* noalias nocapture %zz, [5 x { double, double }]* noalias nocapture %c2ten) nounwind uwtable {
+entry:
+  %0 = fmul double undef, undef
+  %1 = fmul double undef, undef
+  %2 = fadd double undef, undef
+  %3 = fmul double undef, 0x3FE8B8B76E3E9919
+  %4 = fsub double %0, %1
+  %5 = fsub double -0.000000e+00, undef
+  %6 = fmul double undef, undef
+  %7 = fmul double %4, %6
+  %8 = fmul double undef, 2.000000e+00
+  %9 = fmul double %8, undef
+  %10 = fmul double undef, %9
+  %11 = fmul double %10, undef
+  %12 = fsub double undef, %7
+  %13 = fmul double %3, %12
+  %14 = fmul double %3, undef
+  %15 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 0
+  store double %13, double* %15, align 8, !tbaa !0
+  %16 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 1
+  %17 = fmul double undef, %8
+  %18 = fmul double %17, undef
+  %19 = fmul double undef, %18
+  %20 = fadd double undef, undef
+  %21 = fmul double %3, %19
+  %22 = fsub double -0.000000e+00, %21
+  %23 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 0
+  store double %22, double* %23, align 8, !tbaa !0
+  %24 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 1
+  %25 = fmul double undef, 0x3FE42F601A8C6794
+  %26 = fmul double undef, 2.000000e+00
+  %27 = fsub double %26, %0
+  %28 = fmul double %6, undef
+  %29 = fsub double undef, %28
+  %30 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 0
+  store double undef, double* %30, align 8, !tbaa !0
+  %31 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 1
+  %32 = fmul double undef, %17
+  %33 = fmul double undef, %17
+  %34 = fmul double undef, %32
+  %35 = fmul double undef, %33
+  %36 = fsub double undef, %35
+  %37 = fmul double %3, %34
+  %38 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 0
+  store double %37, double* %38, align 8, !tbaa !0
+  %39 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 1
+  %40 = fmul double undef, %8
+  %41 = fmul double undef, %40
+  %42 = fmul double undef, %41
+  %43 = fsub double undef, %42
+  %44 = fmul double %3, %43
+  %45 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 0
+  store double %13, double* %45, align 8, !tbaa !0
+  %46 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 1
+  %47 = fsub double -0.000000e+00, %14
+  store double %47, double* %16, align 8, !tbaa !0
+  store double undef, double* %24, align 8, !tbaa !0
+  store double -0.000000e+00, double* %31, align 8, !tbaa !0
+  store double undef, double* %39, align 8, !tbaa !0
+  store double undef, double* %46, align 8, !tbaa !0
+  ret void
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!0 = metadata !{metadata !"alias set 17: real(kind=8)", metadata !1}
+!1 = metadata !{metadata !1}
diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll
index d7b7d6b..e4d5152 100644
--- a/test/Transforms/BBVectorize/simple-int.ll
+++ b/test/Transforms/BBVectorize/simple-int.ll
@@ -124,8 +124,10 @@ define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
 ; CHECK: ret double %R
 }
 
-; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-; CHECK: declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) nounwind readonly
-; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) nounwind readonly
+; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
+; CHECK: declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
+; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #1
+; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) #1
 
+; CHECK: attributes #0 = { nounwind readnone }
+; CHECK: attributes #1 = { nounwind readonly }
diff --git a/test/Transforms/ConstProp/2007-11-23-cttz.ll b/test/Transforms/ConstProp/2007-11-23-cttz.ll
index 6d34cb1..c5ee70c 100644
--- a/test/Transforms/ConstProp/2007-11-23-cttz.ll
+++ b/test/Transforms/ConstProp/2007-11-23-cttz.ll
@@ -3,6 +3,6 @@
 declare i13 @llvm.cttz.i13(i13, i1)
 
 define i13 @test() {
-	%X = call i13 @llvm.cttz.i13(i13 0, i1 true)
+	%X = call i13 @llvm.cttz.i13(i13 0, i1 false)
 	ret i13 %X
 }
diff --git a/test/Transforms/CorrelatedValuePropagation/basic.ll b/test/Transforms/CorrelatedValuePropagation/basic.ll
index 475cd8d..39c437c 100644
--- a/test/Transforms/CorrelatedValuePropagation/basic.ll
+++ b/test/Transforms/CorrelatedValuePropagation/basic.ll
@@ -81,6 +81,26 @@ LessThanOrEqualToTwo:
   ret i32 0
 }
 
+declare i32* @f(i32*)
+define void @test5(i32* %x, i32* %y) {
+; CHECK: @test5
+entry:
+  %pre = icmp eq i32* %x, null
+  br i1 %pre, label %return, label %loop
+
+loop:
+  %phi = phi i32* [ %sel, %loop ], [ %x, %entry ]
+; CHECK: %phi = phi i32* [ %f, %loop ], [ %x, %entry ]
+  %f = tail call i32* @f(i32* %phi)
+  %cmp1 = icmp ne i32* %f, %y
+  %sel = select i1 %cmp1, i32* %f, i32* null
+  %cmp2 = icmp eq i32* %sel, null
+  br i1 %cmp2, label %return, label %loop
+
+return:
+  ret void
+}
+
 define i32 @switch1(i32 %s) {
 ; CHECK: @switch1
 entry:
@@ -105,7 +125,7 @@ negative:
   ]
 
 out:
-  %p = phi i32 [ 1, %entry ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ]
+  %p = phi i32 [ 1, %entry ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ]
   ret i32 %p
 
 next:
diff --git a/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll b/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
index 7c6c575..f049265 100644
--- a/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
+++ b/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
@@ -1,20 +1,20 @@
-; RUN: opt < %s -deadargelim -S > %t
-; RUN: cat %t | grep nounwind | count 2
-; RUN: cat %t | grep signext | count 2
-; RUN: cat %t | not grep inreg
-; RUN: cat %t | not grep zeroext
-; RUN: cat %t | not grep byval
+; RUN: opt < %s -deadargelim -S | FileCheck %s
 
-	%struct = type { }
+%struct = type { }
 
 @g = global i8 0
 
+; CHECK: define internal void @foo(i8 signext %y) [[NUW:#[0-9]+]]
+
 define internal zeroext i8 @foo(i8* inreg %p, i8 signext %y, ... )  nounwind {
-	store i8 %y, i8* @g
-	ret i8 0
+  store i8 %y, i8* @g
+  ret i8 0
 }
 
 define i32 @bar() {
-	%A = call zeroext i8(i8*, i8, ...)* @foo(i8* inreg null, i8 signext 1, %struct* byval null ) nounwind
-	ret i32 0
+; CHECK: call void @foo(i8 signext 1) [[NUW]]
+  %A = call zeroext i8(i8*, i8, ...)* @foo(i8* inreg null, i8 signext 1, %struct* byval null ) nounwind
+  ret i32 0
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
index 2f820ba..f5d2588 100644
--- a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
+++ b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
@@ -8,14 +8,14 @@ entry:
   call void @llvm.dbg.value(metadata !{i32 %len}, i64 0, metadata !10)
   call void @llvm.dbg.value(metadata !{i32 %hash}, i64 0, metadata !11)
   call void @llvm.dbg.value(metadata !{i32 %flags}, i64 0, metadata !12)
-; CHECK:  call fastcc i8* @add_name_internal(i8* %name, i32 %hash) nounwind, !dbg !13
+; CHECK:  call fastcc i8* @add_name_internal(i8* %name, i32 %hash) [[NUW:#[0-9]+]], !dbg !13
   %0 = call fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext 0, i32 %flags) nounwind, !dbg !13 ; <i8*> [#uses=1]
   ret i8* %0, !dbg !13
 }
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
-define internal fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext %extra, i32 %flags) nounwind noinline ssp {
+define internal fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext %extra, i32 %flags) noinline nounwind ssp {
 entry:
   call void @llvm.dbg.value(metadata !{i8* %name}, i64 0, metadata !15)
   call void @llvm.dbg.value(metadata !{i32 %len}, i64 0, metadata !20)
@@ -38,6 +38,11 @@ bb2:                                              ; preds = %bb1, %bb
 
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
+; CHECK: attributes #0 = { nounwind ssp }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes #2 = { noinline nounwind ssp }
+; CHECK: attributes [[NUW]] = { nounwind }
+
 !0 = metadata !{i32 524545, metadata !1, metadata !"name", metadata !2, i32 8, metadata !6} ; [ DW_TAG_arg_variable ]
 !1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"vfs_addname", metadata !"vfs_addname", metadata !"vfs_addname", metadata !2, i32 12, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 524329, metadata !"tail.c", metadata !"/Users/echeng/LLVM/radars/r7927803/", metadata !3} ; [ DW_TAG_file_type ]
diff --git a/test/Transforms/DeadArgElim/dbginfo.ll b/test/Transforms/DeadArgElim/dbginfo.ll
index b07b60d..59eb458 100644
--- a/test/Transforms/DeadArgElim/dbginfo.ll
+++ b/test/Transforms/DeadArgElim/dbginfo.ll
@@ -37,13 +37,11 @@ entry:
 !llvm.dbg.cu = !{!0}
 
 !0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di", metadata !"clang version 3.2 (trunk 165305)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !8, metadata !9}
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !8, metadata !9}
 !5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"run", metadata !"run", metadata !"", metadata !6, i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run]
 !6 = metadata !{i32 786473, metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !1, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{i32 786478, i32 0, metadata !6, metadata !"dead_vararg", metadata !"dead_vararg", metadata !"", metadata !6, i32 5, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (...)* @_ZN12_GLOBAL__N_111dead_varargEz, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [dead_vararg]
 
 ; CHECK: metadata !"dead_vararg"{{.*}}void ()* @_ZN12_GLOBAL__N_111dead_varargEz
diff --git a/test/Transforms/DeadArgElim/keepalive.ll b/test/Transforms/DeadArgElim/keepalive.ll
index dc92dc9..e41110c 100644
--- a/test/Transforms/DeadArgElim/keepalive.ll
+++ b/test/Transforms/DeadArgElim/keepalive.ll
@@ -1,6 +1,4 @@
-; RUN: opt < %s -deadargelim -S > %t
-; RUN: grep "define internal zeroext i32 @test1() nounwind" %t
-; RUN: grep "define internal <{ i32, i32 }> @test2" %t
+; RUN: opt < %s -deadargelim -S | FileCheck %s
 
 %Ty = type <{ i32, i32 }>
 
@@ -9,11 +7,13 @@
 ; the function and then changing too much.
 
 ; This checks if the return value attributes are not removed
+; CHECK: define internal zeroext i32 @test1() #0
 define internal zeroext i32 @test1(i32 %DEADARG1) nounwind {
         ret i32 1
 }
 
 ; This checks if the struct doesn't get non-packed
+; CHECK: define internal <{ i32, i32 }> @test2
 define internal <{ i32, i32 }> @test2(i32 %DEADARG1) {
         ret <{ i32, i32 }> <{ i32 1, i32 2 }>
 }
@@ -28,3 +28,4 @@ define void @caller() {
         ret void
 }
 
+; CHECK: attributes #0 = { nounwind }
diff --git a/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll b/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
index 946453f..36a7658 100644
--- a/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
@@ -1,18 +1,24 @@
-; RUN: opt < %s -basicaa -functionattrs -S | grep readnone | count 4
+; RUN: opt < %s -basicaa -functionattrs -S | FileCheck %s
 @x = global i32 0
 
+; CHECK: declare i32 @e() #0
 declare i32 @e() readnone
 
+; CHECK: define i32 @f() #0
 define i32 @f() {
 	%tmp = call i32 @e( )		; <i32> [#uses=1]
 	ret i32 %tmp
 }
 
+; CHECK: define i32 @g() #0
 define i32 @g() readonly {
 	ret i32 0
 }
 
+; CHECK: define i32 @h() #0
 define i32 @h() readnone {
 	%tmp = load i32* @x		; <i32> [#uses=1]
 	ret i32 %tmp
 }
+
+; CHECK: attributes #0 = { readnone }
diff --git a/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll b/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
index 22eca13..d8256ae 100644
--- a/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
@@ -1,9 +1,13 @@
-; RUN: opt < %s -basicaa -functionattrs -S | grep readonly | count 2
+; RUN: opt < %s -basicaa -functionattrs -S | FileCheck %s
 
+; CHECK: define i32 @f() #0
 define i32 @f() {
 entry:
-	%tmp = call i32 @e( )		; <i32> [#uses=1]
-	ret i32 %tmp
+  %tmp = call i32 @e( )
+  ret i32 %tmp
 }
 
+; CHECK: declare i32 @e() #0
 declare i32 @e() readonly
+
+; CHECK: attributes #0 = { readonly }
diff --git a/test/Transforms/FunctionAttrs/atomic.ll b/test/Transforms/FunctionAttrs/atomic.ll
index 7c2bff7..027ee0f 100644
--- a/test/Transforms/FunctionAttrs/atomic.ll
+++ b/test/Transforms/FunctionAttrs/atomic.ll
@@ -3,7 +3,7 @@
 ; Atomic load/store to local doesn't affect whether a function is
 ; readnone/readonly.
 define i32 @test1(i32 %x) uwtable ssp {
-; CHECK: define i32 @test1(i32 %x) uwtable readnone ssp {
+; CHECK: define i32 @test1(i32 %x) #0 {
 entry:
   %x.addr = alloca i32, align 4
   store atomic i32 %x, i32* %x.addr seq_cst, align 4
@@ -13,9 +13,11 @@ entry:
 
 ; A function with an Acquire load is not readonly.
 define i32 @test2(i32* %x) uwtable ssp {
-; CHECK: define i32 @test2(i32* nocapture %x) uwtable ssp {
+; CHECK: define i32 @test2(i32* nocapture %x) #1 {
 entry:
   %r = load atomic i32* %x seq_cst, align 4
   ret i32 %r
 }
 
+; CHECK: attributes #0 = { readnone ssp uwtable }
+; CHECK: attributes #1 = { ssp uwtable }
diff --git a/test/Transforms/FunctionAttrs/noreturn.ll b/test/Transforms/FunctionAttrs/noreturn.ll
new file mode 100644
index 0000000..470ebcb
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/noreturn.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -functionattrs -instcombine -S | FileCheck %s
+
+define void @endless_loop() noreturn nounwind readnone ssp uwtable {
+entry:
+  br label %while.body
+
+while.body:
+  br label %while.body
+}
+;CHECK: @main
+;CHECK: endless_loop
+;CHECK: ret
+define i32 @main() noreturn nounwind ssp uwtable {
+entry:
+  tail call void @endless_loop()
+  unreachable
+}
+
diff --git a/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll b/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll
new file mode 100644
index 0000000..9295c20
--- /dev/null
+++ b/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; rdar://12580965.
+; ObjC++ test case.
+
+%struct.ButtonInitData = type { i8* }
+
+@_ZL14buttonInitData = internal global [1 x %struct.ButtonInitData] zeroinitializer, align 4
+
+@"\01L_OBJC_METH_VAR_NAME_40" = internal global [7 x i8] c"print:\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_41" = internal externally_initialized  global i8* getelementptr inbounds ([7 x i8]* @"\01L_OBJC_METH_VAR_NAME_40", i32 0, i32 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+@llvm.used = appending global [2 x i8*] [i8* getelementptr inbounds ([7 x i8]* @"\01L_OBJC_METH_VAR_NAME_40", i32 0, i32 0),  i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_41" to i8*)]
+
+define internal void @__cxx_global_var_init() section "__TEXT,__StaticInit,regular,pure_instructions" {
+  %1 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_41", !invariant.load !2009
+  store i8* %1, i8** getelementptr inbounds ([1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4
+  ret void
+}
+
+define internal void @_GLOBAL__I_a() section "__TEXT,__StaticInit,regular,pure_instructions" {
+  call void @__cxx_global_var_init()
+  ret void
+}
+
+declare void @test(i8*)
+
+define void @print() {
+; CHECK: %1 = load i8** getelementptr inbounds ([1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4
+  %1 = load i8** getelementptr inbounds ([1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4
+  call void @test(i8* %1)
+  ret void
+}
+
+!2009 = metadata !{}
diff --git a/test/Transforms/GlobalOpt/integer-bool.ll b/test/Transforms/GlobalOpt/integer-bool.ll
index 5a34a9c..cf025ec 100644
--- a/test/Transforms/GlobalOpt/integer-bool.ll
+++ b/test/Transforms/GlobalOpt/integer-bool.ll
@@ -1,23 +1,28 @@
-; RUN: opt < %s -globalopt -instcombine | \
-; RUN:    llvm-dis | grep "ret i1 true"
-
+; RUN: opt < %s -S -globalopt -instcombine | FileCheck %s
 ;; check that global opt turns integers that only hold 0 or 1 into bools.
 
-@G = internal global i32 0              ; <i32*> [#uses=3]
+@G = internal addrspace(1) global i32 0
+; CHECK: @G.b
+; CHECK: addrspace(1)
+; CHECK: global i1 false
 
 define void @set1() {
-        store i32 0, i32* @G
-        ret void
+  store i32 0, i32 addrspace(1)* @G
+; CHECK: store i1 false
+  ret void
 }
 
 define void @set2() {
-        store i32 1, i32* @G
-        ret void
+  store i32 1, i32 addrspace(1)* @G
+; CHECK: store i1 true
+  ret void
 }
 
 define i1 @get() {
-        %A = load i32* @G               ; <i32> [#uses=1]
-        %C = icmp slt i32 %A, 2         ; <i1> [#uses=1]
-        ret i1 %C
+; CHECK: @get
+  %A = load i32 addrspace(1) * @G
+  %C = icmp slt i32 %A, 2
+  ret i1 %C
+; CHECK: ret i1 true
 }
 
diff --git a/test/Transforms/IPConstantProp/user-with-multiple-uses.ll b/test/Transforms/IPConstantProp/user-with-multiple-uses.ll
index 402ea41..9687180 100644
--- a/test/Transforms/IPConstantProp/user-with-multiple-uses.ll
+++ b/test/Transforms/IPConstantProp/user-with-multiple-uses.ll
@@ -4,9 +4,9 @@
 ; IPSCCP should propagate the 0 argument, eliminate the switch, and propagate
 ; the result.
 
-; CHECK: define i32 @main() noreturn nounwind {
+; CHECK: define i32 @main() #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: %call2 = tail call i32 @wwrite(i64 0) nounwind
+; CHECK-NEXT: %call2 = tail call i32 @wwrite(i64 0) [[NUW:#[0-9]+]]
 ; CHECK-NEXT: ret i32 123
 
 define i32 @main() noreturn nounwind {
@@ -28,3 +28,7 @@ sw.default:
 return:
   ret i32 0
 }
+
+; CHECK: attributes #0 = { noreturn nounwind }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/Inline/inline_invoke.ll b/test/Transforms/Inline/inline_invoke.ll
index 9f5f670..c53bb5a 100644
--- a/test/Transforms/Inline/inline_invoke.ll
+++ b/test/Transforms/Inline/inline_invoke.ll
@@ -330,7 +330,7 @@ terminate:
 ; CHECK-NEXT: br label %[[JOIN]]
 ; CHECK:    [[JOIN]]:
 ; CHECK-NEXT: phi { i8*, i32 }
-; CHECK-NEXT: call void @opaque() nounwind
+; CHECK-NEXT: call void @opaque() [[NUW:#[0-9]+]]
 ; CHECK-NEXT: br label %[[FIX:[^\s]+]]
 ; CHECK:    lpad:
 ; CHECK-NEXT: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
@@ -340,3 +340,8 @@ terminate:
 ; CHECK-NEXT: [[T1:%.*]] = phi i32 [ 0, %[[JOIN]] ], [ 1, %lpad ]
 ; CHECK-NEXT: call void @use(i32 [[T1]])
 ; CHECK-NEXT: call void @_ZSt9terminatev()
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes #2 = { ssp uwtable }
+; CHECK: attributes #3 = { noreturn nounwind }
diff --git a/test/Transforms/Inline/inline_ssp.ll b/test/Transforms/Inline/inline_ssp.ll
new file mode 100644
index 0000000..a4b43a7
--- /dev/null
+++ b/test/Transforms/Inline/inline_ssp.ll
@@ -0,0 +1,160 @@
+; RUN: opt -inline %s -S | FileCheck %s
+; Ensure SSP attributes are propagated correctly when inlining.
+
+@.str = private unnamed_addr constant [11 x i8] c"fun_nossp\0A\00", align 1
+@.str1 = private unnamed_addr constant [9 x i8] c"fun_ssp\0A\00", align 1
+@.str2 = private unnamed_addr constant [15 x i8] c"fun_sspstrong\0A\00", align 1
+@.str3 = private unnamed_addr constant [12 x i8] c"fun_sspreq\0A\00", align 1
+
+; These first four functions (@fun_sspreq, @fun_sspstrong, @fun_ssp, @fun_nossp)
+; are used by the remaining functions to ensure that the SSP attributes are
+; propagated correctly.  The caller should have its SSP attribute set as:
+; strictest(caller-ssp-attr, callee-ssp-attr), where strictness is ordered as:
+;  sspreq > sspstrong > ssp > [no ssp]
+define internal void @fun_sspreq() nounwind sspreq uwtable {
+entry:
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str3, i32 0, i32 0))
+  ret void
+}
+
+define internal void @fun_sspstrong() nounwind sspstrong uwtable {
+entry:
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str2, i32 0, i32 0))
+  ret void
+}
+
+define internal void @fun_ssp() nounwind ssp uwtable {
+entry:
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str1, i32 0, i32 0))
+  ret void
+}
+
+define internal void @fun_nossp() nounwind uwtable {
+entry:
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0))
+  ret void
+}
+
+; Tests start below
+
+define void @inline_req_req() nounwind sspreq uwtable {
+entry:
+; CHECK: @inline_req_req() #0
+  call void @fun_sspreq()
+  ret void
+}
+
+define void @inline_req_strong() nounwind sspstrong uwtable {
+entry:
+; CHECK: @inline_req_strong() #0
+  call void @fun_sspreq()
+  ret void
+}
+
+define void @inline_req_ssp() nounwind ssp uwtable {
+entry:
+; CHECK: @inline_req_ssp() #0
+  call void @fun_sspreq()
+  ret void
+}
+
+define void @inline_req_nossp() nounwind uwtable {
+entry:
+; CHECK: @inline_req_nossp() #0
+  call void @fun_sspreq()
+  ret void
+}
+
+define void @inline_strong_req() nounwind sspreq uwtable {
+entry:
+; CHECK: @inline_strong_req() #0
+  call void @fun_sspstrong()
+  ret void
+}
+
+
+define void @inline_strong_strong() nounwind sspstrong uwtable {
+entry:
+; CHECK: @inline_strong_strong() #1
+  call void @fun_sspstrong()
+  ret void
+}
+
+define void @inline_strong_ssp() nounwind ssp uwtable {
+entry:
+; CHECK: @inline_strong_ssp() #1
+  call void @fun_sspstrong()
+  ret void
+}
+
+define void @inline_strong_nossp() nounwind uwtable {
+entry:
+; CHECK: @inline_strong_nossp() #1
+  call void @fun_sspstrong()
+  ret void
+}
+
+define void @inline_ssp_req() nounwind sspreq uwtable {
+entry:
+; CHECK: @inline_ssp_req() #0
+  call void @fun_ssp()
+  ret void
+}
+
+
+define void @inline_ssp_strong() nounwind sspstrong uwtable {
+entry:
+; CHECK: @inline_ssp_strong() #1
+  call void @fun_ssp()
+  ret void
+}
+
+define void @inline_ssp_ssp() nounwind ssp uwtable {
+entry:
+; CHECK: @inline_ssp_ssp() #2
+  call void @fun_ssp()
+  ret void
+}
+
+define void @inline_ssp_nossp() nounwind uwtable {
+entry:
+; CHECK: @inline_ssp_nossp() #2
+  call void @fun_ssp()
+  ret void
+}
+
+define void @inline_nossp_req() nounwind uwtable sspreq {
+entry:
+; CHECK: @inline_nossp_req() #0
+  call void @fun_nossp()
+  ret void
+}
+
+
+define void @inline_nossp_strong() nounwind sspstrong uwtable {
+entry:
+; CHECK: @inline_nossp_strong() #1
+  call void @fun_nossp()
+  ret void
+}
+
+define void @inline_nossp_ssp() nounwind ssp uwtable {
+entry:
+; CHECK: @inline_nossp_ssp() #2
+  call void @fun_nossp()
+  ret void
+}
+
+define void @inline_nossp_nossp() nounwind uwtable {
+entry:
+; CHECK: @inline_nossp_nossp() #3
+  call void @fun_nossp()
+  ret void
+}
+
+declare i32 @printf(i8*, ...)
+
+; CHECK: attributes #0 = { nounwind sspreq uwtable }
+; CHECK: attributes #1 = { nounwind sspstrong uwtable }
+; CHECK: attributes #2 = { nounwind ssp uwtable }
+; CHECK: attributes #3 = { nounwind uwtable }
diff --git a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
index 0907c49..2dedd44 100644
--- a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
+++ b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
@@ -50,7 +50,7 @@ entry:
   %b = add <4 x i32> zeroinitializer, %a
   ret <4 x i32> %b
 ; CHECK: entry:
-; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
+; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
 ; CHECK-NEXT: ret <4 x i32> %a
 }
 
@@ -66,3 +66,7 @@ entry:
 
 declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+
+; CHECK: attributes #0 = { nounwind readnone ssp }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/InstCombine/bitcast-vector-fold.ll b/test/Transforms/InstCombine/bitcast-vector-fold.ll
index 8feec22..8fd7f35 100644
--- a/test/Transforms/InstCombine/bitcast-vector-fold.ll
+++ b/test/Transforms/InstCombine/bitcast-vector-fold.ll
@@ -31,3 +31,8 @@ define <4 x i32> @test6() {
 	%tmp3 = bitcast <2 x double> <double 0.5, double 1.0> to <4 x i32>
 	ret <4 x i32> %tmp3
 }
+
+define i32 @test7() {
+       %tmp3 = bitcast <2 x half> <half 0xH1100, half 0xH0011> to i32
+       ret i32 %tmp3
+}
+\ No newline at end of file
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
index 8f6ae7d..1e61132 100644
--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -11,7 +11,7 @@ define i32 @test1(i64 %a) {
         %t3 = xor <2 x i32> %t1, %t2
         %t4 = extractelement <2 x i32> %t3, i32 0
         ret i32 %t4
-        
+
 ; CHECK: @test1
 ; CHECK: ret i32 0
 }
@@ -30,7 +30,7 @@ define float @test2(<2 x float> %A, <2 x i32> %B) {
 
   %add = fadd float %tmp24, %tmp4
   ret float %add
-  
+
 ; CHECK: @test2
 ; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 0
 ; CHECK-NEXT:  bitcast <2 x i32> %B to <2 x float>
@@ -55,7 +55,7 @@ define float @test3(<2 x float> %A, <2 x i64> %B) {
 
   %add = fadd float %tmp24, %tmp4
   ret float %add
-  
+
 ; CHECK: @test3
 ; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 1
 ; CHECK-NEXT:  bitcast <2 x i64> %B to <4 x float>
@@ -75,7 +75,7 @@ define <2 x i32> @test4(i32 %A, i32 %B){
   ; CHECK: @test4
   ; CHECK-NEXT: insertelement <2 x i32> undef, i32 %A, i32 0
   ; CHECK-NEXT: insertelement <2 x i32> {{.*}}, i32 %B, i32 1
-  ; CHECK-NEXT: ret <2 x i32> 
+  ; CHECK-NEXT: ret <2 x i32>
 
 }
 
@@ -92,7 +92,7 @@ define <2 x float> @test5(float %A, float %B) {
   ; CHECK: @test5
   ; CHECK-NEXT: insertelement <2 x float> undef, float %A, i32 0
   ; CHECK-NEXT: insertelement <2 x float> {{.*}}, float %B, i32 1
-  ; CHECK-NEXT: ret <2 x float> 
+  ; CHECK-NEXT: ret <2 x float>
 }
 
 define <2 x float> @test6(float %A){
@@ -123,7 +123,7 @@ define i64 @Vec2(i64 %in) {
 }
 
 define i64 @All11(i64 %in) {
-  %out = and i64 %in, xor (i64 bitcast (<2 x float> bitcast (i64 -1 to <2 x float>) to i64), i64 -1) 
+  %out = and i64 %in, xor (i64 bitcast (<2 x float> bitcast (i64 -1 to <2 x float>) to i64), i64 -1)
   ret i64 %out
 ; CHECK: @All11
 ; CHECK: ret i64 0
@@ -131,9 +131,16 @@ define i64 @All11(i64 %in) {
 
 
 define i32 @All111(i32 %in) {
-  %out = and i32 %in, xor (i32 bitcast (<1 x float> bitcast (i32 -1 to <1 x float>) to i32), i32 -1) 
+  %out = and i32 %in, xor (i32 bitcast (<1 x float> bitcast (i32 -1 to <1 x float>) to i32), i32 -1)
   ret i32 %out
 ; CHECK: @All111
 ; CHECK: ret i32 0
 }
 
+define <2 x i16> @BitcastInsert(i32 %a) {
+  %v = insertelement <1 x i32> undef, i32 %a, i32 0
+  %r = bitcast <1 x i32> %v to <2 x i16>
+  ret <2 x i16> %r
+; CHECK: @BitcastInsert
+; CHECK: bitcast i32 %a to <2 x i16>
+}
diff --git a/test/Transforms/InstCombine/constant-expr-datalayout.ll b/test/Transforms/InstCombine/constant-expr-datalayout.ll
new file mode 100644
index 0000000..9a72c77
--- /dev/null
+++ b/test/Transforms/InstCombine/constant-expr-datalayout.ll
@@ -0,0 +1,12 @@
+; RUN: opt -instcombine %s -S -o - | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%test1.struct = type { i32, i32 }
+@test1.aligned_glbl = global %test1.struct zeroinitializer, align 4
+define void @test1(i64 *%ptr) {
+  store i64 and (i64 ptrtoint (i32* getelementptr (%test1.struct* @test1.aligned_glbl, i32 0, i32 1) to i64), i64 3), i64* %ptr
+; CHECK: store i64 0, i64* %ptr
+  ret void
+}
diff --git a/test/Transforms/InstCombine/exact.ll b/test/Transforms/InstCombine/exact.ll
index 14741e3..88ca88c 100644
--- a/test/Transforms/InstCombine/exact.ll
+++ b/test/Transforms/InstCombine/exact.ll
@@ -99,9 +99,9 @@ define i1 @ashr_icmp2(i64 %X) nounwind {
 ; PR9998
 ; Make sure we don't transform the ashr here into an sdiv
 ; CHECK: @pr9998
-; CHECK: = and i32 %V, 1
-; CHECK: %Z = icmp ne
-; CHECK: ret i1 %Z
+; CHECK:      [[BIT:%[A-Za-z0-9.]+]] = and i32 %V, 1
+; CHECK-NEXT: [[CMP:%[A-Za-z0-9.]+]] = icmp ne i32 [[BIT]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
 define i1 @pr9998(i32 %V) nounwind {
 entry:
   %W = shl i32 %V, 31
@@ -112,6 +112,7 @@ entry:
 }
 
 
+
 ; CHECK: @udiv_icmp1
 ; CHECK: icmp ne i64 %X, 0
 define i1 @udiv_icmp1(i64 %X) nounwind {
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
index 5d40d71..c97bd28 100644
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -7,7 +7,7 @@ define float @fold(float %a) {
   %mul1 = fmul fast float %mul, 0x4002666660000000
   ret float %mul1
 ; CHECK: @fold
-; CHECK: fmul float %a, 0x4006147AE0000000
+; CHECK: fmul fast float %a, 0x4006147AE0000000
 }
 
 ; Same testing-case as the one used in fold() except that the operators have
@@ -22,7 +22,7 @@ define float @notfold(float %a) {
 
 define float @fold2(float %a) {
 ; CHECK: @fold2
-; CHECK: fmul float %a, 0x4006147AE0000000
+; CHECK: fmul fast float %a, 0x4006147AE0000000
   %mul = fmul float %a, 0x3FF3333340000000
   %mul1 = fmul fast float %mul, 0x4002666660000000
   ret float %mul1
@@ -54,7 +54,7 @@ define float @fold5(float %f1, float %f2) {
   %add1 = fadd fast float %add, 5.000000e+00
   ret float %add1
 ; CHECK: @fold5
-; CHECK: fadd float %f1, 9.000000e+00
+; CHECK: fadd fast float %f1, 9.000000e+00
 }
 
 ; (X + X) + X => 3.0 * X
@@ -97,17 +97,17 @@ define float @fold9(float %f1, float %f2) {
 }
 
 ; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of
-; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing constant-addend at the 
+; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing constant-addend at the
 ; top of resulting simplified expression tree may potentially reveal some
 ; optimization opportunities in the super-expression trees.
-; 
+;
 define float @fold10(float %f1, float %f2) {
   %t1 = fadd fast float 2.000000e+00, %f1
   %t2 = fsub fast float %f2, 3.000000e+00
   %t3 = fadd fast float %t1, %t2
   ret float %t3
 ; CHECK: @fold10
-; CHECK: %t3 = fadd float %t2, -1.000000e+00
+; CHECK: %t3 = fadd fast float %t2, -1.000000e+00
 ; CHECK: ret float %t3
 }
 
@@ -130,37 +130,6 @@ define double @fail2(double %f1, double %f2) {
 ; CHECK: ret
 }
 
-; rdar://12753946:  x * cond ? 1.0 : 0.0 => cond ? x : 0.0
-define double @select1(i32 %cond, double %x, double %y) {
-  %tobool = icmp ne i32 %cond, 0
-  %cond1 = select i1 %tobool, double 1.000000e+00, double 0.000000e+00
-  %mul = fmul nnan nsz double %cond1, %x
-  %add = fadd double %mul, %y
-  ret double %add
-; CHECK: @select1
-; CHECK: select i1 %tobool, double %x, double 0.000000e+00
-}
-
-define double @select2(i32 %cond, double %x, double %y) {
-  %tobool = icmp ne i32 %cond, 0
-  %cond1 = select i1 %tobool, double 0.000000e+00, double 1.000000e+00
-  %mul = fmul nnan nsz double %cond1, %x
-  %add = fadd double %mul, %y
-  ret double %add
-; CHECK: @select2
-; CHECK: select i1 %tobool, double 0.000000e+00, double %x
-}
-
-define double @select3(i32 %cond, double %x, double %y) {
-  %tobool = icmp ne i32 %cond, 0
-  %cond1 = select i1 %tobool, double 0.000000e+00, double 2.000000e+00
-  %mul = fmul nnan nsz double %cond1, %x
-  %add = fadd double %mul, %y
-  ret double %add
-; CHECK: @select3
-; CHECK: fmul nnan nsz double %cond1, %x
-}
-
 ; =========================================================================
 ;
 ;   Testing-cases about fmul begin
@@ -172,7 +141,7 @@ define float @fmul_distribute1(float %f1) {
   %t1 = fmul float %f1, 6.0e+3
   %t2 = fadd float %t1, 2.0e+3
   %t3 = fmul fast float %t2, 5.0e+3
-  ret float %t3 
+  ret float %t3
 ; CHECK: @fmul_distribute1
 ; CHECK: %1 = fmul fast float %f1, 3.000000e+07
 ; CHECK: %t3 = fadd fast float %1, 1.000000e+07
@@ -205,9 +174,9 @@ define double @fmul_distribute3(double %f1) {
 
 ; C1/X * C2 => (C1*C2) / X
 define float @fmul2(float %f1) {
-  %t1 = fdiv float 2.0e+3, %f1 
+  %t1 = fdiv float 2.0e+3, %f1
   %t3 = fmul fast float %t1, 6.0e+3
-  ret float %t3 
+  ret float %t3
 ; CHECK: @fmul2
 ; CHECK: fdiv fast float 1.200000e+07, %f1
 }
@@ -216,7 +185,7 @@ define float @fmul2(float %f1) {
 define float @fmul3(float %f1, float %f2) {
   %t1 = fdiv float %f1, 2.0e+3
   %t3 = fmul fast float %t1, 6.0e+3
-  ret float %t3 
+  ret float %t3
 ; CHECK: @fmul3
 ; CHECK: fmul fast float %f1, 3.000000e+00
 }
@@ -227,21 +196,146 @@ define float @fmul3(float %f1, float %f2) {
 define float @fmul4(float %f1, float %f2) {
   %t1 = fdiv float %f1, 2.0e+3
   %t3 = fmul fast float %t1, 0x3810000000000000
-  ret float %t3 
+  ret float %t3
 ; CHECK: @fmul4
 ; CHECK: fmul fast float %t1, 0x3810000000000000
 }
 
-; X / C1 * C2 => X / (C2/C1) if  C1/C2 is either a special value of a denormal, 
+; X / C1 * C2 => X / (C2/C1) if  C1/C2 is either a special value or a denormal,
 ;  and C2/C1 is a normal value.
-; 
+;
 define float @fmul5(float %f1, float %f2) {
   %t1 = fdiv float %f1, 3.0e+0
   %t3 = fmul fast float %t1, 0x3810000000000000
-  ret float %t3 
+  ret float %t3
 ; CHECK: @fmul5
 ; CHECK: fdiv fast float %f1, 0x47E8000000000000
 }
 
+; (X*Y) * X => (X*X) * Y
+define float @fmul6(float %f1, float %f2) {
+  %mul = fmul float %f1, %f2
+  %mul1 = fmul fast float %mul, %f1
+  ret float %mul1
+; CHECK: @fmul6
+; CHECK: fmul fast float %f1, %f1
+}
+
+; "(X*Y) * X => (X*X) * Y" is disabled if "X*Y" has multiple uses
+define float @fmul7(float %f1, float %f2) {
+  %mul = fmul float %f1, %f2
+  %mul1 = fmul fast float %mul, %f1
+  %add = fadd float %mul1, %mul
+  ret float %add
+; CHECK: @fmul7
+; CHECK: fmul fast float %mul, %f1
+}
+
+; =========================================================================
+;
+;   Testing-cases about negation
+;
+; =========================================================================
+define float @fneg1(float %f1, float %f2) {
+  %sub = fsub float -0.000000e+00, %f1
+  %sub1 = fsub nsz float 0.000000e+00, %f2
+  %mul = fmul float %sub, %sub1
+  ret float %mul
+; CHECK: @fneg1
+; CHECK: fmul float %f1, %f2
+}
+
+; =========================================================================
+;
+;   Testing-cases about div
+;
+; =========================================================================
+
+; X/C1 / C2 => X * (1/(C2*C1))
+define float @fdiv1(float %x) {
+  %div = fdiv float %x, 0x3FF3333340000000
+  %div1 = fdiv fast float %div, 0x4002666660000000
+  ret float %div1
+; 0x3FF3333340000000 = 1.2f
+; 0x4002666660000000 = 2.3f
+; 0x3FD7303B60000000 = 0.36231884057971014492
+; CHECK: @fdiv1
+; CHECK: fmul fast float %x, 0x3FD7303B60000000
+}
+
+; X*C1 / C2 => X * (C1/C2)
+define float @fdiv2(float %x) {
+  %mul = fmul float %x, 0x3FF3333340000000
+  %div1 = fdiv fast float %mul, 0x4002666660000000
+  ret float %div1
+
+; 0x3FF3333340000000 = 1.2f
+; 0x4002666660000000 = 2.3f
+; 0x3FE0B21660000000 = 0.52173918485641479492
+; CHECK: @fdiv2
+; CHECK: fmul fast float %x, 0x3FE0B21660000000
+}
+
+; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) if C2/C1 is a denormal
+;
+define float @fdiv3(float %x) {
+  %div = fdiv float %x, 0x47EFFFFFE0000000
+  %div1 = fdiv fast float %div, 0x4002666660000000
+  ret float %div1
+; CHECK: @fdiv3
+; CHECK: fdiv float %x, 0x47EFFFFFE0000000
+}
+
+; "X*C1 / C2 => X * (C1/C2)" is disabled if C1/C2 is a denormal
+define float @fdiv4(float %x) {
+  %mul = fmul float %x, 0x47EFFFFFE0000000
+  %div = fdiv float %mul, 0x3FC99999A0000000
+  ret float %div
+; CHECK: @fdiv4
+; CHECK: fmul float %x, 0x47EFFFFFE0000000
+}
+
+; (X/Y)/Z => X/(Y*Z)
+define float @fdiv5(float %f1, float %f2, float %f3) {
+  %t1 = fdiv float %f1, %f2
+  %t2 = fdiv fast float %t1, %f3
+  ret float %t2
+; CHECK: @fdiv5
+; CHECK: fmul float %f2, %f3
+}
 
+; Z/(X/Y) => (Z*Y)/X
+define float @fdiv6(float %f1, float %f2, float %f3) {
+  %t1 = fdiv float %f1, %f2
+  %t2 = fdiv fast float %f3, %t1
+  ret float %t2
+; CHECK: @fdiv6
+; CHECK: fmul float %f3, %f2
+}
 
+; C1/(X*C2) => (C1/C2) / X
+define float @fdiv7(float %x) {
+  %t1 = fmul float %x, 3.0e0
+  %t2 = fdiv fast float 15.0e0, %t1
+  ret float %t2
+; CHECK: @fdiv7
+; CHECK: fdiv fast float 5.000000e+00, %x
+}
+
+; C1/(X/C2) => (C1*C2) / X
+define float @fdiv8(float %x) {
+  %t1 = fdiv float %x, 3.0e0
+  %t2 = fdiv fast float 15.0e0, %t1
+  ret float %t2
+; CHECK: @fdiv8
+; CHECK: fdiv fast float 4.500000e+01, %x
+}
+
+; C1/(C2/X) => (C1/C2) * X
+define float @fdiv9(float %x) {
+  %t1 = fdiv float 3.0e0, %x
+  %t2 = fdiv fast float 15.0e0, %t1
+  ret float %t2
+; CHECK: @fdiv9
+; CHECK: fmul fast float %x, 5.000000e+00
+}
diff --git a/test/Transforms/InstCombine/fmul.ll b/test/Transforms/InstCombine/fmul.ll
new file mode 100644
index 0000000..3671b4c
--- /dev/null
+++ b/test/Transforms/InstCombine/fmul.ll
@@ -0,0 +1,72 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; (-0.0 - X) * C => X * -C
+define float @test1(float %x) {
+  %sub = fsub float -0.000000e+00, %x
+  %mul = fmul float %sub, 2.0e+1
+  ret float %mul
+
+; CHECK: @test1
+; CHECK: fmul float %x, -2.000000e+01
+}
+
+; (0.0 - X) * C => X * -C
+define float @test2(float %x) {
+  %sub = fsub nsz float 0.000000e+00, %x
+  %mul = fmul float %sub, 2.0e+1
+  ret float %mul
+
+; CHECK: @test2
+; CHECK: fmul float %x, -2.000000e+01
+}
+
+; (-0.0 - X) * (-0.0 - Y) => X * Y
+define float @test3(float %x, float %y) {
+  %sub1 = fsub float -0.000000e+00, %x
+  %sub2 = fsub float -0.000000e+00, %y
+  %mul = fmul float %sub1, %sub2
+  ret float %mul
+; CHECK: @test3
+; CHECK: fmul float %x, %y
+}
+
+; (0.0 - X) * (0.0 - Y) => X * Y
+define float @test4(float %x, float %y) {
+  %sub1 = fsub nsz float 0.000000e+00, %x
+  %sub2 = fsub nsz float 0.000000e+00, %y
+  %mul = fmul float %sub1, %sub2
+  ret float %mul
+; CHECK: @test4
+; CHECK: fmul float %x, %y
+}
+
+; (-0.0 - X) * Y => -0.0 - (X * Y)
+define float @test5(float %x, float %y) {
+  %sub1 = fsub float -0.000000e+00, %x
+  %mul = fmul float %sub1, %y
+  ret float %mul
+; CHECK: @test5
+; CHECK: %1 = fmul float %x, %y
+; CHECK: %mul = fsub float -0.000000e+00, %1
+}
+
+; (0.0 - X) * Y => -0.0 - (X * Y)
+define float @test6(float %x, float %y) {
+  %sub1 = fsub nsz float 0.000000e+00, %x
+  %mul = fmul float %sub1, %y
+  ret float %mul
+; CHECK: @test6
+; CHECK: %1 = fmul float %x, %y
+; CHECK: %mul = fsub float -0.000000e+00, %1
+}
+
+; "(-0.0 - X) * Y => -0.0 - (X * Y)" is disabled if expression "-0.0 - X"
+; has multiple uses.
+define float @test7(float %x, float %y) {
+  %sub1 = fsub float -0.000000e+00, %x
+  %mul = fmul float %sub1, %y
+  %mul2 = fmul float %mul, %sub1
+  ret float %mul2
+; CHECK: @test7
+; CHECK: fsub float -0.000000e+00, %x
+}
diff --git a/test/Transforms/InstCombine/fpcast.ll b/test/Transforms/InstCombine/fpcast.ll
index bc6aa0a..09f0532 100644
--- a/test/Transforms/InstCombine/fpcast.ll
+++ b/test/Transforms/InstCombine/fpcast.ll
@@ -13,3 +13,22 @@ define i8 @test2() {
 ; CHECK: ret i8 -1
 }
 
+; CHECK: test3
+define half @test3(float %a) {
+; CHECK: fptrunc
+; CHECK: llvm.fabs.f16
+  %b = call float @llvm.fabs.f32(float %a)
+  %c = fptrunc float %b to half
+  ret half %c
+}
+
+; CHECK: test4
+define half @test4(float %a) {
+; CHECK: fptrunc
+; CHECK: fsub
+  %b = fsub float -0.0, %a
+  %c = fptrunc float %b to half
+  ret half %c
+}
+
+declare float @llvm.fabs.f32(float) nounwind readonly
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index 1c120ec..bb07736 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -424,7 +424,7 @@ define i32 @test35() nounwind {
              i8* getelementptr (%t1* bitcast (%t0* @s to %t1*), i32 0, i32 1, i32 0)) nounwind
   ret i32 0
 ; CHECK: @test35
-; CHECK: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @"\01LC8", i64 0, i64 0), i8* getelementptr inbounds (%t0* @s, i64 0, i32 1, i64 0)) nounwind
+; CHECK: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @"\01LC8", i64 0, i64 0), i8* getelementptr inbounds (%t0* @s, i64 0, i32 1, i64 0)) [[NUW:#[0-9]+]]
 }
 
 ; Instcombine should constant-fold the GEP so that indices that have
@@ -492,3 +492,21 @@ define void @three_gep_f(%three_gep_t2* %x) {
 
 declare void @three_gep_g(i32*)
 declare void @three_gep_h(%three_gep_t2*)
+
+%struct.ham = type { i32, %struct.zot*, %struct.zot*, %struct.zot* }
+%struct.zot = type { i64, i8 }
+
+define void @test39(%struct.ham* %arg, i8 %arg1) nounwind {
+  %tmp = getelementptr inbounds %struct.ham* %arg, i64 0, i32 2
+  %tmp2 = load %struct.zot** %tmp, align 8
+  %tmp3 = bitcast %struct.zot* %tmp2 to i8*
+  %tmp4 = getelementptr inbounds i8* %tmp3, i64 -8
+  store i8 %arg1, i8* %tmp4, align 8
+  ret void
+
+; CHECK: @test39
+; CHECK: getelementptr inbounds %struct.ham* %arg, i64 0, i32 2
+; CHECK: getelementptr inbounds i8* %tmp3, i64 -8
+}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 8fb6144..331eb3f 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -706,3 +706,41 @@ define i1 @test69(i32 %c) nounwind uwtable {
   %3 = or i1 %1, %2
   ret i1 %3
 }
+
+; CHECK: @icmp_sext16trunc
+; CHECK-NEXT: %1 = trunc i32 %x to i16
+; CHECK-NEXT: %cmp = icmp slt i16 %1, 36
+define i1 @icmp_sext16trunc(i32 %x) {
+  %trunc = trunc i32 %x to i16
+  %sext = sext i16 %trunc to i32
+  %cmp = icmp slt i32 %sext, 36
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_sext8trunc
+; CHECK-NEXT: %1 = trunc i32 %x to i8
+; CHECK-NEXT: %cmp = icmp slt i8 %1, 36
+define i1 @icmp_sext8trunc(i32 %x) {
+  %trunc = trunc i32 %x to i8
+  %sext = sext i8 %trunc to i32
+  %cmp = icmp slt i32 %sext, 36
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_shl16
+; CHECK-NEXT: %1 = trunc i32 %x to i16
+; CHECK-NEXT: %cmp = icmp slt i16 %1, 36
+define i1 @icmp_shl16(i32 %x) {
+  %shl = shl i32 %x, 16                 ; keeps only the low 16 bits of %x, moved to the top half
+  %cmp = icmp slt i32 %shl, 2359296     ; 2359296 == 36 << 16, hence the i16 compare against 36 above
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_shl24
+; CHECK-NEXT: %1 = trunc i32 %x to i8
+; CHECK-NEXT: %cmp = icmp slt i8 %1, 36
+define i1 @icmp_shl24(i32 %x) {
+  %shl = shl i32 %x, 24                 ; keeps only the low 8 bits of %x, moved to the top byte
+  %cmp = icmp slt i32 %shl, 603979776   ; 603979776 == 36 << 24, hence the i8 compare against 36 above
+  ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 93f0a95..f334b3b 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -220,3 +220,39 @@ define i32 @cttz_simplify1b(i32 %x) nounwind readnone ssp {
 ; CHECK: @cttz_simplify1b
 ; CHECK-NEXT: ret i32 0
 }
+
+define i32 @ctlz_undef(i32 %Value) nounwind {
+  %ctlz = call i32 @llvm.ctlz.i32(i32 0, i1 true)    ; constant 0 operand with the is_zero_undef flag (i1 true) set
+  ret i32 %ctlz
+
+; CHECK: @ctlz_undef
+; CHECK-NEXT: ret i32 undef
+}
+
+define i32 @cttz_undef(i32 %Value) nounwind {
+  %cttz = call i32 @llvm.cttz.i32(i32 0, i1 true)    ; constant 0 operand with the is_zero_undef flag (i1 true) set
+  ret i32 %cttz
+
+; CHECK: @cttz_undef
+; CHECK-NEXT: ret i32 undef
+}
+
+define i32 @ctlz_select(i32 %Value) nounwind {
+  %tobool = icmp ne i32 %Value, 0
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %Value, i1 true)
+  %s = select i1 %tobool, i32 %ctlz, i32 32
+  ret i32 %s
+
+; CHECK: @ctlz_select
+; CHECK: select i1 %tobool, i32 %ctlz, i32 32
+}
+
+define i32 @cttz_select(i32 %Value) nounwind {
+  %tobool = icmp ne i32 %Value, 0
+  %cttz = call i32 @llvm.cttz.i32(i32 %Value, i1 true)
+  %s = select i1 %tobool, i32 %cttz, i32 32
+  ret i32 %s
+
+; CHECK: @cttz_select
+; CHECK: select i1 %tobool, i32 %cttz, i32 32
+}
diff --git a/test/Transforms/InstCombine/load3.ll b/test/Transforms/InstCombine/load3.ll
index 35398e1..db74426 100644
--- a/test/Transforms/InstCombine/load3.ll
+++ b/test/Transforms/InstCombine/load3.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.0.0"
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.0.0"
 
 ; Instcombine should be able to do trivial CSE of loads.
 
@@ -24,4 +24,23 @@ define float @test2() {
   
 ; CHECK: @test2
 ; CHECK: ret float 0x3806965600000000
-}
-\ No newline at end of file
+}
+
+@rslts32 = global [36 x i32] zeroinitializer, align 4
+
+@expect32 = internal constant [36 x i32][ i32 1, i32 2, i32 0, i32 100, i32 3,
+i32 4, i32 0, i32 -7, i32 4, i32 4, i32 8, i32 8, i32 1, i32 3, i32 8, i32 3,
+i32 4, i32 -2, i32 2, i32 8, i32 83, i32 77, i32 8, i32 17, i32 77, i32 88, i32
+22, i32 33, i32 44, i32 88, i32 77, i32 4, i32 4, i32 7, i32 -7, i32 -8] ,
+align 4
+
+; PR14986
+define void @test3() nounwind {
+; This is a weird way of computing zero: the byte offset 29826161*144 + 28*4 is 2^32, which wraps to 0 with 32-bit pointers.
+  %l = load i32* getelementptr ([36 x i32]* @expect32, i32 29826161, i32 28), align 4
+  store i32 %l, i32* getelementptr ([36 x i32]* @rslts32, i32 29826161, i32 28), align 4
+  ret void
+
+; CHECK: @test3
+; CHECK: store i32 1, i32* getelementptr inbounds ([36 x i32]* @rslts32, i32 0, i32 0)
+}
diff --git a/test/Transforms/InstCombine/logical-select.ll b/test/Transforms/InstCombine/logical-select.ll
index bb59817..f8c0676 100644
--- a/test/Transforms/InstCombine/logical-select.ll
+++ b/test/Transforms/InstCombine/logical-select.ll
@@ -10,8 +10,8 @@ define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
   %j = or i32 %g, %i
   ret i32 %j
 ; CHECK: %e = icmp slt i32 %a, %b
-; CHECK: %j = select i1 %e, i32 %c, i32 %d
-; CHECK: ret i32 %j
+; CHECK-NEXT: [[result:%.*]] = select i1 %e, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
 }
 define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
   %e = icmp slt i32 %a, %b
@@ -22,8 +22,8 @@ define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
   %j = or i32 %i, %g
   ret i32 %j
 ; CHECK: %e = icmp slt i32 %a, %b
-; CHECK: %j = select i1 %e, i32 %c, i32 %d
-; CHECK: ret i32 %j
+; CHECK-NEXT: [[result:%.*]] = select i1 %e, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
 }
 
 define i32 @goo(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
@@ -36,8 +36,8 @@ entry:
   %3 = or i32 %1, %2
   ret i32 %3
 ; CHECK: %0 = icmp slt i32 %a, %b
-; CHECK: %1 = select i1 %0, i32 %c, i32 %d
-; CHECK: ret i32 %1
+; CHECK-NEXT: [[result:%.*]] = select i1 %0, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
 }
 define i32 @poo(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
 entry:
@@ -49,8 +49,8 @@ entry:
   %3 = or i32 %1, %2
   ret i32 %3
 ; CHECK: %0 = icmp slt i32 %a, %b
-; CHECK: %1 = select i1 %0, i32 %c, i32 %d
-; CHECK: ret i32 %1
+; CHECK-NEXT: [[result:%.*]] = select i1 %0, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
 }
 
 define i32 @par(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
@@ -63,6 +63,6 @@ entry:
   %3 = or i32 %1, %2
   ret i32 %3
 ; CHECK: %0 = icmp slt i32 %a, %b
-; CHECK: %1 = select i1 %0, i32 %c, i32 %d
-; CHECK: ret i32 %1
+; CHECK-NEXT: [[result:%.*]] = select i1 %0, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
 }
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index bbc70fe..16213b8 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -138,10 +138,8 @@ define i32 @test16(i32 %b, i1 %c) {
         ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
-; CHECK: [[TEST16:%.*]] = zext i1 %c to i32
-; CHECK-NEXT: %1 = sub i32 0, [[TEST16]]
-; CHECK-NEXT: %e = and i32 %1, %b
-; CHECK-NEXT: ret i32 %e
+; CHECK: [[TEST16:%.*]] = select i1 %c, i32 %b, i32 0
+; CHECK-NEXT: ret i32 [[TEST16]]
 }
 
 ; X * Y (when Y is 0 or 1) --> x & (0-Y)
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 0ead9d1..31a3cb4 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -256,131 +256,3 @@ xpto:
 return:
   ret i32 7
 }
-
-declare noalias i8* @valloc(i32) nounwind
-
-; CHECK: @test14
-; CHECK: ret i32 6
-define i32 @test14(i32 %a) nounwind {
-  switch i32 %a, label %sw.default [
-    i32 1, label %sw.bb
-    i32 2, label %sw.bb1
-  ]
-
-sw.bb:
-  %call = tail call noalias i8* @malloc(i32 6) nounwind
-  br label %sw.epilog
-
-sw.bb1:
-  %call2 = tail call noalias i8* @calloc(i32 3, i32 2) nounwind
-  br label %sw.epilog
-
-sw.default:
-  %call3 = tail call noalias i8* @valloc(i32 6) nounwind
-  br label %sw.epilog
-
-sw.epilog:
-  %b.0 = phi i8* [ %call3, %sw.default ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ]
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %b.0, i1 false)
-  ret i32 %1
-}
-
-; CHECK: @test15
-; CHECK: llvm.objectsize
-define i32 @test15(i32 %a) nounwind {
-  switch i32 %a, label %sw.default [
-    i32 1, label %sw.bb
-    i32 2, label %sw.bb1
-  ]
-
-sw.bb:
-  %call = tail call noalias i8* @malloc(i32 3) nounwind
-  br label %sw.epilog
-
-sw.bb1:
-  %call2 = tail call noalias i8* @calloc(i32 2, i32 1) nounwind
-  br label %sw.epilog
-
-sw.default:
-  %call3 = tail call noalias i8* @valloc(i32 3) nounwind
-  br label %sw.epilog
-
-sw.epilog:
-  %b.0 = phi i8* [ %call3, %sw.default ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ]
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %b.0, i1 false)
-  ret i32 %1
-}
-
-; CHECK: @test16
-; CHECK: llvm.objectsize
-define i32 @test16(i8* %a, i32 %n) nounwind {
-  %b = alloca [5 x i8], align 1
-  %c = alloca [5 x i8], align 1
-  switch i32 %n, label %sw.default [
-    i32 1, label %sw.bb
-    i32 2, label %sw.bb1
-  ]
-
-sw.bb:
-  %bp = bitcast [5 x i8]* %b to i8*
-  br label %sw.epilog
-
-sw.bb1:
-  %cp = bitcast [5 x i8]* %c to i8*
-  br label %sw.epilog
-
-sw.default:
-  br label %sw.epilog
-
-sw.epilog:
-  %phi = phi i8* [ %a, %sw.default ], [ %cp, %sw.bb1 ], [ %bp, %sw.bb ]
-  %sz = call i32 @llvm.objectsize.i32(i8* %phi, i1 false)
-  ret i32 %sz
-}
-
-; CHECK: @test17
-; CHECK: ret i32 5
-define i32 @test17(i32 %n) nounwind {
-  %b = alloca [5 x i8], align 1
-  %c = alloca [5 x i8], align 1
-  %bp = bitcast [5 x i8]* %b to i8*
-  switch i32 %n, label %sw.default [
-    i32 1, label %sw.bb
-    i32 2, label %sw.bb1
-  ]
-
-sw.bb:
-  br label %sw.epilog
-
-sw.bb1:
-  %cp = bitcast [5 x i8]* %c to i8*
-  br label %sw.epilog
-
-sw.default:
-  br label %sw.epilog
-
-sw.epilog:
-  %phi = phi i8* [ %bp, %sw.default ], [ %cp, %sw.bb1 ], [ %bp, %sw.bb ]
-  %sz = call i32 @llvm.objectsize.i32(i8* %phi, i1 false)
-  ret i32 %sz
-}
-
-@globalalias = alias internal [60 x i8]* @a
-
-; CHECK: @test18
-; CHECK-NEXT: ret i32 60
-define i32 @test18() {
-  %bc = bitcast [60 x i8]* @globalalias to i8*
-  %1 = call i32 @llvm.objectsize.i32(i8* %bc, i1 false)
-  ret i32 %1
-}
-
-@globalalias2 = alias weak [60 x i8]* @a
-
-; CHECK: @test19
-; CHECK: llvm.objectsize
-define i32 @test19() {
-  %bc = bitcast [60 x i8]* @globalalias2 to i8*
-  %1 = call i32 @llvm.objectsize.i32(i8* %bc, i1 false)
-  ret i32 %1
-}
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index c0bb28d..bde2a54 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -344,10 +344,9 @@ define <4 x i32> @test32(<4 x i1> %and.i1352, <4 x i32> %vecinit6.i176, <4 x i32
   %and.i = and <4 x i32> %vecinit6.i191, %neg.i   ; <<4 x i32>> [#uses=1]
   %or.i = or <4 x i32> %and.i, %and.i129          ; <<4 x i32>> [#uses=1]
   ret <4 x i32> %or.i
-; Don't turn this into a vector select until codegen matures to handle them
-; better.
+; Codegen is mature enough to handle vector selects, so this is now turned into one.
 ; CHECK: @test32
-; CHECK: or <4 x i32> %and.i, %and.i129
+; CHECK: select <4 x i1> %and.i1352, <4 x i32> %vecinit6.i176, <4 x i32> %vecinit6.i191
 }
 
 define i1 @test33(i1 %X, i1 %Y) {
diff --git a/test/Transforms/InstCombine/pow-1.ll b/test/Transforms/InstCombine/pow-1.ll
index c8e5f38..8a311f0 100644
--- a/test/Transforms/InstCombine/pow-1.ll
+++ b/test/Transforms/InstCombine/pow-1.ll
@@ -30,7 +30,7 @@ define double @test_simplify2(double %x) {
 define float @test_simplify3(float %x) {
 ; CHECK: @test_simplify3
   %retval = call float @powf(float 2.0, float %x)
-; CHECK-NEXT: [[EXP2F:%[a-z0-9]+]] = call float @exp2f(float %x) nounwind readonly
+; CHECK-NEXT: [[EXP2F:%[a-z0-9]+]] = call float @exp2f(float %x) [[NUW_RO:#[0-9]+]]
   ret float %retval
 ; CHECK-NEXT: ret float [[EXP2F]]
 }
@@ -38,7 +38,7 @@ define float @test_simplify3(float %x) {
 define double @test_simplify4(double %x) {
 ; CHECK: @test_simplify4
   %retval = call double @pow(double 2.0, double %x)
-; CHECK-NEXT: [[EXP2:%[a-z0-9]+]] = call double @exp2(double %x) nounwind readonly
+; CHECK-NEXT: [[EXP2:%[a-z0-9]+]] = call double @exp2(double %x) [[NUW_RO]]
   ret double %retval
 ; CHECK-NEXT: ret double [[EXP2]]
 }
@@ -64,8 +64,8 @@ define double @test_simplify6(double %x) {
 define float @test_simplify7(float %x) {
 ; CHECK: @test_simplify7
   %retval = call float @powf(float %x, float 0.5)
-; CHECK-NEXT: [[SQRTF:%[a-z0-9]+]] = call float @sqrtf(float %x) nounwind readonly
-; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @fabsf(float [[SQRTF]]) nounwind readonly
+; CHECK-NEXT: [[SQRTF:%[a-z0-9]+]] = call float @sqrtf(float %x) [[NUW_RO]]
+; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @fabsf(float [[SQRTF]]) [[NUW_RO]]
 ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq float %x, 0xFFF0000000000000
 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], float 0x7FF0000000000000, float [[FABSF]]
   ret float %retval
@@ -75,8 +75,8 @@ define float @test_simplify7(float %x) {
 define double @test_simplify8(double %x) {
 ; CHECK: @test_simplify8
   %retval = call double @pow(double %x, double 0.5)
-; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) nounwind readonly
-; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) nounwind readonly
+; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) [[NUW_RO]]
+; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) [[NUW_RO]]
 ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000
 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]]
   ret double %retval
@@ -150,3 +150,5 @@ define double @test_simplify16(double %x) {
   ret double %retval
 ; CHECK-NEXT: ret double [[RECIPROCAL]]
 }
+
+; CHECK: attributes [[NUW_RO]] = { nounwind readonly }
diff --git a/test/Transforms/InstCombine/ptr-int-cast.ll b/test/Transforms/InstCombine/ptr-int-cast.ll
index 9524d44..7a6ecff 100644
--- a/test/Transforms/InstCombine/ptr-int-cast.ll
+++ b/test/Transforms/InstCombine/ptr-int-cast.ll
@@ -27,3 +27,34 @@ define i64 @f0(i32 %a0) nounwind {
        ret i64 %t1
 }
 
+define <4 x i32> @test4(<4 x i8*> %arg) nounwind {
+; CHECK: @test4
+; CHECK: ptrtoint <4 x i8*> %arg to <4 x i64>
+; CHECK: trunc <4 x i64> %1 to <4 x i32>
+  %p1 = ptrtoint <4 x i8*> %arg to <4 x i32>
+  ret <4 x i32> %p1
+}
+
+define <4 x i128> @test5(<4 x i8*> %arg) nounwind {
+; CHECK: @test5
+; CHECK: ptrtoint <4 x i8*> %arg to <4 x i64>
+; CHECK: zext <4 x i64> %1 to <4 x i128>
+  %p1 = ptrtoint <4 x i8*> %arg to <4 x i128>
+  ret <4 x i128> %p1
+}
+
+define <4 x i8*> @test6(<4 x i32> %arg) nounwind {
+; CHECK: @test6
+; CHECK: zext <4 x i32> %arg to <4 x i64>
+; CHECK: inttoptr <4 x i64> %1 to <4 x i8*>
+  %p1 = inttoptr <4 x i32> %arg to <4 x i8*>
+  ret <4 x i8*> %p1
+}
+
+define <4 x i8*> @test7(<4 x i128> %arg) nounwind {
+; CHECK: @test7
+; CHECK: trunc <4 x i128> %arg to <4 x i64>
+; CHECK: inttoptr <4 x i64> %1 to <4 x i8*>
+  %p1 = inttoptr <4 x i128> %arg to <4 x i8*>
+  ret <4 x i8*> %p1
+}
diff --git a/test/Transforms/InstCombine/sext.ll b/test/Transforms/InstCombine/sext.ll
index f198797..968f37c 100644
--- a/test/Transforms/InstCombine/sext.ll
+++ b/test/Transforms/InstCombine/sext.ll
@@ -184,3 +184,12 @@ define i32 @test16(i16 %x) nounwind {
 ; CHECK-NEXT: %ext = sext i16 %sext to i32
 ; CHECK-NEXT: ret i32 %ext
 }
+
+define i32 @test17(i1 %x) nounwind {
+  %c1 = sext i1 %x to i32
+  %c2 = sub i32 0, %c1
+  ret i32 %c2
+; CHECK: @test17
+; CHECK-NEXT: [[TEST17:%.*]] = zext i1 %x to i32
+; CHECK-NEXT: ret i32 [[TEST17]]
+}
diff --git a/test/Transforms/InstCombine/vec_extract_elt.ll b/test/Transforms/InstCombine/vec_extract_elt.ll
index 63e4ee2..166066a 100644
--- a/test/Transforms/InstCombine/vec_extract_elt.ll
+++ b/test/Transforms/InstCombine/vec_extract_elt.ll
@@ -7,3 +7,13 @@ define i32 @test(float %f) {
         ret i32 %tmp19
 }
 
+define i64 @test2(i64 %in) {    ; NOTE(review): no CHECK lines target this function in this hunk - presumably a crash regression test; confirm
+  %vec = insertelement <8 x i64> undef, i64 %in, i32 0
+  %splat = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> zeroinitializer    ; all-zero mask: every lane takes lane 0 of %vec (%in)
+  %add = add <8 x i64> %splat, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
+  %scl1 = extractelement <8 x i64> %add, i32 0
+  %scl2 = extractelement <8 x i64> %add, i32 0    ; same lane as %scl1, so %add has two extractelement users
+  %r = add i64 %scl1, %scl2
+  ret i64 %r
+}
+
diff --git a/test/Transforms/InstCombine/vector-casts.ll b/test/Transforms/InstCombine/vector-casts.ll
index 7bbf53c..2f2990b 100644
--- a/test/Transforms/InstCombine/vector-casts.ll
+++ b/test/Transforms/InstCombine/vector-casts.ll
@@ -64,7 +64,8 @@ entry:
         
 ; CHECK: @test5
 ; CHECK:   sext <4 x i1> %cmp to <4 x i32>	
-; CHECK:   sext <4 x i1> %cmp4 to <4 x i32>	
+; The sext-and pair is canonicalized to a select.
+; CHECK:   select <4 x i1> %cmp4, <4 x i32>	%sext, <4 x i32> zeroinitializer
 }
 
 
diff --git a/test/Transforms/InstCombine/zext-bool-add-sub.ll b/test/Transforms/InstCombine/zext-bool-add-sub.ll
index 78bcedb..b531057 100644
--- a/test/Transforms/InstCombine/zext-bool-add-sub.ll
+++ b/test/Transforms/InstCombine/zext-bool-add-sub.ll
@@ -4,9 +4,9 @@
 define i32 @a(i1 zeroext %x, i1 zeroext %y) {
 entry:
 ; CHECK: @a
-; CHECK: [[TMP1:%.*]] = zext i1 %y to i32
+; CHECK: [[TMP1:%.*]] = sext i1 %y to i32
 ; CHECK: [[TMP2:%.*]] = select i1 %x, i32 2, i32 1
-; CHECK-NEXT: sub i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: add i32 [[TMP2]], [[TMP1]]
   %conv = zext i1 %x to i32
   %conv3 = zext i1 %y to i32
   %conv3.neg = sub i32 0, %conv3
diff --git a/test/Transforms/InstSimplify/call-callconv.ll b/test/Transforms/InstSimplify/call-callconv.ll
new file mode 100644
index 0000000..e475be7
--- /dev/null
+++ b/test/Transforms/InstSimplify/call-callconv.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; Verify that the non-default calling conv doesn't prevent the libcall simplification
+
+@.str = private unnamed_addr constant [4 x i8] c"abc\00", align 1
+
+define arm_aapcscc i32 @_abs(i32 %i) nounwind readnone {
+; CHECK: _abs
+  %call = tail call arm_aapcscc i32 @abs(i32 %i) nounwind readnone
+  ret i32 %call
+; CHECK: %[[ISPOS:.*]] = icmp sgt i32 %i, -1
+; CHECK: %[[NEG:.*]] = sub i32 0, %i
+; CHECK: %[[RET:.*]] = select i1 %[[ISPOS]], i32 %i, i32 %[[NEG]]
+; CHECK: ret i32 %[[RET]]
+}
+
+declare arm_aapcscc i32 @abs(i32) nounwind readnone
+
+define arm_aapcscc i32 @_labs(i32 %i) nounwind readnone {
+; CHECK: _labs
+  %call = tail call arm_aapcscc i32 @labs(i32 %i) nounwind readnone
+  ret i32 %call
+; CHECK: %[[ISPOS:.*]] = icmp sgt i32 %i, -1
+; CHECK: %[[NEG:.*]] = sub i32 0, %i
+; CHECK: %[[RET:.*]] = select i1 %[[ISPOS]], i32 %i, i32 %[[NEG]]
+; CHECK: ret i32 %[[RET]]
+}
+
+declare arm_aapcscc i32 @labs(i32) nounwind readnone
+
+define arm_aapcscc i32 @_strlen1() {
+; CHECK: _strlen1
+  %call = tail call arm_aapcscc i32 @strlen(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0))
+  ret i32 %call
+; CHECK: ret i32 3
+}
+
+declare arm_aapcscc i32 @strlen(i8*)
+
+define arm_aapcscc zeroext i1 @_strlen2(i8* %str) {
+; CHECK: _strlen2
+  %call = tail call arm_aapcscc i32 @strlen(i8* %str)
+  %cmp = icmp ne i32 %call, 0
+  ret i1 %cmp
+
+; CHECK: %[[STRLENFIRST:.*]] = load i8* %str
+; CHECK: %[[CMP:.*]] = icmp ne i8 %[[STRLENFIRST]], 0
+; CHECK: ret i1 %[[CMP]]
+}
diff --git a/test/Transforms/InstSimplify/call.ll b/test/Transforms/InstSimplify/call.ll
index 1a8d0c2..cf2f847 100644
--- a/test/Transforms/InstSimplify/call.ll
+++ b/test/Transforms/InstSimplify/call.ll
@@ -50,3 +50,54 @@ define float @test_fabs_libcall() {
   ret float %x
 ; CHECK-NEXT: ret float 4.2{{0+}}e+01
 }
+
+
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.floor.f32(float) nounwind readnone
+declare float @llvm.ceil.f32(float) nounwind readnone
+declare float @llvm.trunc.f32(float) nounwind readnone
+declare float @llvm.rint.f32(float) nounwind readnone
+declare float @llvm.nearbyint.f32(float) nounwind readnone
+
+; Test idempotent intrinsics
+define float @test_idempotence(float %a) {
+; CHECK: @test_idempotence
+
+; CHECK: fabs
+; CHECK-NOT: fabs
+  %a0 = call float @llvm.fabs.f32(float %a)
+  %a1 = call float @llvm.fabs.f32(float %a0)
+
+; CHECK: floor
+; CHECK-NOT: floor
+  %b0 = call float @llvm.floor.f32(float %a)
+  %b1 = call float @llvm.floor.f32(float %b0)
+
+; CHECK: ceil
+; CHECK-NOT: ceil
+  %c0 = call float @llvm.ceil.f32(float %a)
+  %c1 = call float @llvm.ceil.f32(float %c0)
+
+; CHECK: trunc
+; CHECK-NOT: trunc
+  %d0 = call float @llvm.trunc.f32(float %a)
+  %d1 = call float @llvm.trunc.f32(float %d0)
+
+; CHECK: rint
+; CHECK-NOT: rint
+  %e0 = call float @llvm.rint.f32(float %a)
+  %e1 = call float @llvm.rint.f32(float %e0)
+
+; CHECK: nearbyint
+; CHECK-NOT: nearbyint
+  %f0 = call float @llvm.nearbyint.f32(float %a)
+  %f1 = call float @llvm.nearbyint.f32(float %f0)
+
+  %r0 = fadd float %a1, %b1
+  %r1 = fadd float %r0, %c1
+  %r2 = fadd float %r1, %d1
+  %r3 = fadd float %r2, %e1
+  %r4 = fadd float %r3, %f1
+
+  ret float %r4
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index 56627b9..0ecfb1f 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -647,3 +647,38 @@ unreachableblock:
   %Y = icmp eq i32* %X, null
   ret i1 %Y
 }
+
+; It's not valid to fold a comparison of an argument with an alloca, even though
+; that's tempting. An argument can't *alias* an alloca, however the aliasing rule
+; relies on restrictions against guessing an object's address and dereferencing.
+; There are no restrictions against guessing an object's address and comparing.
+
+define i1 @alloca_argument_compare(i64* %arg) {
+  %alloc = alloca i64
+  %cmp = icmp eq i64* %arg, %alloc
+  ret i1 %cmp
+  ; CHECK: alloca_argument_compare
+  ; CHECK: ret i1 %cmp
+}
+
+; As above, but with the operands reversed.
+
+define i1 @alloca_argument_compare_swapped(i64* %arg) {
+  %alloc = alloca i64
+  %cmp = icmp eq i64* %alloc, %arg
+  ret i1 %cmp
+  ; CHECK: alloca_argument_compare_swapped
+  ; CHECK: ret i1 %cmp
+}
+
+; Don't assume that a noalias argument isn't equal to a global variable's
+; address. This is an example where AliasAnalysis' NoAlias concept is
+; different from actual pointer inequality.
+
+@y = external global i32
+define zeroext i1 @external_compare(i32* noalias %x) {
+  %cmp = icmp eq i32* %x, @y
+  ret i1 %cmp
+  ; CHECK: external_compare
+  ; CHECK: ret i1 %cmp
+}
diff --git a/test/Transforms/InstSimplify/past-the-end.ll b/test/Transforms/InstSimplify/past-the-end.ll
new file mode 100644
index 0000000..075da4a
--- /dev/null
+++ b/test/Transforms/InstSimplify/past-the-end.ll
@@ -0,0 +1,77 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+target datalayout = "p:32:32"
+
+; Check some past-the-end subtleties.
+
+@opte_a = global i32 0
+@opte_b = global i32 0
+
+; Comparing base addresses of two distinct globals. Never equal.
+
+define zeroext i1 @no_offsets() {
+  %t = icmp eq i32* @opte_a, @opte_b
+  ret i1 %t
+  ; CHECK: no_offsets(
+  ; CHECK: ret i1 false
+}
+
+; Comparing past-the-end addresses of two distinct globals. Never equal.
+
+define zeroext i1 @both_past_the_end() {
+  %x = getelementptr i32* @opte_a, i32 1
+  %y = getelementptr i32* @opte_b, i32 1
+  %t = icmp eq i32* %x, %y
+  ret i1 %t
+  ; CHECK: both_past_the_end(
+  ; CHECK-NOT: ret i1 true
+  ; TODO: refine this
+}
+
+; Comparing past-the-end addresses of one global to the base address
+; of another. Can't fold this.
+
+define zeroext i1 @just_one_past_the_end() {
+  %x = getelementptr i32* @opte_a, i32 1
+  %t = icmp eq i32* %x, @opte_b
+  ret i1 %t
+  ; CHECK: just_one_past_the_end(
+  ; CHECK: ret i1 icmp eq (i32* getelementptr inbounds (i32* @opte_a, i32 1), i32* @opte_b)
+}
+
+; Comparing base addresses of two distinct allocas. Never equal.
+
+define zeroext i1 @no_alloca_offsets() {
+  %m = alloca i32
+  %n = alloca i32
+  %t = icmp eq i32* %m, %n
+  ret i1 %t
+  ; CHECK: no_alloca_offsets(
+  ; CHECK: ret i1 false
+}
+
+; Comparing past-the-end addresses of two distinct allocas. Never equal.
+
+define zeroext i1 @both_past_the_end_alloca() {
+  %m = alloca i32
+  %n = alloca i32
+  %x = getelementptr i32* %m, i32 1
+  %y = getelementptr i32* %n, i32 1
+  %t = icmp eq i32* %x, %y
+  ret i1 %t
+  ; CHECK: both_past_the_end_alloca(
+  ; CHECK-NOT: ret i1 true
+  ; TODO: refine this
+}
+
+; Comparing past-the-end addresses of one alloca to the base address
+; of another. Can't fold this.
+
+define zeroext i1 @just_one_past_the_end_alloca() {
+  %m = alloca i32
+  %n = alloca i32
+  %x = getelementptr i32* %m, i32 1
+  %t = icmp eq i32* %x, %n
+  ret i1 %t
+  ; CHECK: just_one_past_the_end_alloca(
+  ; CHECK: ret i1 %t
+}
diff --git a/test/Transforms/InstSimplify/ptr_diff.ll b/test/Transforms/InstSimplify/ptr_diff.ll
index 1eb1fd4..8b4aa79 100644
--- a/test/Transforms/InstSimplify/ptr_diff.ll
+++ b/test/Transforms/InstSimplify/ptr_diff.ll
@@ -46,3 +46,33 @@ define i64 @ptrdiff3(i8* %ptr) {
   %diff = sub i64 %last.int, %first.int
   ret i64 %diff
 }
+
+define <4 x i32> @ptrdiff4(<4 x i8*> %arg) nounwind {
+; Handle simple cases of vectors of pointers.
+; CHECK: @ptrdiff4
+; CHECK: ret <4 x i32> zeroinitializer
+  %p1 = ptrtoint <4 x i8*> %arg to <4 x i32>
+  %bc = bitcast <4 x i8*> %arg to <4 x i32*>
+  %p2 = ptrtoint <4 x i32*> %bc to <4 x i32>
+  %sub = sub <4 x i32> %p1, %p2
+  ret <4 x i32> %sub
+}
+
+%struct.ham = type { i32, [2 x [2 x i32]] }
+
+@global = internal global %struct.ham zeroinitializer, align 4
+
+define i32 @ptrdiff5() nounwind {
+bb:
+  %tmp = getelementptr inbounds %struct.ham* @global, i32 0, i32 1
+  %tmp1 = getelementptr inbounds [2 x [2 x i32]]* %tmp, i32 0, i32 0
+  %tmp2 = bitcast [2 x i32]* %tmp1 to i32*
+  %tmp3 = ptrtoint i32* %tmp2 to i32
+  %tmp4 = getelementptr inbounds %struct.ham* @global, i32 0, i32 1
+  %tmp5 = getelementptr inbounds [2 x [2 x i32]]* %tmp4, i32 0, i32 0
+  %tmp6 = ptrtoint [2 x i32]* %tmp5 to i32
+  %tmp7 = sub i32 %tmp3, %tmp6
+  ret i32 %tmp7
+; CHECK: @ptrdiff5
+; CHECK: ret i32 0
+}
diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll
index 93fa29b..fe3dc77 100644
--- a/test/Transforms/JumpThreading/basic.ll
+++ b/test/Transforms/JumpThreading/basic.ll
@@ -497,8 +497,8 @@ l2:
   br label %l3
 
 l3:
-; CHECK: call void @g() noduplicate
-; CHECK-NOT: call void @g() noduplicate
+; CHECK: call void @g() [[NOD:#[0-9]+]]
+; CHECK-NOT: call void @g() [[NOD]]
   call void @g() noduplicate
   %y = icmp ult i32 %p, 5
   br i1 %y, label %l4, label %l5
@@ -512,3 +512,5 @@ l5:
   ret void
 ; CHECK: }
 }
+
+; CHECK: attributes [[NOD]] = { noduplicate }
diff --git a/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll b/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
index fe8d445..2bf2604 100644
--- a/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
+++ b/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -licm | lli %defaultjit
+; RUN: opt < %s -licm | lli -force-interpreter
 
 define i32 @main() {
 entry:
diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll
index 98f9334..1ca377e 100644
--- a/test/Transforms/LICM/hoisting.ll
+++ b/test/Transforms/LICM/hoisting.ll
@@ -90,3 +90,29 @@ for.end:                                          ; preds = %for.body
 
 declare void @foo_may_call_exit(i32)
 
+; PR14854
+; CHECK: @test5
+; CHECK: extractvalue
+; CHECK: br label %tailrecurse
+; CHECK: tailrecurse:
+; CHECK: ifend:
+; CHECK: insertvalue
+define { i32*, i32 } @test5(i32 %i, { i32*, i32 } %e) {
+entry:
+  br label %tailrecurse
+
+tailrecurse:                                      ; preds = %then, %entry
+  %i.tr = phi i32 [ %i, %entry ], [ %cmp2, %then ]
+  %out = extractvalue { i32*, i32 } %e, 1
+  %d = insertvalue { i32*, i32 } %e, i32* null, 0
+  %cmp1 = icmp sgt i32 %out, %i.tr
+  br i1 %cmp1, label %then, label %ifend
+
+then:                                             ; preds = %tailrecurse
+  call void @foo()
+  %cmp2 = add i32 %i.tr, 1
+  br label %tailrecurse
+
+ifend:                                            ; preds = %tailrecurse
+  ret { i32*, i32 } %d
+}
diff --git a/test/Transforms/LoopDeletion/simplify-then-delete.ll b/test/Transforms/LoopDeletion/simplify-then-delete.ll
index 5a21672..4278ef1 100644
--- a/test/Transforms/LoopDeletion/simplify-then-delete.ll
+++ b/test/Transforms/LoopDeletion/simplify-then-delete.ll
@@ -4,7 +4,7 @@
 ; Indvars and loop deletion should be able to eliminate all looping
 ; in this testcase.
 
-; CHECK:      define i32 @pmat(i32 %m, i32 %n, double* %y) nounwind {
+; CHECK:      define i32 @pmat(i32 %m, i32 %n, double* %y) #0 {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   ret i32 0
 ; CHECK-NEXT: }
@@ -63,3 +63,5 @@ w.e:
 w.e12:
   ret i32 0
 }
+
+; CHECK: attributes #0 = { nounwind }
diff --git a/test/Transforms/LoopIdiom/X86/popcnt.ll b/test/Transforms/LoopIdiom/X86/popcnt.ll
index 2f458fb..25df93d 100644
--- a/test/Transforms/LoopIdiom/X86/popcnt.ll
+++ b/test/Transforms/LoopIdiom/X86/popcnt.ll
@@ -118,3 +118,23 @@ while.end:                                        ; preds = %while.body, %entry
   %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
   ret i32 %c.0.lcssa
 }
+
+define i32 @PopCntCrash3(i64 %a, i32 %x) {
+entry:
+  %tobool3 = icmp eq i64 %a, 0
+  %cmp = icmp eq i32 %x, 0              ; depends only on the argument %x, so it is loop-invariant
+  br i1 %tobool3, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.05, 1
+  %sub = add i64 %a.addr.04, -1
+  %and = and i64 %sub, %a.addr.04       ; a & (a - 1)
+  %tobool = icmp eq i64 %and, 0         ; unused: the back-edge below branches on %cmp instead
+  br i1 %cmp, label %while.end, label %while.body    ; exit test is %cmp, not %tobool
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  ret i32 %c.0.lcssa
+}
diff --git a/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll b/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
index 53da462..9524be3 100644
--- a/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
+++ b/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
@@ -5,7 +5,7 @@
 ; PR13361: LSR + SCEV "hangs" on reasonably sized test with sequence of loops
 ;
 ; Without limits on CollectSubexpr, we have thousands of formulae for
-; the use that crosses loops. With limits we have six.
+; the use that crosses loops. With limits we have five.
 ; CHECK: LSR on loop %bb221:
 ; CHECK: After generating reuse formulae:
 ; CHECK: LSR is examining the following uses:
@@ -15,11 +15,8 @@
 ; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
 ; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
 ; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
-; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
 ; CHECK-NOT:reg
 ; CHECK: Filtering for use
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-freebsd9"
 
 %struct.snork = type { %struct.fuga, i32, i32, i32, i32, i32, i32 }
 %struct.fuga = type { %struct.gork, i64 }
diff --git a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
new file mode 100644
index 0000000..8fbddf8
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
@@ -0,0 +1,84 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; LTO of clang, which mistakenly uses no TargetLoweringInfo, causes a
+; miscompile. ReuseOrCreateCast replaces the ptrtoint operand with undef.
+; Reproducing the miscompile requires no triple, hence no "TTI".
+; rdar://13007381
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Verify that nothing uses the "dead" ptrtoint from "undef".
+; CHECK: @VerifyDiagnosticConsumerTest
+; CHECK: bb:
+; CHECK: %0 = ptrtoint i8* undef to i64
+; CHECK-NOT: %0
+; CHECK: .lr.ph
+; CHECK-NOT: %0
+; CHECK: sub i64 %7, %tmp6
+; CHECK-NOT: %0
+; CHECK: ret void
+define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 {
+bb:
+  %tmp3 = call i8* @getCharData() nounwind
+  %tmp4 = call i8* @getCharData() nounwind
+  %tmp5 = ptrtoint i8* %tmp4 to i64
+  %tmp6 = ptrtoint i8* %tmp3 to i64
+  %tmp7 = sub i64 %tmp5, %tmp6
+  br i1 undef, label %bb87, label %.preheader
+
+.preheader:                                       ; preds = %bb10, %bb
+  br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit42.thread, label %bb10
+
+bb10:                                             ; preds = %.preheader
+  br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit42, label %.preheader
+
+_ZNK4llvm9StringRef4findEcm.exit42:               ; preds = %bb10
+  br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit42.thread, label %.lr.ph
+
+_ZNK4llvm9StringRef4findEcm.exit42.thread:        ; preds = %_ZNK4llvm9StringRef4findEcm.exit42, %.preheader
+  unreachable
+
+.lr.ph:                                           ; preds = %_ZNK4llvm9StringRef4findEcm.exit42
+  br label %bb36
+
+_ZNK4llvm9StringRef4findEcm.exit.loopexit:        ; preds = %bb63
+  %tmp21 = icmp eq i64 %i.0.i, -1
+  br i1 %tmp21, label %_ZNK4llvm9StringRef4findEcm.exit._crit_edge, label %bb36
+
+_ZNK4llvm9StringRef4findEcm.exit._crit_edge:      ; preds = %bb61, %_ZNK4llvm9StringRef4findEcm.exit.loopexit
+  unreachable
+
+bb36:                                             ; preds = %_ZNK4llvm9StringRef4findEcm.exit.loopexit, %.lr.ph
+  %loc.063 = phi i64 [ undef, %.lr.ph ], [ %i.0.i, %_ZNK4llvm9StringRef4findEcm.exit.loopexit ]
+  switch i8 undef, label %bb57 [
+    i8 10, label %bb48
+    i8 13, label %bb48
+  ]
+
+bb48:                                             ; preds = %bb36, %bb36
+  br label %bb58
+
+bb57:                                             ; preds = %bb36
+  br label %bb58
+
+bb58:                                             ; preds = %bb57, %bb48
+  %tmp59 = icmp ugt i64 %tmp7, undef
+  %tmp60 = select i1 %tmp59, i64 undef, i64 %tmp7
+  br label %bb61
+
+bb61:                                             ; preds = %bb63, %bb58
+  %i.0.i = phi i64 [ %tmp60, %bb58 ], [ %tmp67, %bb63 ]
+  %tmp62 = icmp eq i64 %i.0.i, %tmp7
+  br i1 %tmp62, label %_ZNK4llvm9StringRef4findEcm.exit._crit_edge, label %bb63
+
+bb63:                                             ; preds = %bb61
+  %tmp64 = getelementptr inbounds i8* %tmp3, i64 %i.0.i
+  %tmp65 = load i8* %tmp64, align 1
+  %tmp67 = add i64 %i.0.i, 1
+  br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit.loopexit, label %bb61
+
+bb87:                                             ; preds = %bb
+  ret void
+}
+
+declare i8* @getCharData()
diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
index 9189d79..ee3cc4d 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
+++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -205,18 +205,18 @@ for.end:                                          ; preds = %for.body
 ; post-increment addressing, no add's or add.w's beyond the three
 ; mentioned. Most importantly, there should be no spills or reloads!
 ;
-; CHECK: testNeon:
-; CHECK: %.lr.ph
-; CHECK-NOT: lsl.w
-; CHECK-NOT: {{ldr|str|adds|add r}}
-; CHECK: add.w r
-; CHECK-NOT: {{ldr|str|adds|add r}}
-; CHECK: add.w r
-; CHECK-NOT: {{ldr|str|adds|add r}}
-; CHECK: add.w r
-; CHECK-NOT: {{ldr|str|adds|add r}}
-; CHECK-NOT: add.w r
-; CHECK: bne
+; A9: testNeon:
+; A9: %.lr.ph
+; A9-NOT: lsl.w
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9: add.w r
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9: add.w r
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9: add.w r
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9-NOT: add.w r
+; A9: bne
 define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i8>* nocapture %data) nounwind optsize {
   %1 = icmp sgt i32 %limit, 0
   br i1 %1, label %.lr.ph, label %45
@@ -290,3 +290,80 @@ define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i
 }
 
 declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly
+
+; Handle chains in which the same offset is used for both loads and
+; stores to the same array.
+; rdar://11410078.
+;
+; A9: @testReuse
+; A9: %for.body
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE:[r[0-9]+]]], [[INC:r[0-9]]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], {{r[0-9]}}
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]]
+; A9: bne
+define void @testReuse(i8* %src, i32 %stride) nounwind ssp {
+entry:
+  %mul = shl nsw i32 %stride, 2
+  %idx.neg = sub i32 0, %mul
+  %mul1 = mul nsw i32 %stride, 3
+  %idx.neg2 = sub i32 0, %mul1
+  %mul5 = shl nsw i32 %stride, 1
+  %idx.neg6 = sub i32 0, %mul5
+  %idx.neg10 = sub i32 0, %stride
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.0110 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %src.addr = phi i8* [ %src, %entry ], [ %add.ptr45, %for.body ]
+  %add.ptr = getelementptr inbounds i8* %src.addr, i32 %idx.neg
+  %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr, i32 1)
+  %add.ptr3 = getelementptr inbounds i8* %src.addr, i32 %idx.neg2
+  %vld2 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr3, i32 1)
+  %add.ptr7 = getelementptr inbounds i8* %src.addr, i32 %idx.neg6
+  %vld3 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr7, i32 1)
+  %add.ptr11 = getelementptr inbounds i8* %src.addr, i32 %idx.neg10
+  %vld4 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr11, i32 1)
+  %vld5 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %src.addr, i32 1)
+  %add.ptr17 = getelementptr inbounds i8* %src.addr, i32 %stride
+  %vld6 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr17, i32 1)
+  %add.ptr20 = getelementptr inbounds i8* %src.addr, i32 %mul5
+  %vld7 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr20, i32 1)
+  %add.ptr23 = getelementptr inbounds i8* %src.addr, i32 %mul1
+  %vld8 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr23, i32 1)
+  %vadd1 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld1, <8 x i8> %vld2) nounwind
+  %vadd2 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld2, <8 x i8> %vld3) nounwind
+  %vadd3 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld3, <8 x i8> %vld4) nounwind
+  %vadd4 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld4, <8 x i8> %vld5) nounwind
+  %vadd5 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld5, <8 x i8> %vld6) nounwind
+  %vadd6 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld6, <8 x i8> %vld7) nounwind
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr3, <8 x i8> %vadd1, i32 1)
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr7, <8 x i8> %vadd2, i32 1)
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr11, <8 x i8> %vadd3, i32 1)
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %src.addr, <8 x i8> %vadd4, i32 1)
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr17, <8 x i8> %vadd5, i32 1)
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr20, <8 x i8> %vadd6, i32 1)
+  %inc = add nsw i32 %i.0110, 1
+  %add.ptr45 = getelementptr inbounds i8* %src.addr, i32 8
+  %exitcond = icmp eq i32 %inc, 4
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) nounwind readonly
+
+declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
+
+declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
diff --git a/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
index 5108650..eedfc20 100644
--- a/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
 ;
 ; Test LSR's ability to prune formulae that refer to nonexistant
 ; AddRecs in other loops.
@@ -15,13 +15,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-darwin"
 
 ; CHECK: @test
-; CHECK: # %for.body{{$}}
-; dummyiv copy should be removed
-; CHECK-NOT: movq
-; CHECK: # %for.cond19.preheader
-; dummycnt should be removed
-; CHECK-NOT: incq
-; CHECK: # %for.body23{{$}}
+; CHECK: for.body:
+; CHECK: %lsr.iv
+; CHECK-NOT: %dummyout
+; CHECK: ret
 define i64 @test(i64 %count, float* nocapture %srcrow, i32* nocapture %destrow) nounwind uwtable ssp {
 entry:
   %cmp34 = icmp eq i64 %count, 0
diff --git a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index 9e02d92..45aeb4e 100644
--- a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -4,18 +4,17 @@
 ; LSR should properly handle the post-inc offset when folding the
 ; non-IV operand of an icmp into the IV.
 
-; CHECK:   %3 = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
-; CHECK:   %4 = lshr i64 %3, 1
-; CHECK:   %5 = mul i64 %4, 2
+; CHECK:   [[r1:%[a-z0-9]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+; CHECK:   [[r2:%[a-z0-9]+]] = lshr i64 [[r1]], 1
+; CHECK:   [[r3:%[a-z0-9]+]] = mul i64 [[r2]], 2
 ; CHECK:   br label %for.body
 ; CHECK: for.body:
-; CHECK:   %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ %5, %for.body.lr.ph ]
+; CHECK:   %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ [[r3]], %for.body.lr.ph ]
 ; CHECK:   %lsr.iv.next = add i64 %lsr.iv2, -2
 ; CHECK:   %lsr.iv.next3 = inttoptr i64 %lsr.iv.next to i16*
 ; CHECK:   %cmp27 = icmp eq i16* %lsr.iv.next3, null
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
 
 %struct.Vector2 = type { i16*, [64 x i16], i32 }
 
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
index 59a8236..bde52da 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
@@ -19,7 +19,7 @@
 ; CHECK-NEXT:     i32 1, label %inc.us
 
 ; CHECK:      inc.us:                                           ; preds = %loop_begin.us
-; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   call void @incf() [[NOR_NUW:#[0-9]+]]
 ; CHECK-NEXT:   br label %loop_begin.backedge.us
 
 ; CHECK:      .split:                                           ; preds = %..split_crit_edge
@@ -40,7 +40,7 @@
 ; CHECK-NEXT:   ]
 
 ; CHECK:      dec.us3:                                          ; preds = %loop_begin.us1
-; CHECK-NEXT:   call void @decf() noreturn nounwind
+; CHECK-NEXT:   call void @decf() [[NOR_NUW]]
 ; CHECK-NEXT:   br label %loop_begin.backedge.us5
 
 ; CHECK:      .split.split:                                     ; preds = %.split..split.split_crit_edge
@@ -89,3 +89,6 @@ loop_exit:
 
 declare void @incf() noreturn
 declare void @decf() noreturn
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
index 67982fe..c3bf596 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
@@ -25,7 +25,7 @@
 ; CHECK-NEXT:   ]
 
 ; CHECK:      inc.us:                                           ; preds = %second_switch.us, %loop_begin.us
-; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   call void @incf() [[NOR_NUW:#[0-9]+]]
 ; CHECK-NEXT:   br label %loop_begin.backedge.us
 
 ; CHECK:      .split:                                           ; preds = %..split_crit_edge
@@ -45,7 +45,7 @@
 ; CHECK-NEXT:   ]
 
 ; CHECK:      inc:                                              ; preds = %loop_begin.inc_crit_edge, %second_switch
-; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   call void @incf() [[NOR_NUW]]
 ; CHECK-NEXT:   br label %loop_begin.backedge
 
 define i32 @test(i32* %var) {
@@ -82,3 +82,6 @@ loop_exit:
 
 declare void @incf() noreturn
 declare void @decf() noreturn
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
index 36b7eff..9530333 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
@@ -30,7 +30,7 @@
 ; CHECK-NEXT:     i32 1, label %inc.us.us
 
 ; CHECK:      inc.us.us:                                        ; preds = %second_switch.us.us, %loop_begin.us.us
-; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   call void @incf() [[NOR_NUW:#[0-9]+]]
 ; CHECK-NEXT:   br label %loop_begin.backedge.us.us
 
 ; CHECK:      .split.us.split:                                  ; preds = %.split.us..split.us.split_crit_edge
@@ -50,7 +50,7 @@
 ; CHECK-NEXT:   br i1 true, label %us-unreachable8, label %inc.us
 
 ; CHECK:      inc.us:                                           ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us
-; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   call void @incf() [[NOR_NUW]]
 ; CHECK-NEXT:   br label %loop_begin.backedge.us
 
 ; CHECK:      .split:                                           ; preds = %..split_crit_edge
@@ -75,7 +75,7 @@
 ; CHECK-NEXT:   ]
 
 ; CHECK:      inc.us4:                                          ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us3
-; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   call void @incf() [[NOR_NUW]]
 ; CHECK-NEXT:   br label %loop_begin.backedge.us6
 
 ; CHECK:      loop_begin.inc_crit_edge.us:                      ; preds = %loop_begin.us1
@@ -136,3 +136,6 @@ loop_exit:
 
 declare void @incf() noreturn
 declare void @decf() noreturn
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }
diff --git a/test/Transforms/LoopUnswitch/infinite-loop.ll b/test/Transforms/LoopUnswitch/infinite-loop.ll
index 73391ca..f3fba64 100644
--- a/test/Transforms/LoopUnswitch/infinite-loop.ll
+++ b/test/Transforms/LoopUnswitch/infinite-loop.ll
@@ -21,11 +21,11 @@
 ; CHECK-NEXT: br label %cond.end.us
 
 ; CHECK: abort0.split:
-; CHECK-NEXT: call void @end0() noreturn nounwind
+; CHECK-NEXT: call void @end0() [[NOR_NUW:#[0-9]+]]
 ; CHECK-NEXT: unreachable
 
 ; CHECK: abort1:
-; CHECK-NEXT: call void @end1() noreturn nounwind
+; CHECK-NEXT: call void @end1() [[NOR_NUW]]
 ; CHECK-NEXT: unreachable
 
 ; CHECK: }
@@ -51,3 +51,7 @@ abort1:
 
 declare void @end0() noreturn
 declare void @end1() noreturn
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes #1 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }
diff --git a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
index f285887..2dd7fe3 100644
--- a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
+++ b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/ARM/arm-unroll.ll b/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
new file mode 100644
index 0000000..c8d307f
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -mcpu=swift -S | FileCheck %s --check-prefix=SWIFT
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios3.0.0"
+
+;CHECK: @foo
+;CHECK: load <4 x i32>
+;CHECK-NOT: load <4 x i32>
+;CHECK: ret
+;SWIFT: @foo
+;SWIFT: load <4 x i32>
+;SWIFT: load <4 x i32>
+;SWIFT: ret
+define i32 @foo(i32* nocapture %A, i32 %n) nounwind readonly ssp {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %i.02 = phi i32 [ %5, %.lr.ph ], [ 0, %0 ]
+  %sum.01 = phi i32 [ %4, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds i32* %A, i32 %i.02
+  %3 = load i32* %2, align 4
+  %4 = add nsw i32 %3, %sum.01
+  %5 = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %5, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  %sum.0.lcssa = phi i32 [ 0, %0 ], [ %4, %.lr.ph ]
+  ret i32 %sum.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/ARM/gcc-examples.ll b/test/Transforms/LoopVectorize/ARM/gcc-examples.ll
new file mode 100644
index 0000000..6a68e81
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/gcc-examples.ll
@@ -0,0 +1,60 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -mcpu=swift -S -dce | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios3.0.0"
+
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+; Select VF = 4 (the CHECK lines below expect <4 x i32> operations).
+;CHECK: @example1
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example1() nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+  %5 = load i32* %4, align 4
+  %6 = add nsw i32 %5, %3
+  %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %6, i32* %7, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %8, label %1
+
+; <label>:8                                       ; preds = %1
+  ret void
+}
+
+;CHECK: @example10b
+;CHECK: load <4 x i16>
+;CHECK: sext <4 x i16>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds i16* %sb, i64 %indvars.iv
+  %3 = load i16* %2, align 2
+  %4 = sext i16 %3 to i32
+  %5 = getelementptr inbounds i32* %ia, i64 %indvars.iv
+  store i32 %4, i32* %5, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %6, label %1
+
+; <label>:6                                       ; preds = %1
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/ARM/lit.local.cfg b/test/Transforms/LoopVectorize/ARM/lit.local.cfg
new file mode 100644
index 0000000..cb77b09
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll b/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll
new file mode 100644
index 0000000..d2e3de2
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll
@@ -0,0 +1,114 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
+; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s
+; ASM lines below are for reference only; tests in that direction should go in test/CodeGen/ARM
+
+; ModuleID = 'arm.ll'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7--linux-gnueabihf"
+
+%T216 = type <2 x i16>
+%T232 = type <2 x i32>
+%T264 = type <2 x i64>
+
+%T416 = type <4 x i16>
+%T432 = type <4 x i32>
+%T464 = type <4 x i64>
+
+define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
+; COST: function 'direct':
+  %v0 = load %T432* %loadaddr
+; ASM: vld1.64
+  %v1 = load %T432* %loadaddr2
+; ASM: vld1.64
+  %r3 = mul %T432 %v0, %v1 
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+; ASM: vmul.i32
+  store %T432 %r3, %T432* %storeaddr
+; ASM: vst1.64
+  ret void
+}
+
+define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+; COST: function 'ups1632':
+  %v0 = load %T416* %loadaddr
+; ASM: vldr
+  %v1 = load %T416* %loadaddr2
+; ASM: vldr
+  %r1 = sext %T416 %v0 to %T432
+  %r2 = sext %T416 %v1 to %T432
+; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32>
+  %r3 = mul %T432 %r1, %r2 
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+; ASM: vmull.s16
+  store %T432 %r3, %T432* %storeaddr
+; ASM: vst1.64
+  ret void
+}
+
+define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+; COST: function 'upu1632':
+  %v0 = load %T416* %loadaddr
+; ASM: vldr
+  %v1 = load %T416* %loadaddr2
+; ASM: vldr
+  %r1 = zext %T416 %v0 to %T432
+  %r2 = zext %T416 %v1 to %T432
+; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32>
+  %r3 = mul %T432 %r1, %r2 
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+; ASM: vmull.u16
+  store %T432 %r3, %T432* %storeaddr
+; ASM: vst1.64
+  ret void
+}
+
+define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+; COST: function 'ups3264':
+  %v0 = load %T232* %loadaddr
+; ASM: vldr
+  %v1 = load %T232* %loadaddr2
+; ASM: vldr
+  %r3 = mul %T232 %v0, %v1 
+; ASM: vmul.i32
+; COST: cost of 1 for instruction: {{.*}} mul <2 x i32>
+  %st = sext %T232 %r3 to %T264
+; ASM: vmovl.s32
+; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64>
+  store %T264 %st, %T264* %storeaddr
+; ASM: vst1.64
+  ret void
+}
+
+define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+; COST: function 'upu3264':
+  %v0 = load %T232* %loadaddr
+; ASM: vldr
+  %v1 = load %T232* %loadaddr2
+; ASM: vldr
+  %r3 = mul %T232 %v0, %v1 
+; ASM: vmul.i32
+; COST: cost of 1 for instruction: {{.*}} mul <2 x i32>
+  %st = zext %T232 %r3 to %T264
+; ASM: vmovl.u32
+; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64>
+  store %T264 %st, %T264* %storeaddr
+; ASM: vst1.64
+  ret void
+}
+
+define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) {
+; COST: function 'dn3216':
+  %v0 = load %T432* %loadaddr
+; ASM: vld1.64
+  %v1 = load %T432* %loadaddr2
+; ASM: vld1.64
+  %r3 = mul %T432 %v0, %v1 
+; ASM: vmul.i32
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+  %st = trunc %T432 %r3 to %T416
+; ASM: vmovn.i32
+; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16>
+  store %T416 %st, %T416* %storeaddr
+; ASM: vstr
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/ARM/width-detect.ll b/test/Transforms/LoopVectorize/ARM/width-detect.ll
new file mode 100644
index 0000000..c0795b6
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/width-detect.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios3.0.0"
+
+;CHECK:foo_F64
+;CHECK: <2 x double>
+;CHECK:ret
+define double @foo_F64(double* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %prod.01 = phi double [ %4, %.lr.ph ], [ 0.000000e+00, %0 ]
+  %2 = getelementptr inbounds double* %A, i64 %indvars.iv
+  %3 = load double* %2, align 8
+  %4 = fmul fast double %prod.01, %3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  %prod.0.lcssa = phi double [ 0.000000e+00, %0 ], [ %4, %.lr.ph ]
+  ret double %prod.0.lcssa
+}
+
+;CHECK:foo_I8
+;CHECK: xor <16 x i8>
+;CHECK:ret
+define signext i8 @foo_I8(i8* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %red.01 = phi i8 [ %4, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds i8* %A, i64 %indvars.iv
+  %3 = load i8* %2, align 1
+  %4 = xor i8 %3, %red.01
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  %red.0.lcssa = phi i8 [ 0, %0 ], [ %4, %.lr.ph ]
+  ret i8 %red.0.lcssa
+}
+
+
diff --git a/test/Transforms/LoopVectorize/X86/avx1.ll b/test/Transforms/LoopVectorize/X86/avx1.ll
index a2d176a..a85c6fe 100644
--- a/test/Transforms/LoopVectorize/X86/avx1.ll
+++ b/test/Transforms/LoopVectorize/X86/avx1.ll
@@ -27,7 +27,7 @@ define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwta
 
 
 ;CHECK: @read_mod_i64
-;CHECK: load <8 x i64>
+;CHECK: load <4 x i64>
 ;CHECK: ret i32
 define i32 @read_mod_i64(i64* nocapture %a, i32 %n) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 0
diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
index 60c742e..23d9233 100644
--- a/test/Transforms/LoopVectorize/X86/conversion-cost.ll
+++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.8.0"
 
 ;CHECK: @conversion_cost1
-;CHECK: store <8 x i8>
+;CHECK: store <32 x i8>
 ;CHECK: ret
 define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 3
diff --git a/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
index 0f21ba6..d2d0eac 100644
--- a/test/Transforms/LoopVectorize/X86/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -licm -S | FileCheck %s
-; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -force-vector-unroll=0 -dce -instcombine -licm -S | FileCheck %s -check-prefix=UNROLL
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -force-vector-unroll=0 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -53,8 +53,6 @@ define void @example1() nounwind uwtable ssp {
 ;UNROLL: @example10b
 ;UNROLL: load <4 x i16>
 ;UNROLL: load <4 x i16>
-;UNROLL: load <4 x i16>
-;UNROLL: store <4 x i32>
 ;UNROLL: store <4 x i32>
 ;UNROLL: store <4 x i32>
 ;UNROLL: ret void
diff --git a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
new file mode 100644
index 0000000..186fba8
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -vectorizer-min-trip-count=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: <4 x float>
+define void @trivial_loop(float* nocapture %a) nounwind uwtable optsize {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %add = fadd float %0, 1.000000e+00
+  store float %add, float* %arrayidx, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 8
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
new file mode 100644
index 0000000..452d0df
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The parallel loop has been invalidated by the new memory accesses introduced
+; by reg2mem (Loop::isParallel() starts to return false). Ensure the loop is
+; now non-vectorizable.
+
+;CHECK-NOT: <4 x i32>
+define void @parallel_loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  %indvars.iv.next.reg2mem = alloca i64
+  %indvars.iv.reg2mem = alloca i64
+  %"reg2mem alloca point" = bitcast i32 0 to i32
+  store i64 0, i64* %indvars.iv.reg2mem
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.for.body_crit_edge, %entry
+  %indvars.iv.reload = load i64* %indvars.iv.reg2mem
+  %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv.reload
+  %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv.reload
+  %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %idxprom3 = sext i32 %1 to i64
+  %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+  store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %indvars.iv.next = add i64 %indvars.iv.reload, 1
+  ; A new store without the parallel metadata here:
+  store i64 %indvars.iv.next, i64* %indvars.iv.next.reg2mem
+  %indvars.iv.next.reload1 = load i64* %indvars.iv.next.reg2mem
+  %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next.reload1
+  %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %indvars.iv.next.reload = load i64* %indvars.iv.next.reg2mem
+  %lftr.wideiv = trunc i64 %indvars.iv.next.reload to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 512
+  br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge, !llvm.loop.parallel !3
+
+for.body.for.body_crit_edge:                      ; preds = %for.body
+  %indvars.iv.next.reload2 = load i64* %indvars.iv.next.reg2mem
+  store i64 %indvars.iv.next.reload2, i64* %indvars.iv.reg2mem
+  br label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !3}
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
new file mode 100644
index 0000000..f648722
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
@@ -0,0 +1,114 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; A tricky loop:
+;
+; void loop(int *a, int *b) {
+;    for (int i = 0; i < 512; ++i) {
+;        a[a[i]] = b[i];
+;        a[i] = b[i+1];
+;    }
+;}
+
+;CHECK: @loop
+;CHECK-NOT: <4 x i32>
+define void @loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx2, align 4, !tbaa !0
+  %idxprom3 = sext i32 %1 to i64
+  %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+  store i32 %0, i32* %arrayidx4, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
+  %2 = load i32* %arrayidx6, align 4, !tbaa !0
+  store i32 %2, i32* %arrayidx2, align 4, !tbaa !0
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 512
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; The same loop with parallel loop metadata added to the loop branch
+; and the memory instructions.
+
+;CHECK: @parallel_loop
+;CHECK: <4 x i32>
+define void @parallel_loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %idxprom3 = sext i32 %1 to i64
+  %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+  ; This store might have originated from inlining a function with a parallel
+  ; loop. Its list (!5) names this loop (!3) and the "original loop" (!4).
+  store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !5
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
+  %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 512
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !3
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; The same loop with an illegal parallel loop metadata: the memory
+; accesses refer to a different loop's identifier.
+
+;CHECK: @mixed_metadata
+;CHECK-NOT: <4 x i32>
+
+define void @mixed_metadata(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+  %idxprom3 = sext i32 %1 to i64
+  %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+  ; This refers to the loop marked with !7 which we are not in at the moment.
+  ; It should prevent detecting as a parallel loop.
+  store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !7
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
+  %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+  store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 512
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !6
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !3}
+!4 = metadata !{metadata !4}
+!5 = metadata !{metadata !3, metadata !4}
+!6 = metadata !{metadata !6}
+!7 = metadata !{metadata !7}
diff --git a/test/Transforms/LoopVectorize/small-size.ll b/test/Transforms/LoopVectorize/X86/small-size.ll
index 35b91bb..f390b33 100644
--- a/test/Transforms/LoopVectorize/small-size.ll
+++ b/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
index 2075986..ef63a14 100644
--- a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
+++ b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx2 -force-vector-width=4 -force-vector-unroll=0 -dce -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/X86/unroll_selection.ll b/test/Transforms/LoopVectorize/X86/unroll_selection.ll
new file mode 100644
index 0000000..2d7b663
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/unroll_selection.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Don't unroll when we have register pressure.
+;CHECK: reg_pressure
+;CHECK: load <4 x double>
+;CHECK-NOT: load <4 x double>
+;CHECK: store <4 x double>
+;CHECK-NOT: store <4 x double>
+;CHECK: ret
+define void @reg_pressure(double* nocapture %A, i32 %n) nounwind uwtable ssp {
+  %1 = sext i32 %n to i64
+  br label %2
+
+; <label>:2                                       ; preds = %2, %0
+  %indvars.iv = phi i64 [ %indvars.iv.next, %2 ], [ %1, %0 ]
+  %3 = getelementptr inbounds double* %A, i64 %indvars.iv
+  %4 = load double* %3, align 8
+  %5 = fadd double %4, 3.000000e+00
+  %6 = fmul double %4, 2.000000e+00
+  %7 = fadd double %5, %6
+  %8 = fadd double %7, 2.000000e+00
+  %9 = fmul double %8, 5.000000e-01
+  %10 = fadd double %6, %9
+  %11 = fsub double %10, %5
+  %12 = fadd double %4, %11
+  %13 = fdiv double %8, %12
+  %14 = fmul double %13, %8
+  %15 = fmul double %6, %14
+  %16 = fmul double %5, %15
+  %17 = fadd double %16, -3.000000e+00
+  %18 = fsub double %4, %5
+  %19 = fadd double %6, %18
+  %20 = fadd double %13, %19
+  %21 = fadd double %20, %17
+  %22 = fadd double %21, 3.000000e+00
+  %23 = fmul double %4, %22
+  store double %23, double* %3, align 8
+  %indvars.iv.next = add i64 %indvars.iv, -1
+  %24 = trunc i64 %indvars.iv to i32
+  %25 = icmp eq i32 %24, 0
+  br i1 %25, label %26, label %2
+
+; <label>:26                                      ; preds = %2
+  ret void
+}
+
+; This is a small loop. Unroll it twice. 
+;CHECK: small_loop
+;CHECK: xor
+;CHECK: xor
+;CHECK: ret
+define void @small_loop(i16* nocapture %A, i64 %n) nounwind uwtable ssp {
+  %1 = icmp eq i64 %n, 0
+  br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %i.01 = phi i64 [ %5, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds i16* %A, i64 %i.01
+  %3 = load i16* %2, align 2
+  %4 = xor i16 %3, 3
+  store i16 %4, i16* %2, align 2
+  %5 = add i64 %i.01, 1
+  %exitcond = icmp eq i64 %5, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
new file mode 100644
index 0000000..59bb8d0
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
@@ -0,0 +1,150 @@
+; RUN: opt -loop-vectorize -mcpu=corei7-avx -debug -S < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%0 = type { %0*, %1 }
+%1 = type { i8*, i32 }
+
+@p = global [2048 x [8 x i32*]] zeroinitializer, align 16
+@q = global [2048 x i16] zeroinitializer, align 16
+@r = global [2048 x i16] zeroinitializer, align 16
+
+; Tests for widest type
+; Ensure that we count the pointer store in the first test case. We have a
+; consecutive vector of pointers store, therefore we should count it towards the
+; widest vector count.
+;
+; CHECK: test_consecutive_store
+; CHECK: The Widest type: 64 bits
+define void @test_consecutive_store(%0**, %0**, %0** nocapture) nounwind ssp uwtable align 2 {
+  %4 = load %0** %2, align 8
+  %5 = icmp eq %0** %0, %1
+  br i1 %5, label %12, label %6
+
+; <label>:6                                       ; preds = %3
+  br label %7
+
+; <label>:7                                       ; preds = %7, %6
+  %8 = phi %0** [ %0, %6 ], [ %9, %7 ]
+  store %0* %4, %0** %8, align 8
+  %9 = getelementptr inbounds %0** %8, i64 1
+  %10 = icmp eq %0** %9, %1
+  br i1 %10, label %11, label %7
+
+; <label>:11                                      ; preds = %7
+  br label %12
+
+; <label>:12                                      ; preds = %11, %3
+  ret void
+}
+
+; However, if the store of a set of pointers is not to consecutive memory we do
+; NOT count the store towards the widest vector type.
+; In the test case below we load i16 values and store them (plus one) into an
+; array of pointers; therefore the widest type should be i16.
+; int* p[2048][8];
+; short q[2048];
+;   for (int y = 0; y < 8; ++y)
+;     for (int i = 0; i < 1024; ++i) {
+;       p[i][y] = (int*) (1 + q[i]);
+;     }
+; CHECK: test_nonconsecutive_store
+; CHECK: The Widest type: 16 bits
+define void @test_nonconsecutive_store() nounwind ssp uwtable {
+  br label %1
+
+; <label>:1                                       ; preds = %14, %0
+  %2 = phi i64 [ 0, %0 ], [ %15, %14 ]
+  br label %3
+
+; <label>:3                                       ; preds = %3, %1
+  %4 = phi i64 [ 0, %1 ], [ %11, %3 ]
+  %5 = getelementptr inbounds [2048 x i16]* @q, i64 0, i64 %4
+  %6 = load i16* %5, align 2
+  %7 = sext i16 %6 to i64
+  %8 = add i64 %7, 1
+  %9 = inttoptr i64 %8 to i32*
+  %10 = getelementptr inbounds [2048 x [8 x i32*]]* @p, i64 0, i64 %4, i64 %2
+  store i32* %9, i32** %10, align 8
+  %11 = add i64 %4, 1
+  %12 = trunc i64 %11 to i32
+  %13 = icmp ne i32 %12, 1024
+  br i1 %13, label %3, label %14
+
+; <label>:14                                      ; preds = %3
+  %15 = add i64 %2, 1
+  %16 = trunc i64 %15 to i32
+  %17 = icmp ne i32 %16, 8
+  br i1 %17, label %1, label %18
+
+; <label>:18                                      ; preds = %14
+  ret void
+}
+
+
+@ia = global [1024 x i32*] zeroinitializer, align 16
+@ib = global [1024 x i32] zeroinitializer, align 16
+@ic = global [1024 x i8] zeroinitializer, align 16
+@p2 = global [2048 x [8 x i32*]] zeroinitializer, align 16
+@q2 = global [2048 x i16] zeroinitializer, align 16
+
+;; Now we check the same rules for loads. We should take consecutive loads of
+;; pointer types into account.
+; CHECK: test_consecutive_ptr_load
+; CHECK: The Widest type: 64 bits
+define i8 @test_consecutive_ptr_load() nounwind readonly ssp uwtable {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %2 = phi i64 [ 0, %0 ], [ %10, %1 ]
+  %3 = phi i8 [ 0, %0 ], [ %9, %1 ]
+  %4 = getelementptr inbounds [1024 x i32*]* @ia, i32 0, i64 %2
+  %5 = load i32** %4, align 4
+  %6 = ptrtoint i32* %5 to i64
+  %7 = trunc i64 %6 to i8
+  %8 = add i8 %3, 1
+  %9 = add i8 %7, %8
+  %10 = add i64 %2, 1
+  %11 = icmp ne i64 %10, 1024
+  br i1 %11, label %1, label %12
+
+; <label>:12                                      ; preds = %1
+  %13 = phi i8 [ %9, %1 ]
+  ret i8 %13
+}
+
+;; However, we should not take nonconsecutive loads of pointers into account.
+; CHECK: test_nonconsecutive_ptr_load
+; CHECK: The Widest type: 16 bits
+define void @test_nonconsecutive_ptr_load() nounwind ssp uwtable {
+  br label %1
+
+; <label>:1                                       ; preds = %13, %0
+  %2 = phi i64 [ 0, %0 ], [ %14, %13 ]
+  br label %3
+
+; <label>:3                                       ; preds = %3, %1
+  %4 = phi i64 [ 0, %1 ], [ %10, %3 ]
+  %5 = getelementptr inbounds [2048 x [8 x i32*]]* @p2, i64 0, i64 %4, i64 %2
+  %6 = getelementptr inbounds [2048 x i16]* @q2, i64 0, i64 %4
+  %7 = load i32** %5, align 2
+  %8 = ptrtoint i32* %7 to i64
+  %9 = trunc i64 %8 to i16
+  store i16 %9, i16* %6, align 8
+  %10 = add i64 %4, 1
+  %11 = trunc i64 %10 to i32
+  %12 = icmp ne i32 %11, 1024
+  br i1 %12, label %3, label %13
+
+; <label>:13                                      ; preds = %3
+  %14 = add i64 %2, 1
+  %15 = trunc i64 %14 to i32
+  %16 = icmp ne i32 %15, 8
+  br i1 %16, label %1, label %17
+
+; <label>:17                                      ; preds = %13
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/bzip_reverse_loops.ll b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
new file mode 100644
index 0000000..431e422
--- /dev/null
+++ b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: fc
+;CHECK: load <4 x i16>
+;CHECK-NEXT: shufflevector <4 x i16>
+;CHECK: select <4 x i1>
+;CHECK: store <4 x i16>
+;CHECK: ret
+define void @fc(i16* nocapture %p, i32 %n, i32 %size) nounwind uwtable ssp {
+entry:
+  br label %do.body
+
+do.body:                                          ; preds = %cond.end, %entry
+  %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %cond.end ]
+  %p.addr.0 = phi i16* [ %p, %entry ], [ %incdec.ptr, %cond.end ]
+  %incdec.ptr = getelementptr inbounds i16* %p.addr.0, i64 -1
+  %0 = load i16* %incdec.ptr, align 2, !tbaa !0
+  %conv = zext i16 %0 to i32
+  %cmp = icmp ult i32 %conv, %size
+  br i1 %cmp, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %do.body
+  %sub = sub i32 %conv, %size
+  %phitmp = trunc i32 %sub to i16
+  br label %cond.end
+
+cond.end:                                         ; preds = %do.body, %cond.true
+  %cond = phi i16 [ %phitmp, %cond.true ], [ 0, %do.body ]
+  store i16 %cond, i16* %incdec.ptr, align 2, !tbaa !0
+  %dec = add i32 %n.addr.0, -1
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %do.end, label %do.body
+
+do.end:                                           ; preds = %cond.end
+  ret void
+}
+
+;CHECK: example1
+;CHECK: load <4 x i32>
+;CHECK-NEXT: shufflevector <4 x i32>
+;CHECK: select <4 x i1>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define void @example1(i32* nocapture %a, i32 %n, i32 %wsize) nounwind uwtable ssp {
+entry:
+  br label %do.body
+
+do.body:                                          ; preds = %do.body, %entry
+  %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ]
+  %p.0 = phi i32* [ %a, %entry ], [ %incdec.ptr, %do.body ]
+  %incdec.ptr = getelementptr inbounds i32* %p.0, i64 -1
+  %0 = load i32* %incdec.ptr, align 4, !tbaa !3
+  %cmp = icmp slt i32 %0, %wsize
+  %sub = sub nsw i32 %0, %wsize
+  %cond = select i1 %cmp, i32 0, i32 %sub
+  store i32 %cond, i32* %incdec.ptr, align 4, !tbaa !3
+  %dec = add nsw i32 %n.addr.0, -1
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %do.end, label %do.body
+
+do.end:                                           ; preds = %do.body
+  ret void
+}
+
+!0 = metadata !{metadata !"short", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Transforms/LoopVectorize/calloc.ll b/test/Transforms/LoopVectorize/calloc.ll
index 55c1eba..08c84ef 100644
--- a/test/Transforms/LoopVectorize/calloc.ll
+++ b/test/Transforms/LoopVectorize/calloc.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/cast-induction.ll b/test/Transforms/LoopVectorize/cast-induction.ll
index 5c090aa..2aa29ed 100644
--- a/test/Transforms/LoopVectorize/cast-induction.ll
+++ b/test/Transforms/LoopVectorize/cast-induction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 ; rdar://problem/12848162
 
diff --git a/test/Transforms/LoopVectorize/cpp-new-array.ll b/test/Transforms/LoopVectorize/cpp-new-array.ll
index 7cd608d..da0fb05 100644
--- a/test/Transforms/LoopVectorize/cpp-new-array.ll
+++ b/test/Transforms/LoopVectorize/cpp-new-array.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/flags.ll b/test/Transforms/LoopVectorize/flags.ll
index b7f3815..656912e 100644
--- a/test/Transforms/LoopVectorize/flags.ll
+++ b/test/Transforms/LoopVectorize/flags.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll
index b8b125f..f335557 100644
--- a/test/Transforms/LoopVectorize/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -dce -instcombine -licm -S | FileCheck %s
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-unroll=4 -dce -instcombine -licm -S | FileCheck %s -check-prefix=UNROLL
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-unroll=4 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/global_alias.ll b/test/Transforms/LoopVectorize/global_alias.ll
new file mode 100644
index 0000000..24e698b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/global_alias.ll
@@ -0,0 +1,1078 @@
+; RUN: opt < %s -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+
+%struct.anon = type { [100 x i32], i32, [100 x i32] }  ; two 100-element i32 arrays (fields 0 and 2) separated by one i32 (field 1)
+%struct.anon.0 = type { [100 x [100 x i32]], i32, [100 x [100 x i32]] }  ; two 100x100 i32 matrices (fields 0 and 2) separated by one i32 (field 1)
+
+@Foo = common global %struct.anon zeroinitializer, align 4  ; single object holding both 1-D arrays used by the tests
+@Bar = common global %struct.anon.0 zeroinitializer, align 4  ; single object holding both 2-D arrays used by the tests
+
+@PB = external global i32*  ; externally defined i32 pointer; the pointer tests load through it
+@PA = external global i32*  ; externally defined i32 pointer; the pointer tests store through it
+
+
+;; === First, the tests that should always vectorize, either statically or by adding run-time checks ===
+
+
+; /// Different objects, positive induction, constant distance
+; int noAlias01 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i] = Foo.B[i] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias01
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias01(i32 %a) nounwind {  ; for i in [0,100): @Foo field0[i] = @Foo field2[i] + %a; returns @Foo field0[%a]
+entry:
+  %a.addr = alloca i32, align 4  ; stack slot for the argument %a
+  %i = alloca i32, align 4  ; stack slot for the loop counter
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4  ; counter starts at 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100  ; stay in the loop while counter < 100 (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1  ; read address: field 2, element i
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %arrayidx1 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4  ; write address: field 0, element i
+  store i32 %add, i32* %arrayidx1, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1  ; counter += 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6  ; result address: field 0, element %a
+  %7 = load i32* %arrayidx2, align 4
+  ret i32 %7
+}
+
+; /// Different objects, positive induction with widening slide
+; int noAlias02 (int a) {
+;   int i;
+;   for (i=0; i<SIZE-10; i++)
+;     Foo.A[i] = Foo.B[i+10] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias02
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias02(i32 %a) {  ; for i in [0,90): @Foo field0[i] = @Foo field2[i+10] + %a; returns @Foo field0[%a]
+entry:
+  %a.addr = alloca i32, align 4  ; stack slot for the argument %a
+  %i = alloca i32, align 4  ; stack slot for the loop counter
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4  ; counter starts at 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 90  ; stay in the loop while counter < 90 (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %add = add nsw i32 %1, 10  ; read index = i + 10
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %add  ; read address: field 2, element i+10
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add1 = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4  ; write address: field 0, element i
+  store i32 %add1, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1  ; counter += 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6  ; result address: field 0, element %a
+  %7 = load i32* %arrayidx3, align 4
+  ret i32 %7
+}
+
+; /// Different objects, positive induction with shortening slide
+; int noAlias03 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i+10] = Foo.B[i] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias03
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias03(i32 %a) {  ; for i in [0,100): @Foo field0[i+10] = @Foo field2[i] + %a; returns @Foo field0[%a]
+entry:
+  %a.addr = alloca i32, align 4  ; stack slot for the argument %a
+  %i = alloca i32, align 4  ; stack slot for the loop counter
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4  ; counter starts at 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100  ; stay in the loop while counter < 100 (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1  ; read address: field 2, element i
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %add1 = add nsw i32 %4, 10  ; write index = i + 10
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add1  ; write address: field 0, element i+10
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1  ; counter += 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6  ; result address: field 0, element %a
+  %7 = load i32* %arrayidx3, align 4
+  ret i32 %7
+}
+
+; /// Pointer access, positive stride, run-time check added
+; int noAlias04 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     *(PA+i) = *(PB+i) + a;
+;   return *(PA+a);
+; }
+; CHECK: define i32 @noAlias04
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+;
+; TODO: This test vectorizes (with run-time check) on real targets with -O3.
+; Check why it's not being vectorized even when forcing vectorization
+
+define i32 @noAlias04(i32 %a) #0 {  ; for i in [0,100): *(PA+i) = *(PB+i) + %a, reloading @PA/@PB each iteration; returns *(PA+%a)
+entry:
+  %a.addr = alloca i32, align 4  ; stack slot for the argument %a
+  %i = alloca i32, align 4  ; stack slot for the loop counter
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4  ; counter starts at 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100  ; stay in the loop while counter < 100 (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32** @PB, align 4  ; current value of the global pointer @PB
+  %2 = load i32* %i, align 4
+  %add.ptr = getelementptr inbounds i32* %1, i32 %2  ; read address: PB + i
+  %3 = load i32* %add.ptr, align 4
+  %4 = load i32* %a.addr, align 4
+  %add = add nsw i32 %3, %4
+  %5 = load i32** @PA, align 4  ; current value of the global pointer @PA
+  %6 = load i32* %i, align 4
+  %add.ptr1 = getelementptr inbounds i32* %5, i32 %6  ; write address: PA + i
+  store i32 %add, i32* %add.ptr1, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1  ; counter += 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32** @PA, align 4
+  %9 = load i32* %a.addr, align 4
+  %add.ptr2 = getelementptr inbounds i32* %8, i32 %9  ; result address: PA + %a
+  %10 = load i32* %add.ptr2, align 4
+  ret i32 %10
+}
+
+; /// Different objects, positive induction, multi-array
+; int noAlias05 (int a) {
+;   int i, N=10;
+;   for (i=0; i<SIZE; i++)
+;     Bar.A[N][i] = Bar.B[N][i] + a;
+;   return Bar.A[N][a];
+; }
+; CHECK: define i32 @noAlias05
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias05(i32 %a) #0 {  ; for i in [0,100): @Bar field0[N][i] = @Bar field2[N][i] + %a with N = 10; returns @Bar field0[N][%a]
+entry:
+  %a.addr = alloca i32, align 4  ; stack slot for the argument %a
+  %i = alloca i32, align 4  ; stack slot for the loop counter
+  %N = alloca i32, align 4  ; stack slot for the row index
+  store i32 %a, i32* %a.addr, align 4
+  store i32 10, i32* %N, align 4  ; row index N = 10
+  store i32 0, i32* %i, align 4  ; counter starts at 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100  ; stay in the loop while counter < 100 (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %2 = load i32* %N, align 4
+  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2  ; row N of field 2
+  %arrayidx1 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %1  ; read address: field 2, row N, column i
+  %3 = load i32* %arrayidx1, align 4
+  %4 = load i32* %a.addr, align 4
+  %add = add nsw i32 %3, %4
+  %5 = load i32* %i, align 4
+  %6 = load i32* %N, align 4
+  %arrayidx2 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6  ; row N of field 0
+  %arrayidx3 = getelementptr inbounds [100 x i32]* %arrayidx2, i32 0, i32 %5  ; write address: field 0, row N, column i
+  store i32 %add, i32* %arrayidx3, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1  ; counter += 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32* %a.addr, align 4
+  %9 = load i32* %N, align 4
+  %arrayidx4 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
+  %arrayidx5 = getelementptr inbounds [100 x i32]* %arrayidx4, i32 0, i32 %8  ; result address: field 0, row N, column %a
+  %10 = load i32* %arrayidx5, align 4
+  ret i32 %10
+}
+
+; /// Same objects, positive induction, multi-array, different sub-elements
+; int noAlias06 (int a) {
+;   int i, N=10;
+;   for (i=0; i<SIZE; i++)
+;     Bar.A[N][i] = Bar.A[N+1][i] + a;
+;   return Bar.A[N][a];
+; }
+; CHECK: define i32 @noAlias06
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias06(i32 %a) #0 {  ; for i in [0,100): @Bar field0[N][i] = @Bar field0[N+1][i] + %a with N = 10; returns @Bar field0[N][%a]
+entry:
+  %a.addr = alloca i32, align 4  ; stack slot for the argument %a
+  %i = alloca i32, align 4  ; stack slot for the loop counter
+  %N = alloca i32, align 4  ; stack slot for the row index
+  store i32 %a, i32* %a.addr, align 4
+  store i32 10, i32* %N, align 4  ; row index N = 10
+  store i32 0, i32* %i, align 4  ; counter starts at 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100  ; stay in the loop while counter < 100 (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %2 = load i32* %N, align 4
+  %add = add nsw i32 %2, 1  ; read row = N + 1
+  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add  ; row N+1 of field 0
+  %arrayidx1 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %1  ; read address: field 0, row N+1, column i
+  %3 = load i32* %arrayidx1, align 4
+  %4 = load i32* %a.addr, align 4
+  %add2 = add nsw i32 %3, %4
+  %5 = load i32* %i, align 4
+  %6 = load i32* %N, align 4
+  %arrayidx3 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6  ; row N of the same field 0
+  %arrayidx4 = getelementptr inbounds [100 x i32]* %arrayidx3, i32 0, i32 %5  ; write address: field 0, row N, column i
+  store i32 %add2, i32* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1  ; counter += 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32* %a.addr, align 4
+  %9 = load i32* %N, align 4
+  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
+  %arrayidx6 = getelementptr inbounds [100 x i32]* %arrayidx5, i32 0, i32 %8  ; result address: field 0, row N, column %a
+  %10 = load i32* %arrayidx6, align 4
+  ret i32 %10
+}
+
+; /// Different objects, negative induction, constant distance
+; int noAlias07 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[SIZE-i-1] = Foo.B[SIZE-i-1] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias07
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias07(i32 %a) #0 {  ; for i in [0,100): @Foo field0[100-i-1] = @Foo field2[100-i-1] + %a; returns @Foo field0[%a]
+entry:
+  %a.addr = alloca i32, align 4  ; stack slot for the argument %a
+  %i = alloca i32, align 4  ; stack slot for the loop counter
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4  ; counter starts at 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100  ; stay in the loop while counter < 100 (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 1  ; read index = 100 - i - 1
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1  ; read address: field 2
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %sub2 = sub nsw i32 100, %4
+  %sub3 = sub nsw i32 %sub2, 1  ; write index = 100 - i - 1
+  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3  ; write address: field 0
+  store i32 %add, i32* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1  ; counter += 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6  ; result address: field 0, element %a
+  %7 = load i32* %arrayidx5, align 4
+  ret i32 %7
+}
+
+; /// Different objects, negative induction, shortening slide
+; int noAlias08 (int a) {
+;   int i;
+;   for (i=0; i<SIZE-10; i++)
+;     Foo.A[SIZE-i-1] = Foo.B[SIZE-i-10] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias08
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias08(i32 %a) #0 {  ; for i in [0,90): @Foo field0[100-i-1] = @Foo field2[100-i-10] + %a; returns @Foo field0[%a]
+entry:
+  %a.addr = alloca i32, align 4  ; stack slot for the argument %a
+  %i = alloca i32, align 4  ; stack slot for the loop counter
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4  ; counter starts at 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 90  ; stay in the loop while counter < 90 (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 10  ; read index = 100 - i - 10
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1  ; read address: field 2
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %sub2 = sub nsw i32 100, %4
+  %sub3 = sub nsw i32 %sub2, 1  ; write index = 100 - i - 1
+  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3  ; write address: field 0
+  store i32 %add, i32* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1  ; counter += 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6  ; result address: field 0, element %a
+  %7 = load i32* %arrayidx5, align 4
+  ret i32 %7
+}
+
+; /// Different objects, negative induction, widening slide
+; int noAlias09 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[SIZE-i-10] = Foo.B[SIZE-i-1] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias09
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias09(i32 %a) #0 {  ; for i in [0,100): @Foo field0[100-i-10] = @Foo field2[100-i-1] + %a; returns @Foo field0[%a]
+entry:
+  %a.addr = alloca i32, align 4  ; stack slot for the argument %a
+  %i = alloca i32, align 4  ; stack slot for the loop counter
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4  ; counter starts at 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100  ; stay in the loop while counter < 100 (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 1  ; read index = 100 - i - 1
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1  ; read address: field 2
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %sub2 = sub nsw i32 100, %4
+  %sub3 = sub nsw i32 %sub2, 10  ; write index = 100 - i - 10
+  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3  ; write address: field 0
+  store i32 %add, i32* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1  ; counter += 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6  ; result address: field 0, element %a
+  %7 = load i32* %arrayidx5, align 4
+  ret i32 %7
+}
+
+; /// Pointer access, negative stride, run-time check added
+; int noAlias10 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     *(PA+SIZE-i-1) = *(PB+SIZE-i-1) + a;
+;   return *(PA+a);
+; }
+; CHECK: define i32 @noAlias10
+; CHECK-NOT: sub nsw <4 x i32>
+; CHECK: ret
+;
+; TODO: This test vectorizes (with run-time check) on real targets with -O3.
+; Check why it's not being vectorized even when forcing vectorization
+
+define i32 @noAlias10(i32 %a) #0 {  ; for i in [0,100): *(PA+100-i-1) = *(PB+100-i-1) + %a, reloading @PA/@PB each iteration; returns *(PA+%a)
+entry:
+  %a.addr = alloca i32, align 4  ; stack slot for the argument %a
+  %i = alloca i32, align 4  ; stack slot for the loop counter
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4  ; counter starts at 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100  ; stay in the loop while counter < 100 (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32** @PB, align 4  ; current value of the global pointer @PB
+  %add.ptr = getelementptr inbounds i32* %1, i32 100  ; PB + 100
+  %2 = load i32* %i, align 4
+  %idx.neg = sub i32 0, %2  ; -i
+  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg  ; PB + 100 - i
+  %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1  ; read address: PB + 100 - i - 1
+  %3 = load i32* %add.ptr2, align 4
+  %4 = load i32* %a.addr, align 4
+  %add = add nsw i32 %3, %4
+  %5 = load i32** @PA, align 4  ; current value of the global pointer @PA
+  %add.ptr3 = getelementptr inbounds i32* %5, i32 100  ; PA + 100
+  %6 = load i32* %i, align 4
+  %idx.neg4 = sub i32 0, %6  ; -i
+  %add.ptr5 = getelementptr inbounds i32* %add.ptr3, i32 %idx.neg4  ; PA + 100 - i
+  %add.ptr6 = getelementptr inbounds i32* %add.ptr5, i32 -1  ; write address: PA + 100 - i - 1
+  store i32 %add, i32* %add.ptr6, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1  ; counter += 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32** @PA, align 4
+  %9 = load i32* %a.addr, align 4
+  %add.ptr7 = getelementptr inbounds i32* %8, i32 %9  ; result address: PA + %a
+  %10 = load i32* %add.ptr7, align 4
+  ret i32 %10
+}
+
+; /// Different objects, negative induction, multi-array
+; int noAlias11 (int a) {
+;   int i, N=10;
+;   for (i=0; i<SIZE; i++)
+;     Bar.A[N][SIZE-i-1] = Bar.B[N][SIZE-i-1] + a;
+;   return Bar.A[N][a];
+; }
+; CHECK: define i32 @noAlias11
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias11(i32 %a) #0 {  ; for i in [0,100): @Bar field0[N][100-i-1] = @Bar field2[N][100-i-1] + %a with N = 10; returns @Bar field0[N][%a]
+entry:
+  %a.addr = alloca i32, align 4  ; stack slot for the argument %a
+  %i = alloca i32, align 4  ; stack slot for the loop counter
+  %N = alloca i32, align 4  ; stack slot for the row index
+  store i32 %a, i32* %a.addr, align 4
+  store i32 10, i32* %N, align 4  ; row index N = 10
+  store i32 0, i32* %i, align 4  ; counter starts at 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100  ; stay in the loop while counter < 100 (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 1  ; read column = 100 - i - 1
+  %2 = load i32* %N, align 4
+  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2  ; row N of field 2
+  %arrayidx2 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %sub1  ; read address: field 2, row N, column 100-i-1
+  %3 = load i32* %arrayidx2, align 4
+  %4 = load i32* %a.addr, align 4
+  %add = add nsw i32 %3, %4
+  %5 = load i32* %i, align 4
+  %sub3 = sub nsw i32 100, %5
+  %sub4 = sub nsw i32 %sub3, 1  ; write column = 100 - i - 1
+  %6 = load i32* %N, align 4
+  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6  ; row N of field 0
+  %arrayidx6 = getelementptr inbounds [100 x i32]* %arrayidx5, i32 0, i32 %sub4  ; write address: field 0, row N, column 100-i-1
+  store i32 %add, i32* %arrayidx6, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1  ; counter += 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32* %a.addr, align 4
+  %9 = load i32* %N, align 4
+  %arrayidx7 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
+  %arrayidx8 = getelementptr inbounds [100 x i32]* %arrayidx7, i32 0, i32 %8  ; result address: field 0, row N, column %a
+  %10 = load i32* %arrayidx8, align 4
+  ret i32 %10
+}
+
+; /// Same objects, negative induction, multi-array, different sub-elements
+; int noAlias12 (int a) {
+;   int i, N=10;
+;   for (i=0; i<SIZE; i++)
+;     Bar.A[N][SIZE-i-1] = Bar.A[N+1][SIZE-i-1] + a;
+;   return Bar.A[N][a];
+; }
+; CHECK: define i32 @noAlias12
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias12(i32 %a) #0 {  ; for i in [0,100): @Bar field0[N][100-i-1] = @Bar field0[N+1][100-i-1] + %a with N = 10; returns @Bar field0[N][%a]
+entry:
+  %a.addr = alloca i32, align 4  ; stack slot for the argument %a
+  %i = alloca i32, align 4  ; stack slot for the loop counter
+  %N = alloca i32, align 4  ; stack slot for the row index
+  store i32 %a, i32* %a.addr, align 4
+  store i32 10, i32* %N, align 4  ; row index N = 10
+  store i32 0, i32* %i, align 4  ; counter starts at 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100  ; stay in the loop while counter < 100 (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 1  ; read column = 100 - i - 1
+  %2 = load i32* %N, align 4
+  %add = add nsw i32 %2, 1  ; read row = N + 1
+  %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add  ; row N+1 of field 0
+  %arrayidx2 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %sub1  ; read address: field 0, row N+1, column 100-i-1
+  %3 = load i32* %arrayidx2, align 4
+  %4 = load i32* %a.addr, align 4
+  %add3 = add nsw i32 %3, %4
+  %5 = load i32* %i, align 4
+  %sub4 = sub nsw i32 100, %5
+  %sub5 = sub nsw i32 %sub4, 1  ; write column = 100 - i - 1
+  %6 = load i32* %N, align 4
+  %arrayidx6 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6  ; row N of the same field 0
+  %arrayidx7 = getelementptr inbounds [100 x i32]* %arrayidx6, i32 0, i32 %sub5  ; write address: field 0, row N, column 100-i-1
+  store i32 %add3, i32* %arrayidx7, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1  ; counter += 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32* %a.addr, align 4
+  %9 = load i32* %N, align 4
+  %arrayidx8 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
+  %arrayidx9 = getelementptr inbounds [100 x i32]* %arrayidx8, i32 0, i32 %8  ; result address: field 0, row N, column %a
+  %10 = load i32* %arrayidx9, align 4
+  ret i32 %10
+}
+
+; /// Same objects, positive induction, constant distance, just enough for vector size
+; int noAlias13 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i] = Foo.A[i+4] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias13
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias13(i32 %a) #0 {  ; for i in [0,100): @Foo field0[i] = @Foo field0[i+4] + %a (same array, read 4 elements ahead); returns @Foo field0[%a]
+entry:
+  %a.addr = alloca i32, align 4  ; stack slot for the argument %a
+  %i = alloca i32, align 4  ; stack slot for the loop counter
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4  ; counter starts at 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100  ; stay in the loop while counter < 100 (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %add = add nsw i32 %1, 4  ; read index = i + 4
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add  ; read address: field 0, element i+4
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add1 = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4  ; write address: field 0, element i
+  store i32 %add1, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1  ; counter += 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6  ; result address: field 0, element %a
+  %7 = load i32* %arrayidx3, align 4
+  ret i32 %7
+}
+
+; /// Same objects, negative induction, constant distance, just enough for vector size
+; int noAlias14 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[SIZE-i-1] = Foo.A[SIZE-i-5] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias14
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias14(i32 %a) #0 {  ; for i in [0,100): @Foo field0[100-i-1] = @Foo field0[100-i-5] + %a (same array); returns @Foo field0[%a]
+entry:
+  %a.addr = alloca i32, align 4  ; stack slot for the argument %a
+  %i = alloca i32, align 4  ; stack slot for the loop counter
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4  ; counter starts at 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100  ; stay in the loop while counter < 100 (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 5  ; read index = 100 - i - 5
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1  ; read address: field 0
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %sub2 = sub nsw i32 100, %4
+  %sub3 = sub nsw i32 %sub2, 1  ; write index = 100 - i - 1
+  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3  ; write address: field 0
+  store i32 %add, i32* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1  ; counter += 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6  ; result address: field 0, element %a
+  %7 = load i32* %arrayidx5, align 4
+  ret i32 %7
+}
+
+
+;; === Now, the tests that we could vectorize with induction changes or run-time checks ===
+
+
+; /// Different objects, swapped induction, alias at the end
+; int mayAlias01 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i] = Foo.B[SIZE-i-1] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @mayAlias01
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mayAlias01(i32 %a) nounwind {  ; for i in [0,100): @Foo field0[i] = @Foo field2[100-i-1] + %a (read and write walk in opposite directions); returns @Foo field0[%a]
+entry:
+  %a.addr = alloca i32, align 4  ; stack slot for the argument %a
+  %i = alloca i32, align 4  ; stack slot for the loop counter
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4  ; counter starts at 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100  ; stay in the loop while counter < 100 (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 1  ; read index = 100 - i - 1
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1  ; read address: field 2
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4  ; write address: field 0, element i
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1  ; counter += 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6  ; result address: field 0, element %a
+  %7 = load i32* %arrayidx3, align 4
+  ret i32 %7
+}
+
+; /// Different objects, swapped induction, alias at the beginning
+; int mayAlias02 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[SIZE-i-1] = Foo.B[i] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @mayAlias02
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mayAlias02(i32 %a) nounwind {  ; for i in [0,100): @Foo field0[100-i-1] = @Foo field2[i] + %a (read and write walk in opposite directions); returns @Foo field0[%a]
+entry:
+  %a.addr = alloca i32, align 4  ; stack slot for the argument %a
+  %i = alloca i32, align 4  ; stack slot for the loop counter
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4  ; counter starts at 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100  ; stay in the loop while counter < 100 (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1  ; read address: field 2, element i
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %4
+  %sub1 = sub nsw i32 %sub, 1  ; write index = 100 - i - 1
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1  ; write address: field 0
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1  ; counter += 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6  ; result address: field 0, element %a
+  %7 = load i32* %arrayidx3, align 4
+  ret i32 %7
+}
+
+; /// Pointer access, run-time check added
+; int mayAlias03 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     *(PA+i) = *(PB+SIZE-i-1) + a;
+;   return *(PA+a);
+; }
+; CHECK: define i32 @mayAlias03
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mayAlias03(i32 %a) nounwind {  ; for i in [0,100): *(PA+i) = *(PB+100-i-1) + %a, reloading @PA/@PB each iteration; returns *(PA+%a)
+entry:
+  %a.addr = alloca i32, align 4  ; stack slot for the argument %a
+  %i = alloca i32, align 4  ; stack slot for the loop counter
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4  ; counter starts at 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100  ; stay in the loop while counter < 100 (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32** @PB, align 4  ; current value of the global pointer @PB
+  %add.ptr = getelementptr inbounds i32* %1, i32 100  ; PB + 100
+  %2 = load i32* %i, align 4
+  %idx.neg = sub i32 0, %2  ; -i
+  %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg  ; PB + 100 - i
+  %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1  ; read address: PB + 100 - i - 1
+  %3 = load i32* %add.ptr2, align 4
+  %4 = load i32* %a.addr, align 4
+  %add = add nsw i32 %3, %4
+  %5 = load i32** @PA, align 4  ; current value of the global pointer @PA
+  %6 = load i32* %i, align 4
+  %add.ptr3 = getelementptr inbounds i32* %5, i32 %6  ; write address: PA + i
+  store i32 %add, i32* %add.ptr3, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1  ; counter += 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32** @PA, align 4
+  %9 = load i32* %a.addr, align 4
+  %add.ptr4 = getelementptr inbounds i32* %8, i32 %9  ; result address: PA + %a
+  %10 = load i32* %add.ptr4, align 4
+  ret i32 %10
+}
+
+
+;; === Finally, the tests that should only vectorize with care (or if we ignore undefined behaviour at all) ===
+
+
+; int mustAlias01 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i+10] = Foo.B[SIZE-i-1] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @mustAlias01
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mustAlias01(i32 %a) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 1
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %add2 = add nsw i32 %4, 10
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
+  store i32 %add, i32* %arrayidx3, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx4, align 4
+  ret i32 %7
+}
+
+; int mustAlias02 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i] = Foo.B[SIZE-i-10] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @mustAlias02
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mustAlias02(i32 %a) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 10
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx3, align 4
+  ret i32 %7
+}
+
+; int mustAlias03 (int a) {
+;   int i;
+;   for (i=0; i<SIZE; i++)
+;     Foo.A[i+10] = Foo.B[SIZE-i-10] + a;
+;   return Foo.A[a];
+; }
+; CHECK: define i32 @mustAlias03
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mustAlias03(i32 %a) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %sub = sub nsw i32 100, %1
+  %sub1 = sub nsw i32 %sub, 10
+  %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32* %arrayidx, align 4
+  %3 = load i32* %a.addr, align 4
+  %add = add nsw i32 %2, %3
+  %4 = load i32* %i, align 4
+  %add2 = add nsw i32 %4, 10
+  %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
+  store i32 %add, i32* %arrayidx3, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32* %a.addr, align 4
+  %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+  %7 = load i32* %arrayidx4, align 4
+  ret i32 %7
+}
diff --git a/test/Transforms/LoopVectorize/i8-induction.ll b/test/Transforms/LoopVectorize/i8-induction.ll
new file mode 100644
index 0000000..7759b70
--- /dev/null
+++ b/test/Transforms/LoopVectorize/i8-induction.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global i8 0, align 1
+@b = common global i8 0, align 1
+
+define void @f() nounwind uwtable ssp {
+scalar.ph:
+  store i8 0, i8* inttoptr (i64 1 to i8*), align 1, !tbaa !0
+  %0 = load i8* @a, align 1, !tbaa !0
+  br label %for.body
+
+for.body:
+  %mul16 = phi i8 [ 0, %scalar.ph ], [ %mul, %for.body ]              ; <------- i8 induction var.
+  %c.015 = phi i8 [ undef, %scalar.ph ], [ %conv8, %for.body ]
+  %conv2 = sext i8 %c.015 to i32
+  %tobool = icmp ne i8 %c.015, 0
+  %.sink = select i1 %tobool, i8 %c.015, i8 %0
+  %mul = mul i8 %mul16, %.sink
+  %add = add nsw i32 %conv2, 1
+  %conv8 = trunc i32 %add to i8
+  %sext = shl i32 %add, 24
+  %phitmp14 = icmp slt i32 %sext, 268435456
+  br i1 %phitmp14, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  store i8 %mul, i8* @b, align 1, !tbaa !0
+  ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
+
diff --git a/test/Transforms/LoopVectorize/if-conversion-reduction.ll b/test/Transforms/LoopVectorize/if-conversion-reduction.ll
index c6dc5d7..3a2d82e 100644
--- a/test/Transforms/LoopVectorize/if-conversion-reduction.ll
+++ b/test/Transforms/LoopVectorize/if-conversion-reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll
index 28407dc..6e7c03a 100644
--- a/test/Transforms/LoopVectorize/if-conversion.ll
+++ b/test/Transforms/LoopVectorize/if-conversion.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll
index e24fb39..3fa6b19 100644
--- a/test/Transforms/LoopVectorize/increment.ll
+++ b/test/Transforms/LoopVectorize/increment.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll
index 49c8ecb..7d5a5d7 100644
--- a/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/test/Transforms/LoopVectorize/intrinsic.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LoopVectorize/no_int_induction.ll b/test/Transforms/LoopVectorize/no_int_induction.ll
index 6eab799..45aa8c7 100644
--- a/test/Transforms/LoopVectorize/no_int_induction.ll
+++ b/test/Transforms/LoopVectorize/no_int_induction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 ; int __attribute__((noinline)) sum_array(int *A, int n) {
 ;  return std::accumulate(A, A + n, 0);
diff --git a/test/Transforms/LoopVectorize/nofloat.ll b/test/Transforms/LoopVectorize/nofloat.ll
index dbdec33..de23bf0 100644
--- a/test/Transforms/LoopVectorize/nofloat.ll
+++ b/test/Transforms/LoopVectorize/nofloat.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 ; Make sure that we don't vectorize functions with 'noimplicitfloat' attributes.
 
diff --git a/test/Transforms/LoopVectorize/non-const-n.ll b/test/Transforms/LoopVectorize/non-const-n.ll
index 7e4cee4..8262a18 100644
--- a/test/Transforms/LoopVectorize/non-const-n.ll
+++ b/test/Transforms/LoopVectorize/non-const-n.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/nsw-crash.ll b/test/Transforms/LoopVectorize/nsw-crash.ll
new file mode 100644
index 0000000..e5fad14
--- /dev/null
+++ b/test/Transforms/LoopVectorize/nsw-crash.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4
+
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+define void @test() {
+entry:
+  br i1 undef, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph:
+  br label %while.body
+
+while.body:
+  %it.sroa.0.091 = phi i32* [ undef, %while.body.lr.ph ], [ %incdec.ptr.i, %while.body ]
+  %incdec.ptr.i = getelementptr inbounds i32* %it.sroa.0.091, i64 1
+  %inc32 = add i32 undef, 1                                        ; <------------- Make sure we don't set NSW flags to the undef.
+  %cmp.i11 = icmp eq i32* %incdec.ptr.i, undef
+  br i1 %cmp.i11, label %while.end, label %while.body
+
+while.end:
+  ret void
+}
+
+
diff --git a/test/Transforms/LoopVectorize/ptr_loops.ll b/test/Transforms/LoopVectorize/ptr_loops.ll
new file mode 100644
index 0000000..25599f8
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ptr_loops.ll
@@ -0,0 +1,74 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@A = global [36 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35], align 16
+@B = global [36 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35], align 16
+
+;CHECK:_Z5test1v
+;CHECK: load <4 x i32>
+;CHECK: shufflevector <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @_Z5test1v() nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %0, %1
+  %p.02 = phi i32* [ getelementptr inbounds ([36 x i32]* @A, i64 0, i64 18), %0 ], [ %4, %1 ]
+  %b.01 = phi i32* [ getelementptr inbounds ([36 x i32]* @B, i64 0, i64 0), %0 ], [ %5, %1 ]
+  %2 = load i32* %b.01, align 4
+  %3 = shl nsw i32 %2, 1
+  store i32 %3, i32* %p.02, align 4
+  %4 = getelementptr inbounds i32* %p.02, i64 -1
+  %5 = getelementptr inbounds i32* %b.01, i64 1
+  %6 = icmp eq i32* %4, getelementptr ([36 x i32]* @A, i64 128102389400760775, i64 3)
+  br i1 %6, label %7, label %1
+
+; <label>:7                                       ; preds = %1
+  ret i32 0
+}
+
+;CHECK:_Z5test2v
+;CHECK: load <4 x i32>
+;CHECK: shufflevector <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @_Z5test2v() nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %0, %1
+  %p.02 = phi i32* [ getelementptr inbounds ([36 x i32]* @A, i64 0, i64 25), %0 ], [ %3, %1 ]
+  %b.01 = phi i32* [ getelementptr inbounds ([36 x i32]* @B, i64 0, i64 2), %0 ], [ %4, %1 ]
+  %2 = load i32* %b.01, align 4
+  store i32 %2, i32* %p.02, align 4
+  %3 = getelementptr inbounds i32* %p.02, i64 -1
+  %4 = getelementptr inbounds i32* %b.01, i64 1
+  %5 = icmp eq i32* %4, getelementptr inbounds ([36 x i32]* @A, i64 0, i64 18)
+  br i1 %5, label %6, label %1
+
+; <label>:6                                       ; preds = %1
+  ret i32 0
+}
+
+;CHECK:_Z5test3v
+;CHECK: load <4 x i32>
+;CHECK: shufflevector <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @_Z5test3v() nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %0, %1
+  %p.02 = phi i32* [ getelementptr inbounds ([36 x i32]* @A, i64 0, i64 29), %0 ], [ %3, %1 ]
+  %b.01 = phi i32* [ getelementptr inbounds ([36 x i32]* @B, i64 0, i64 5), %0 ], [ %4, %1 ]
+  %2 = load i32* %b.01, align 4
+  store i32 %2, i32* %p.02, align 4
+  %3 = getelementptr inbounds i32* %p.02, i64 -1
+  %4 = getelementptr inbounds i32* %b.01, i64 1
+  %5 = icmp eq i32* %3, getelementptr ([36 x i32]* @A, i64 128102389400760775, i64 3)
+  br i1 %5, label %6, label %1
+
+; <label>:6                                       ; preds = %1
+  ret i32 0
+}
diff --git a/test/Transforms/LoopVectorize/read-only.ll b/test/Transforms/LoopVectorize/read-only.ll
index c3c9035..bfaa6d4 100644
--- a/test/Transforms/LoopVectorize/read-only.ll
+++ b/test/Transforms/LoopVectorize/read-only.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll
index 129c20d..08b7b27 100644
--- a/test/Transforms/LoopVectorize/reduction.ll
+++ b/test/Transforms/LoopVectorize/reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll
index 2852684..86098a6 100644
--- a/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/test/Transforms/LoopVectorize/runtime-check.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
@@ -9,6 +9,10 @@ target triple = "x86_64-apple-macosx10.9.0"
 ;     a[i] = b[i] * 3;
 ; }
 
+;CHECK: for.body.preheader:
+;CHECK: br i1 %cmp.zero, label %middle.block, label %vector.memcheck
+;CHECK: vector.memcheck:
+;CHECK: br i1 %found.conflict, label %middle.block, label %vector.ph
 ;CHECK: load <4 x float>
 define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtable ssp {
 entry:
diff --git a/test/Transforms/LoopVectorize/same-base-access.ll b/test/Transforms/LoopVectorize/same-base-access.ll
index 2a1f19d..1573893 100644
--- a/test/Transforms/LoopVectorize/same-base-access.ll
+++ b/test/Transforms/LoopVectorize/same-base-access.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -licm -S -enable-if-conversion | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/scalar-select.ll b/test/Transforms/LoopVectorize/scalar-select.ll
index d72cd14..7a14d24 100644
--- a/test/Transforms/LoopVectorize/scalar-select.ll
+++ b/test/Transforms/LoopVectorize/scalar-select.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/simple-unroll.ll b/test/Transforms/LoopVectorize/simple-unroll.ll
index 9825764..7e2dd5f 100644
--- a/test/Transforms/LoopVectorize/simple-unroll.ll
+++ b/test/Transforms/LoopVectorize/simple-unroll.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/small-loop.ll b/test/Transforms/LoopVectorize/small-loop.ll
index ae784b3..fa83dba 100644
--- a/test/Transforms/LoopVectorize/small-loop.ll
+++ b/test/Transforms/LoopVectorize/small-loop.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/struct_access.ll b/test/Transforms/LoopVectorize/struct_access.ll
new file mode 100644
index 0000000..de65d0d
--- /dev/null
+++ b/test/Transforms/LoopVectorize/struct_access.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%struct.coordinate = type { i32, i32 }
+
+; Make sure that we don't generate a wide load when accessing the struct.
+; struct coordinate {
+;  int x;
+;  int y;
+; };
+;
+;
+; int foo(struct coordinate *A, int n) {
+;
+;   int sum = 0;
+;   for (int i = 0; i < n; ++i)
+;     sum += A[i].x;
+;
+;   return sum;
+; }
+
+;CHECK: @foo
+;CHECK-NOT: load <4 x i32>
+;CHECK: ret
+define i32 @foo(%struct.coordinate* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %x = getelementptr inbounds %struct.coordinate* %A, i64 %indvars.iv, i32 0
+  %0 = load i32* %x, align 4, !tbaa !0
+  %add = add nsw i32 %0, %sum.05
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/write-only.ll b/test/Transforms/LoopVectorize/write-only.ll
index b42122b..54cbe8d 100644
--- a/test/Transforms/LoopVectorize/write-only.ll
+++ b/test/Transforms/LoopVectorize/write-only.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index 3fa1628..582a57b 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -70,20 +70,20 @@ define void @test4(i8 *%P) {
   %A = alloca %1
   %a = bitcast %1* %A to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %P, i64 8, i32 4, i1 false)
-  call void @test4a(i8* byval align 1 %a)
+  call void @test4a(i8* align 1 byval %a)
   ret void
 ; CHECK: @test4
 ; CHECK-NEXT: call void @test4a(
 }
 
-declare void @test4a(i8* byval align 1)
+declare void @test4a(i8* align 1 byval)
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 %struct.S = type { i128, [4 x i8]}
 
 @sS = external global %struct.S, align 16
 
-declare void @test5a(%struct.S* byval align 16) nounwind ssp
+declare void @test5a(%struct.S* align 16 byval) nounwind ssp
 
 
 ; rdar://8713376 - This memcpy can't be eliminated.
@@ -94,7 +94,7 @@ entry:
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast (%struct.S* @sS to i8*), i64 32, i32 16, i1 false)
   %a = getelementptr %struct.S* %y, i64 0, i32 1, i64 0
   store i8 4, i8* %a
-  call void @test5a(%struct.S* byval align 16 %y)
+  call void @test5a(%struct.S* align 16 byval %y)
   ret i32 0
   ; CHECK: @test5(
   ; CHECK: store i8 4
@@ -114,19 +114,19 @@ define void @test6(i8 *%P) {
 ; isn't itself 8 byte aligned.
 %struct.p = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
 
-define i32 @test7(%struct.p* nocapture byval align 8 %q) nounwind ssp {
+define i32 @test7(%struct.p* nocapture align 8 byval %q) nounwind ssp {
 entry:
   %agg.tmp = alloca %struct.p, align 4
   %tmp = bitcast %struct.p* %agg.tmp to i8*
   %tmp1 = bitcast %struct.p* %q to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i32 4, i1 false)
-  %call = call i32 @g(%struct.p* byval align 8 %agg.tmp) nounwind
+  %call = call i32 @g(%struct.p* align 8 byval %agg.tmp) nounwind
   ret i32 %call
 ; CHECK: @test7
-; CHECK: call i32 @g(%struct.p* byval align 8 %q) nounwind
+; CHECK: call i32 @g(%struct.p* byval align 8 %q) [[NUW:#[0-9]+]]
 }
 
-declare i32 @g(%struct.p* byval align 8)
+declare i32 @g(%struct.p* align 8 byval)
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
@@ -152,7 +152,7 @@ declare noalias i8* @malloc(i32)
 ; rdar://11341081
 %struct.big = type { [50 x i32] }
 
-define void @test9() nounwind uwtable ssp {
+define void @test9() nounwind ssp uwtable {
 entry:
 ; CHECK: test9
 ; CHECK: f1
@@ -170,3 +170,7 @@ entry:
 
 declare void @f1(%struct.big* sret)
 declare void @f2(%struct.big*)
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { nounwind ssp }
+; CHECK: attributes #2 = { nounwind ssp uwtable }
diff --git a/test/Transforms/MergeFunc/2013-01-10-MergeFuncAssert.ll b/test/Transforms/MergeFunc/2013-01-10-MergeFuncAssert.ll
new file mode 100644
index 0000000..3f6a5ba
--- /dev/null
+++ b/test/Transforms/MergeFunc/2013-01-10-MergeFuncAssert.ll
@@ -0,0 +1,36 @@
+; RUN: opt -mergefunc -disable-output < %s
+; This used to trigger a ConstantExpr::getBitCast assertion.
+
+define void @t1() unnamed_addr uwtable ssp align 2 {
+entry:
+  switch i32 undef, label %sw.bb12 [
+    i32 127, label %sw.bb
+    i32 126, label %sw.bb4
+  ]
+
+sw.bb:                                            ; preds = %entry
+  unreachable
+
+sw.bb4:                                           ; preds = %entry
+  unreachable
+
+sw.bb12:                                          ; preds = %entry
+  ret void
+}
+
+define void @t2() unnamed_addr uwtable ssp align 2 {
+entry:
+  switch i32 undef, label %sw.bb8 [
+    i32 4, label %sw.bb
+    i32 3, label %sw.bb4
+  ]
+
+sw.bb:                                            ; preds = %entry
+  unreachable
+
+sw.bb4:                                           ; preds = %entry
+  ret void
+
+sw.bb8:                                           ; preds = %entry
+  unreachable
+}
diff --git a/test/Transforms/ObjCARC/apelim.ll b/test/Transforms/ObjCARC/apelim.ll
index 8c7b5b1..4541b3f 100644
--- a/test/Transforms/ObjCARC/apelim.ll
+++ b/test/Transforms/ObjCARC/apelim.ll
@@ -38,8 +38,8 @@ entry:
 }
 
 ; CHECK: define internal void @_GLOBAL__I_y()
-; CHECK: %0 = call i8* @objc_autoreleasePoolPush() nounwind
-; CHECK: call void @objc_autoreleasePoolPop(i8* %0) nounwind
+; CHECK: %0 = call i8* @objc_autoreleasePoolPush() [[NUW:#[0-9]+]]
+; CHECK: call void @objc_autoreleasePoolPop(i8* %0) [[NUW]]
 ; CHECK: }
 define internal void @_GLOBAL__I_y() {
 entry:
@@ -51,3 +51,5 @@ entry:
 
 declare i8* @objc_autoreleasePoolPush()
 declare void @objc_autoreleasePoolPop(i8*)
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 7b64b1b..4c24ebf 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -92,10 +92,10 @@ alt_return:
 
 ; CHECK: define void @test1b(
 ; CHECK: entry:
-; CHECK:   tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK:   tail call i8* @objc_retain(i8* %x) [[NUW:#[0-9]+]]
 ; CHECK-NOT: @objc_
 ; CHECK: if.end5:
-; CHECK:   tail call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+; CHECK:   tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test1b(i8* %x, i1 %p, i1 %q) {
@@ -404,8 +404,8 @@ entry:
 ; a stack argument.
 
 ; CHECK: define void @test11(
-; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
-; CHECK: tail call i8* @objc_autorelease(i8* %0) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
+; CHECK: call i8* @objc_autorelease(i8* %0) [[NUW]]
 ; CHECK: }
 define void @test11(i8* %x) nounwind {
 entry:
@@ -431,8 +431,8 @@ entry:
 ; Same as test11 but the value is returned. Do an RV optimization.
 
 ; CHECK: define i8* @test11b(
-; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
-; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %0) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
+; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %0) [[NUW]]
 ; CHECK: }
 define i8* @test11b(i8* %x) nounwind {
 entry:
@@ -462,10 +462,10 @@ entry:
 ; Trivial retain,autorelease pair. Don't delete!
 
 ; CHECK: define void @test13(
-; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
-; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
+; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK: @use_pointer(i8* %x)
-; CHECK: tail call i8* @objc_autorelease(i8* %x) nounwind
+; CHECK: call i8* @objc_autorelease(i8* %x) [[NUW]]
 ; CHECK: }
 define void @test13(i8* %x, i64 %n) {
 entry:
@@ -716,7 +716,7 @@ entry:
 ; Bitcast insertion
 
 ; CHECK: define void @test20(
-; CHECK: %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
+; CHECK: %tmp1 = tail call i8* @objc_retain(i8* %tmp) [[NUW]]
 ; CHECK-NEXT: invoke
 define void @test20(double* %self) {
 if.then12:
@@ -980,7 +980,7 @@ done:
 ; CHECK: call i8* @objc_retain(
 ; CHECK: call void @callee()
 ; CHECK: store
-; CHECK: call void @objc_release(i8* %p) nounwind, !clang.imprecise_release
+; CHECK: call void @objc_release(i8* %p) [[NUW]], !clang.imprecise_release
 ; CHECK: done:
 ; CHECK-NOT: @objc_
 ; CHECK: }
@@ -1450,9 +1450,9 @@ define void @test45(i8** %pp, i8** %qq) {
 ; Don't delete retain and autorelease here.
 
 ; CHECK: define void @test46(
-; CHECK: tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK: true:
-; CHECK: tail call i8* @objc_autorelease(i8* %p) nounwind
+; CHECK: call i8* @objc_autorelease(i8* %p) [[NUW]]
 define void @test46(i8* %p, i1 %a) {
 entry:
   call i8* @objc_retain(i8* %p)
@@ -1565,7 +1565,7 @@ define void @test53(void ()** %zz, i8** %pp) {
 
 ; CHECK: define void @test54(
 ; CHECK: call i8* @returner()
-; CHECK-NEXT: call void @objc_release(i8* %t) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: call void @objc_release(i8* %t) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NEXT: ret void
 define void @test54() {
   %t = call i8* @returner()
@@ -1595,10 +1595,10 @@ entry:
 ; CHECK: define void @test56(
 ; CHECK-NOT: @objc
 ; CHECK: if.then:
-; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK-NEXT: tail call void @use_pointer(i8* %x)
 ; CHECK-NEXT: tail call void @use_pointer(i8* %x)
-; CHECK-NEXT: tail call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NEXT: br label %if.end
 ; CHECK-NOT: @objc
 ; CHECK: }
@@ -1630,10 +1630,10 @@ if.end:                                           ; preds = %entry, %if.then
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
-; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
-; CHECK-NEXT:   call void @objc_release(i8* %x) nounwind
+; CHECK-NEXT:   call void @objc_release(i8* %x) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test57(i8* %x) nounwind {
@@ -1673,10 +1673,10 @@ entry:
 
 ; CHECK:      define void @test59(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
-; CHECK-NEXT:   call void @objc_release(i8* %x) nounwind
+; CHECK-NEXT:   call void @objc_release(i8* %x) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test59(i8* %x) nounwind {
@@ -1875,8 +1875,8 @@ return:                                           ; preds = %if.then, %entry
 ; rdar://11931823
 
 ; CHECK: define void @test66(
-; CHECK:   %tmp7 = tail call i8* @objc_retain(i8* %cond) nounwind
-; CHECK:   tail call void @objc_release(i8* %cond) nounwind
+; CHECK:   %tmp7 = tail call i8* @objc_retain(i8* %cond) [[NUW]]
+; CHECK:   tail call void @objc_release(i8* %cond) [[NUW]]
 ; CHECK: }
 define void @test66(i8* %tmp5, i8* %bar, i1 %tobool, i1 %tobool1, i8* %call) {
 entry:
@@ -2224,3 +2224,6 @@ end:                                              ; preds = %if.end125, %if.end1
 !0 = metadata !{}
 
 declare i32 @__gxx_personality_v0(...)
+
+; CHECK: attributes #0 = { nounwind readnone }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/cfg-hazards.ll b/test/Transforms/ObjCARC/cfg-hazards.ll
index 1519423..899298b 100644
--- a/test/Transforms/ObjCARC/cfg-hazards.ll
+++ b/test/Transforms/ObjCARC/cfg-hazards.ll
@@ -86,9 +86,9 @@ for.end:                                          ; preds = %for.body
 
 ; Delete nested retain+release pairs around loops.
 
-;      CHECK: define void @test3(i8* %a) nounwind {
+;      CHECK: define void @test3(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) [[NUW:#[0-9]+]]
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
 ;      CHECK: exit:
@@ -112,9 +112,9 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test4(i8* %a) nounwind {
+;      CHECK: define void @test4(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) [[NUW]]
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
 ;      CHECK: exit:
@@ -142,9 +142,9 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test5(i8* %a) nounwind {
+;      CHECK: define void @test5(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) [[NUW]]
 ; CHECK-NEXT:   call void @callee()
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
@@ -176,9 +176,9 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test6(i8* %a) nounwind {
+;      CHECK: define void @test6(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) [[NUW]]
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
 ;      CHECK: exit:
@@ -209,9 +209,9 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test7(i8* %a) nounwind {
+;      CHECK: define void @test7(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) [[NUW]]
 ; CHECK-NEXT:   call void @callee()
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
@@ -242,9 +242,9 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test8(i8* %a) nounwind {
+;      CHECK: define void @test8(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   tail call i8* @objc_retain(i8* %a) [[NUW]]
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
 ;      CHECK: exit:
@@ -274,7 +274,7 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test9(i8* %a) nounwind {
+;      CHECK: define void @test9(i8* %a) #0 {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
@@ -303,7 +303,7 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test10(i8* %a) nounwind {
+;      CHECK: define void @test10(i8* %a) #0 {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
@@ -332,7 +332,7 @@ exit:
   ret void
 }
 
-;      CHECK: define void @test11(i8* %a) nounwind {
+;      CHECK: define void @test11(i8* %a) #0 {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
@@ -362,15 +362,15 @@ exit:
 
 ; Don't delete anything if they're not balanced.
 
-;      CHECK: define void @test12(i8* %a) nounwind {
+;      CHECK: define void @test12(i8* %a) #0 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %outer = tail call i8* @objc_retain(i8* %a) nounwind
-; CHECK-NEXT:   %inner = tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT:   %outer = tail call i8* @objc_retain(i8* %a) [[NUW]]
+; CHECK-NEXT:   %inner = tail call i8* @objc_retain(i8* %a) [[NUW]]
 ; CHECK-NEXT:   br label %loop
 ;  CHECK-NOT:   @objc_
 ;      CHECK: exit:
-; CHECK-NEXT: call void @objc_release(i8* %a) nounwind
-; CHECK-NEXT: call void @objc_release(i8* %a) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: call void @objc_release(i8* %a) [[NUW]]
+; CHECK-NEXT: call void @objc_release(i8* %a) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test12(i8* %a) nounwind {
@@ -394,4 +394,6 @@ exit:
   ret void
 }
 
+; CHECK: attributes [[NUW]] = { nounwind }
+
 !0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/contract-marker.ll b/test/Transforms/ObjCARC/contract-marker.ll
index 01d978a..01fd1e7 100644
--- a/test/Transforms/ObjCARC/contract-marker.ll
+++ b/test/Transforms/ObjCARC/contract-marker.ll
@@ -3,7 +3,7 @@
 ; CHECK:      %call = tail call i32* @qux()
 ; CHECK-NEXT: %tcall = bitcast i32* %call to i8*
 ; CHECK-NEXT: call void asm sideeffect "mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue", ""()
-; CHECK-NEXT: %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %tcall) nounwind
+; CHECK-NEXT: %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %tcall) [[NUW:#[0-9]+]]
 
 define void @foo() {
 entry:
@@ -21,3 +21,5 @@ declare void @bar(i8*)
 !clang.arc.retainAutoreleasedReturnValueMarker = !{!0}
 
 !0 = metadata !{metadata !"mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue"}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/contract-storestrong.ll b/test/Transforms/ObjCARC/contract-storestrong.ll
index 2922f81..6999237 100644
--- a/test/Transforms/ObjCARC/contract-storestrong.ll
+++ b/test/Transforms/ObjCARC/contract-storestrong.ll
@@ -10,7 +10,7 @@ declare void @use_pointer(i8*)
 
 ; CHECK: define void @test0(
 ; CHECK: entry:
-; CHECK-NEXT: tail call void @objc_storeStrong(i8** @x, i8* %p) nounwind
+; CHECK-NEXT: tail call void @objc_storeStrong(i8** @x, i8* %p) [[NUW:#[0-9]+]]
 ; CHECK-NEXT: ret void
 define void @test0(i8* %p) {
 entry:
@@ -25,10 +25,10 @@ entry:
 
 ;      CHECK: define void @test1(i8* %p) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK-NEXT:   %tmp = load volatile i8** @x, align 8
 ; CHECK-NEXT:   store i8* %0, i8** @x, align 8
-; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test1(i8* %p) {
@@ -44,10 +44,10 @@ entry:
 
 ;      CHECK: define void @test2(i8* %p) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK-NEXT:   %tmp = load i8** @x, align 8
 ; CHECK-NEXT:   store volatile i8* %0, i8** @x, align 8
-; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test2(i8* %p) {
@@ -64,11 +64,11 @@ entry:
 
 ; CHECK:      define void @test3(i8* %newValue) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
+; CHECK-NEXT:   %x0 = tail call i8* @objc_retain(i8* %newValue) [[NUW]]
 ; CHECK-NEXT:   %x1 = load i8** @x, align 8
 ; CHECK-NEXT:   store i8* %x0, i8** @x, align 8
 ; CHECK-NEXT:   tail call void @use_pointer(i8* %x1), !clang.arc.no_objc_arc_exceptions !0
-; CHECK-NEXT:   tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT:   tail call void @objc_release(i8* %x1) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test3(i8* %newValue) {
@@ -85,11 +85,11 @@ entry:
 
 ; CHECK:      define i1 @test4(i8* %newValue, i8* %foo) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
+; CHECK-NEXT:   %x0 = tail call i8* @objc_retain(i8* %newValue) [[NUW]]
 ; CHECK-NEXT:   %x1 = load i8** @x, align 8
 ; CHECK-NEXT:   store i8* %x0, i8** @x, align 8
 ; CHECK-NEXT:   %t = icmp eq i8* %x1, %foo
-; CHECK-NEXT:   tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT:   tail call void @objc_release(i8* %x1) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NEXT:   ret i1 %t
 ; CHECK-NEXT: }
 define i1 @test4(i8* %newValue, i8* %foo) {
@@ -106,7 +106,7 @@ entry:
 
 ; CHECK: define i1 @test5(i8* %newValue, i8* %foo) {
 ; CHECK: %t = icmp eq i8* %x1, %foo
-; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) nounwind
+; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) [[NUW]]
 define i1 @test5(i8* %newValue, i8* %foo) {
 entry:
   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
@@ -121,7 +121,7 @@ entry:
 
 ; CHECK: define i1 @test6(i8* %newValue, i8* %foo) {
 ; CHECK: %t = icmp eq i8* %x1, %foo
-; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) nounwind
+; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) [[NUW]]
 define i1 @test6(i8* %newValue, i8* %foo) {
 entry:
   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
@@ -136,9 +136,9 @@ entry:
 
 ;      CHECK: define void @test7(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK-NEXT:   %tmp = load i8** @x, align 8
-; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test7(i8* %p) {
@@ -155,7 +155,7 @@ entry:
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %tmp = load i8** @x, align 8
 ; CHECK-NEXT:   store i8* %p, i8** @x, align 8
-; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test8(i8* %p) {
@@ -167,3 +167,5 @@ entry:
 }
 
 !0 = metadata !{}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/contract-testcases.ll b/test/Transforms/ObjCARC/contract-testcases.ll
index 1510ed0..85b03be 100644
--- a/test/Transforms/ObjCARC/contract-testcases.ll
+++ b/test/Transforms/ObjCARC/contract-testcases.ll
@@ -69,7 +69,7 @@ bb7:                                              ; preds = %bb6, %bb6, %bb5
 ; CHECK: define void @_Z6doTestP8NSString() {
 ; CHECK: invoke.cont:                                      ; preds = %entry
 ; CHECK-NEXT: call void asm sideeffect "mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue", ""()
-; CHECK-NEXT: %tmp = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+; CHECK-NEXT: %tmp = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) [[NUW:#[0-9]+]]
 define void @_Z6doTestP8NSString() {
 entry:
   %call = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* ()*)()
@@ -88,3 +88,6 @@ lpad:                                             ; preds = %entry
 !clang.arc.retainAutoreleasedReturnValueMarker = !{!0}
 
 !0 = metadata !{metadata !"mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue"}
+
+; CHECK: attributes #0 = { optsize }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/contract.ll b/test/Transforms/ObjCARC/contract.ll
index c48f8a5..b6fba59 100644
--- a/test/Transforms/ObjCARC/contract.ll
+++ b/test/Transforms/ObjCARC/contract.ll
@@ -34,12 +34,12 @@ entry:
 ; Merge objc_retain and objc_autorelease into objc_retainAutorelease.
 
 ; CHECK: define void @test2(
-; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) nounwind
+; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) [[NUW:#[0-9]+]]
 ; CHECK: }
 define void @test2(i8* %x) nounwind {
 entry:
   %0 = tail call i8* @objc_retain(i8* %x) nounwind
-  tail call i8* @objc_autorelease(i8* %0) nounwind
+  call i8* @objc_autorelease(i8* %0) nounwind
   call void @use_pointer(i8* %x)
   ret void
 }
@@ -47,7 +47,7 @@ entry:
 ; Same as test2 but the value is returned. Do an RV optimization.
 
 ; CHECK: define i8* @test2b(
-; CHECK: tail call i8* @objc_retainAutoreleaseReturnValue(i8* %x) nounwind
+; CHECK: tail call i8* @objc_retainAutoreleaseReturnValue(i8* %x) [[NUW]]
 ; CHECK: }
 define i8* @test2b(i8* %x) nounwind {
 entry:
@@ -59,14 +59,14 @@ entry:
 ; Merge a retain,autorelease pair around a call.
 
 ; CHECK: define void @test3(
-; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) nounwind
+; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) [[NUW]]
 ; CHECK: @use_pointer(i8* %0)
 ; CHECK: }
 define void @test3(i8* %x, i64 %n) {
 entry:
   tail call i8* @objc_retain(i8* %x) nounwind
   call void @use_pointer(i8* %x)
-  tail call i8* @objc_autorelease(i8* %x) nounwind
+  call i8* @objc_autorelease(i8* %x) nounwind
   ret void
 }
 
@@ -75,7 +75,7 @@ entry:
 
 ; CHECK: define void @test4(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: @objc_retainAutorelease(i8* %x) nounwind
+; CHECK-NEXT: @objc_retainAutorelease(i8* %x) [[NUW]]
 ; CHECK-NEXT: @use_pointer
 ; CHECK-NEXT: @objc_release
 ; CHECK-NEXT: ret void
@@ -84,7 +84,7 @@ define void @test4(i8* %x, i64 %n) {
 entry:
   tail call i8* @objc_retain(i8* %x) nounwind
   call void @use_pointer(i8* %x)
-  tail call i8* @objc_autorelease(i8* %x) nounwind
+  call i8* @objc_autorelease(i8* %x) nounwind
   tail call void @objc_release(i8* %x) nounwind
   ret void
 }
@@ -92,9 +92,9 @@ entry:
 ; Don't merge retain and autorelease if they're not control-equivalent.
 
 ; CHECK: define void @test5(
-; CHECK: tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK: true:
-; CHECK: tail call i8* @objc_autorelease(i8* %0) nounwind
+; CHECK: call i8* @objc_autorelease(i8* %0) [[NUW]]
 ; CHECK: }
 define void @test5(i8* %p, i1 %a) {
 entry:
@@ -102,7 +102,7 @@ entry:
   br i1 %a, label %true, label %false
 
 true:
-  tail call i8* @objc_autorelease(i8* %p) nounwind
+  call i8* @objc_autorelease(i8* %p) nounwind
   call void @use_pointer(i8* %p)
   ret void
 
@@ -119,8 +119,8 @@ false:
 ; Those entrypoints don't exist yet though.
 
 ; CHECK: define i8* @test6(
-; CHECK: call i8* @objc_retainAutoreleasedReturnValue(i8* %p) nounwind
-; CHECK: %t = tail call i8* @objc_autoreleaseReturnValue(i8* %1) nounwind
+; CHECK: call i8* @objc_retainAutoreleasedReturnValue(i8* %p) [[NUW]]
+; CHECK: %t = tail call i8* @objc_autoreleaseReturnValue(i8* %1) [[NUW]]
 ; CHECK: }
 define i8* @test6() {
   %p = call i8* @returner()
@@ -161,3 +161,5 @@ return:                                           ; preds = %if.then, %entry
   %retval = phi i8* [ %c, %if.then ], [ null, %entry ]
   ret i8* %retval
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/dont-infinite-loop-during-block-escape-analysis.ll b/test/Transforms/ObjCARC/dont-infinite-loop-during-block-escape-analysis.ll
new file mode 100644
index 0000000..bdee2be
--- /dev/null
+++ b/test/Transforms/ObjCARC/dont-infinite-loop-during-block-escape-analysis.ll
@@ -0,0 +1,87 @@
+; RUN: opt -S -objc-arc < %s
+; bugzilla://14551
+; rdar://12851911
+
+; Make sure that we do not hang clang during escape analysis.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-darwin"
+
+%struct.__block_descriptor = type { i64, i64 }
+%struct.__block_byref_foo = type { i8*, %struct.__block_byref_foo*, i32, i32, i32 }
+
+@_NSConcreteGlobalBlock = external global i8*
+@.str = private unnamed_addr constant [6 x i8] c"v8@?0\00", align 1
+@__block_descriptor_tmp = internal constant { i64, i64, i8*, i8* } { i64 0, i64 32, i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i8* null }
+@__block_literal_global = internal constant { i8**, i32, i32, i8*, %struct.__block_descriptor* } { i8** @_NSConcreteGlobalBlock, i32 1342177280, i32 0, i8* bitcast (void (i8*)* @__hang_clang_block_invoke to i8*), %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*) }, align 8
+
+define void @hang_clang() uwtable optsize ssp {
+entry:
+  %foo = alloca %struct.__block_byref_foo, align 8
+  %byref.isa = getelementptr inbounds %struct.__block_byref_foo* %foo, i64 0, i32 0
+  store i8* null, i8** %byref.isa, align 8
+  %byref.forwarding = getelementptr inbounds %struct.__block_byref_foo* %foo, i64 0, i32 1
+  store %struct.__block_byref_foo* %foo, %struct.__block_byref_foo** %byref.forwarding, align 8
+  %byref.flags = getelementptr inbounds %struct.__block_byref_foo* %foo, i64 0, i32 2
+  store i32 536870912, i32* %byref.flags, align 8
+  %byref.size = getelementptr inbounds %struct.__block_byref_foo* %foo, i64 0, i32 3
+  store i32 32, i32* %byref.size, align 4
+  %foo1 = getelementptr inbounds %struct.__block_byref_foo* %foo, i64 0, i32 4
+  store i32 0, i32* %foo1, align 8, !tbaa !4
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc.for.body_crit_edge, %entry
+  %0 = phi i1 [ true, %entry ], [ %phitmp, %for.inc.for.body_crit_edge ]
+  %i.06 = phi i32 [ 1, %entry ], [ %phitmp8, %for.inc.for.body_crit_edge ]
+  %block.05 = phi void (...)* [ null, %entry ], [ %block.1, %for.inc.for.body_crit_edge ]
+  br i1 %0, label %for.inc, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %1 = call i8* @objc_retainBlock(i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to i8*)) nounwind, !clang.arc.copy_on_escape !7
+  %2 = bitcast i8* %1 to void (...)*
+  %3 = bitcast void (...)* %block.05 to i8*
+  call void @objc_release(i8* %3) nounwind, !clang.imprecise_release !7
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %block.1 = phi void (...)* [ %2, %if.then ], [ %block.05, %for.body ]
+  %exitcond = icmp eq i32 %i.06, 10
+  br i1 %exitcond, label %for.end, label %for.inc.for.body_crit_edge
+
+for.inc.for.body_crit_edge:                       ; preds = %for.inc
+  %.pre = load %struct.__block_byref_foo** %byref.forwarding, align 8
+  %foo2.phi.trans.insert = getelementptr inbounds %struct.__block_byref_foo* %.pre, i64 0, i32 4
+  %.pre7 = load i32* %foo2.phi.trans.insert, align 4, !tbaa !4
+  %phitmp = icmp eq i32 %.pre7, 0
+  %phitmp8 = add i32 %i.06, 1
+  br label %for.body
+
+for.end:                                          ; preds = %for.inc
+  %4 = bitcast %struct.__block_byref_foo* %foo to i8*
+  call void @_Block_object_dispose(i8* %4, i32 8)
+  %5 = bitcast void (...)* %block.1 to i8*
+  call void @objc_release(i8* %5) nounwind, !clang.imprecise_release !7
+  ret void
+}
+
+define internal void @__hang_clang_block_invoke(i8* nocapture %.block_descriptor) nounwind uwtable readnone optsize ssp {
+entry:
+  ret void
+}
+
+declare i8* @objc_retainBlock(i8*)
+
+declare void @objc_release(i8*) nonlazybind
+
+declare void @_Block_object_dispose(i8*, i32)
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
+!4 = metadata !{metadata !"int", metadata !5}
+!5 = metadata !{metadata !"omnipotent char", metadata !6}
+!6 = metadata !{metadata !"Simple C/C++ TBAA"}
+!7 = metadata !{}
diff --git a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
new file mode 100644
index 0000000..05257d1
--- /dev/null
+++ b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
@@ -0,0 +1,174 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+; rdar://11744105
+; bugzilla://14584
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%0 = type opaque
+%struct._class_t = type { %struct._class_t*, %struct._class_t*, %struct._objc_cache*, i8* (i8*, i8*)**, %struct._class_ro_t* }
+%struct._objc_cache = type opaque
+%struct._class_ro_t = type { i32, i32, i32, i8*, i8*, %struct.__method_list_t*, %struct._objc_protocol_list*, %struct._ivar_list_t*, i8*, %struct._prop_list_t* }
+%struct.__method_list_t = type { i32, i32, [0 x %struct._objc_method] }
+%struct._objc_method = type { i8*, i8*, i8* }
+%struct._objc_protocol_list = type { i64, [0 x %struct._protocol_t*] }
+%struct._protocol_t = type { i8*, i8*, %struct._objc_protocol_list*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct._prop_list_t*, i32, i32, i8** }
+%struct._prop_list_t = type { i32, i32, [0 x %struct._prop_t] }
+%struct._prop_t = type { i8*, i8* }
+%struct._ivar_list_t = type { i32, i32, [0 x %struct._ivar_t] }
+%struct._ivar_t = type { i64*, i8*, i8*, i32, i32 }
+%struct.NSConstantString = type { i32*, i32, i8*, i64 }
+
+@"OBJC_CLASS_$_NSObject" = external global %struct._class_t
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_" = internal global %struct._class_t* @"OBJC_CLASS_$_NSObject", section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"new\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str = linker_private unnamed_addr constant [11 x i8] c"Failed: %@\00", align 1
+@_unnamed_cfstring_ = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i64 10 }, section "__DATA,__cfstring"
+@"OBJC_CLASS_$_NSException" = external global %struct._class_t
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_1" = internal global %struct._class_t* @"OBJC_CLASS_$_NSException", section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@.str2 = linker_private unnamed_addr constant [4 x i8] c"Foo\00", align 1
+@_unnamed_cfstring_3 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([4 x i8]* @.str2, i32 0, i32 0), i64 3 }, section "__DATA,__cfstring"
+@"\01L_OBJC_METH_VAR_NAME_4" = internal global [14 x i8] c"raise:format:\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_5" = internal global i8* getelementptr inbounds ([14 x i8]* @"\01L_OBJC_METH_VAR_NAME_4", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@llvm.used = appending global [6 x i8*] [i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_" to i8*), i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_1" to i8*), i8* getelementptr inbounds ([14 x i8]* @"\01L_OBJC_METH_VAR_NAME_4", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_5" to i8*)], section "llvm.metadata"
+
+define i32 @main() uwtable ssp {
+entry:
+  %tmp = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_", align 8, !dbg !37
+  %tmp1 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8, !dbg !37, !invariant.load !38
+  %tmp2 = bitcast %struct._class_t* %tmp to i8*, !dbg !37
+; CHECK: call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp2, i8* %tmp1)
+  %call = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp2, i8* %tmp1), !dbg !37, !clang.arc.no_objc_arc_exceptions !38
+  call void @llvm.dbg.value(metadata !{i8* %call}, i64 0, metadata !12), !dbg !37
+; CHECK: call i8* @objc_retain(i8* %call) [[NUW:#[0-9]+]]
+  %tmp3 = call i8* @objc_retain(i8* %call) nounwind, !dbg !39
+  call void @llvm.dbg.value(metadata !{i8* %call}, i64 0, metadata !25), !dbg !39
+  invoke fastcc void @ThrowFunc(i8* %call)
+          to label %eh.cont unwind label %lpad, !dbg !40, !clang.arc.no_objc_arc_exceptions !38
+
+eh.cont:                                          ; preds = %entry
+; CHECK: call void @objc_release(i8* %call)
+  call void @objc_release(i8* %call) nounwind, !dbg !42, !clang.imprecise_release !38
+  br label %if.end, !dbg !43
+
+lpad:                                             ; preds = %entry
+  %tmp4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch i8* null, !dbg !40
+  %tmp5 = extractvalue { i8*, i32 } %tmp4, 0, !dbg !40
+  %exn.adjusted = call i8* @objc_begin_catch(i8* %tmp5) nounwind, !dbg !44
+  call void @llvm.dbg.value(metadata !45, i64 0, metadata !21), !dbg !46
+  call void @objc_end_catch(), !dbg !49, !clang.arc.no_objc_arc_exceptions !38
+; CHECK: call void @objc_release(i8* %call)
+  call void @objc_release(i8* %call) nounwind, !dbg !42, !clang.imprecise_release !38
+  call void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to i8*), i8* %call), !dbg !50, !clang.arc.no_objc_arc_exceptions !38
+  br label %if.end, !dbg !52
+
+if.end:                                           ; preds = %lpad, %eh.cont
+  call void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to i8*), i8* %call), !dbg !53, !clang.arc.no_objc_arc_exceptions !38
+; CHECK: call void @objc_release(i8* %call)
+  call void @objc_release(i8* %call) nounwind, !dbg !54, !clang.imprecise_release !38
+  ret i32 0, !dbg !54
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+
+declare i8* @objc_retain(i8*) nonlazybind
+
+declare i8* @objc_begin_catch(i8*)
+
+declare void @objc_end_catch()
+
+declare void @objc_exception_rethrow()
+
+define internal fastcc void @ThrowFunc(i8* %obj) uwtable noinline ssp {
+entry:
+  %tmp = call i8* @objc_retain(i8* %obj) nounwind
+  call void @llvm.dbg.value(metadata !{i8* %obj}, i64 0, metadata !32), !dbg !55
+  %tmp1 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_1", align 8, !dbg !56
+  %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_5", align 8, !dbg !56, !invariant.load !38
+  %tmp3 = bitcast %struct._class_t* %tmp1 to i8*, !dbg !56
+  call void (i8*, i8*, %0*, %0*, ...)* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %0*, %0*, ...)*)(i8* %tmp3, i8* %tmp2, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_3 to %0*), %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_3 to %0*)), !dbg !56, !clang.arc.no_objc_arc_exceptions !38
+  call void @objc_release(i8* %obj) nounwind, !dbg !58, !clang.imprecise_release !38
+  ret void, !dbg !58
+}
+
+declare i32 @__objc_personality_v0(...)
+
+declare void @objc_release(i8*) nonlazybind
+
+declare void @NSLog(i8*, ...)
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+; CHECK: attributes #0 = { ssp uwtable }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes #2 = { nonlazybind }
+; CHECK: attributes #3 = { noinline ssp uwtable }
+; CHECK: attributes [[NUW]] = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!33, !34, !35, !36}
+
+!0 = metadata !{i32 786449, i32 0, i32 16, metadata !"test.m", metadata !"/Volumes/Files/gottesmmcab/Radar/12906997", metadata !"clang version 3.3 ", i1 true, i1 true, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m] [DW_LANG_ObjC]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !27}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @main, null, null, metadata !10, i32 10} ; [ DW_TAG_subprogram ] [line 9] [def] [scope 10] [main]
+!6 = metadata !{i32 786473, metadata !"test.m", metadata !"/Volumes/Files/gottesmmcab/Radar/12906997", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{metadata !11}
+!11 = metadata !{metadata !12, metadata !21, metadata !25}
+!12 = metadata !{i32 786688, metadata !13, metadata !"obj", metadata !6, i32 11, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [obj] [line 11]
+!13 = metadata !{i32 786443, metadata !5, i32 10, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!14 = metadata !{i32 786454, null, metadata !"id", metadata !6, i32 11, i64 0, i64 0, i64 0, i32 0, metadata !15} ; [ DW_TAG_typedef ] [id] [line 11, size 0, align 0, offset 0] [from ]
+!15 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
+!16 = metadata !{i32 786451, null, metadata !"objc_object", metadata !6, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [from ]
+!17 = metadata !{metadata !18}
+!18 = metadata !{i32 786445, metadata !16, metadata !"isa", metadata !6, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !19} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
+!19 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
+!20 = metadata !{i32 786451, null, metadata !"objc_class", metadata !6, i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [fwd] [from ]
+!21 = metadata !{i32 786688, metadata !22, metadata !"ok", metadata !6, i32 13, metadata !23, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [ok] [line 13]
+!22 = metadata !{i32 786443, metadata !13, i32 12, i32 0, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!23 = metadata !{i32 786454, null, metadata !"BOOL", metadata !6, i32 62, i64 0, i64 0, i64 0, i32 0, metadata !24} ; [ DW_TAG_typedef ] [BOOL] [line 62, size 0, align 0, offset 0] [from signed char]
+!24 = metadata !{i32 786468, null, metadata !"signed char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [signed char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!25 = metadata !{i32 786688, metadata !26, metadata !"obj2", metadata !6, i32 15, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [obj2] [line 15]
+!26 = metadata !{i32 786443, metadata !22, i32 14, i32 0, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!27 = metadata !{i32 786478, i32 0, metadata !6, metadata !"ThrowFunc", metadata !"ThrowFunc", metadata !"", metadata !6, i32 4, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i8*)* @ThrowFunc, null, null, metadata !30, i32 5} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [scope 5] [ThrowFunc]
+!28 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!29 = metadata !{null, metadata !14}
+!30 = metadata !{metadata !31}
+!31 = metadata !{metadata !32}
+!32 = metadata !{i32 786689, metadata !27, metadata !"obj", metadata !6, i32 16777220, metadata !14, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [obj] [line 4]
+!33 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!34 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!35 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!36 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
+!37 = metadata !{i32 11, i32 0, metadata !13, null}
+!38 = metadata !{}
+!39 = metadata !{i32 15, i32 0, metadata !26, null}
+!40 = metadata !{i32 17, i32 0, metadata !41, null}
+!41 = metadata !{i32 786443, metadata !26, i32 16, i32 0, metadata !6, i32 3} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!42 = metadata !{i32 22, i32 0, metadata !26, null}
+!43 = metadata !{i32 23, i32 0, metadata !22, null}
+!44 = metadata !{i32 19, i32 0, metadata !41, null}
+!45 = metadata !{i8 0}
+!46 = metadata !{i32 20, i32 0, metadata !47, null}
+!47 = metadata !{i32 786443, metadata !48, i32 19, i32 0, metadata !6, i32 5} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!48 = metadata !{i32 786443, metadata !26, i32 19, i32 0, metadata !6, i32 4} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!49 = metadata !{i32 21, i32 0, metadata !47, null}
+!50 = metadata !{i32 24, i32 0, metadata !51, null}
+!51 = metadata !{i32 786443, metadata !22, i32 23, i32 0, metadata !6, i32 6} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!52 = metadata !{i32 25, i32 0, metadata !51, null}
+!53 = metadata !{i32 27, i32 0, metadata !13, null}
+!54 = metadata !{i32 28, i32 0, metadata !13, null}
+!55 = metadata !{i32 4, i32 0, metadata !27, null}
+!56 = metadata !{i32 6, i32 0, metadata !57, null}
+!57 = metadata !{i32 786443, metadata !27, i32 5, i32 0, metadata !6, i32 7} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!58 = metadata !{i32 7, i32 0, metadata !57, null}
diff --git a/test/Transforms/ObjCARC/escape.ll b/test/Transforms/ObjCARC/escape.ll
index 3f694cf..8f252a0 100644
--- a/test/Transforms/ObjCARC/escape.ll
+++ b/test/Transforms/ObjCARC/escape.ll
@@ -10,8 +10,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; with the objc_storeWeak call.
 
 ; CHECK: define void @test0(
-; CHECK: %tmp7 = call i8* @objc_retainBlock(i8* %tmp6) nounwind, !clang.arc.copy_on_escape !0
-; CHECK: call void @objc_release(i8* %tmp7) nounwind, !clang.imprecise_release !0
+; CHECK: %tmp7 = call i8* @objc_retainBlock(i8* %tmp6) [[NUW:#[0-9]+]], !clang.arc.copy_on_escape !0
+; CHECK: call void @objc_release(i8* %tmp7) [[NUW]], !clang.imprecise_release !0
 ; CHECK: }
 define void @test0() nounwind {
 entry:
@@ -129,3 +129,6 @@ declare i8* @not_really_objc_storeWeak(i8**, i8*)
 declare void @objc_release(i8*)
 
 !0 = metadata !{}
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { nounwind ssp }
diff --git a/test/Transforms/ObjCARC/gvn.ll b/test/Transforms/ObjCARC/gvn.ll
index 6917b02..3648866 100644
--- a/test/Transforms/ObjCARC/gvn.ll
+++ b/test/Transforms/ObjCARC/gvn.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -basicaa -objc-arc -gvn < %s | FileCheck %s
+; RUN: opt -S -basicaa -objc-arc-aa -gvn < %s | FileCheck %s
 
 @x = common global i8* null, align 8
 
diff --git a/test/Transforms/ObjCARC/invoke.ll b/test/Transforms/ObjCARC/invoke.ll
index 1a58e34..f528b4a 100644
--- a/test/Transforms/ObjCARC/invoke.ll
+++ b/test/Transforms/ObjCARC/invoke.ll
@@ -12,10 +12,10 @@ declare i8* @returner()
 
 ; CHECK: define void @test0(
 ; CHECK: invoke.cont:
-; CHECK:   call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK:   call void @objc_release(i8* %zipFile) [[NUW:#[0-9]+]], !clang.imprecise_release !0
 ; CHECK:   ret void
 ; CHECK: lpad:
-; CHECK:   call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK:   call void @objc_release(i8* %zipFile) [[NUW]], !clang.imprecise_release !0
 ; CHECK:   ret void
 define void @test0(i8* %zipFile) {
 entry:
@@ -39,11 +39,11 @@ lpad:                                             ; preds = %entry
 
 ; CHECK: define void @test1(
 ; CHECK: invoke.cont:
-; CHECK:   call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK:   call void @objc_release(i8* %zipFile) [[NUW]], !clang.imprecise_release !0
 ; CHECK:   call void @callee()
 ; CHECK:   br label %done
 ; CHECK: lpad:
-; CHECK:   call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK:   call void @objc_release(i8* %zipFile) [[NUW]], !clang.imprecise_release !0
 ; CHECK:   call void @callee()
 ; CHECK:   br label %done
 ; CHECK: done:
@@ -108,7 +108,7 @@ finally.rethrow:                                  ; preds = %invoke.cont, %entry
 
 ; CHECK: define void @test3(
 ; CHECK: if.end:
-; CHECK-NEXT: call void @objc_release(i8* %p) nounwind
+; CHECK-NEXT: call void @objc_release(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test3(i8* %p, i1 %b) {
 entry:
@@ -140,10 +140,10 @@ if.end:
 ; CHECK: lpad:
 ; CHECK-NEXT: %r = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
 ; CHECK-NEXT: cleanup
-; CHECK-NEXT: call void @objc_release(i8* %p) nounwind
+; CHECK-NEXT: call void @objc_release(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret void
 ; CHECK: if.end:
-; CHECK-NEXT: call void @objc_release(i8* %p) nounwind
+; CHECK-NEXT: call void @objc_release(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test4(i8* %p, i1 %b) {
 entry:
@@ -215,4 +215,6 @@ if.end:
 declare i32 @__gxx_personality_v0(...)
 declare i32 @__objc_personality_v0(...)
 
+; CHECK: attributes [[NUW]] = { nounwind }
+
 !0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll b/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll
index 170d0a9..5d05825 100644
--- a/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll
+++ b/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll
@@ -4,7 +4,7 @@
 ; and various scary looking things and fold it into an objc_retainAutorelease.
 
 ; CHECK: bb57:
-; CHECK: tail call i8* @objc_retainAutorelease(i8* %tmp71x) nounwind
+; CHECK: tail call i8* @objc_retainAutorelease(i8* %tmp71x) [[NUW:#[0-9]+]]
 ; CHECK: bb99:
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -212,10 +212,12 @@ bb99:                                             ; preds = %bb57
   br label %bb104
 
 bb104:                                            ; preds = %bb99, %bb57
-  %tmp105 = tail call i8* @objc_autorelease(i8* %tmp72) nounwind
+  %tmp105 = call i8* @objc_autorelease(i8* %tmp72) nounwind
   %tmp106 = bitcast i8* %tmp105 to %14*
   tail call void @objc_release(i8* %tmp85) nounwind
   %tmp107 = bitcast %18* %tmp47 to i8*
   tail call void @objc_release(i8* %tmp107) nounwind
   ret %14* %tmp106
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/nested.ll b/test/Transforms/ObjCARC/nested.ll
index 32be03e..ca9c58b 100644
--- a/test/Transforms/ObjCARC/nested.ll
+++ b/test/Transforms/ObjCARC/nested.ll
@@ -770,9 +770,9 @@ forcoll.empty:
 @__block_d_tmp5 = external hidden constant { i64, i64, i8*, i8*, i8*, i8* }
 
 ; CHECK: define void @test11(
-; CHECK: tail call i8* @objc_retain(i8* %call) nounwind
-; CHECK: tail call i8* @objc_retain(i8* %call) nounwind
-; CHECK: call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0
+; CHECK: tail call i8* @objc_retain(i8* %call) [[NUW:#[0-9]+]]
+; CHECK: tail call i8* @objc_retain(i8* %call) [[NUW]]
+; CHECK: call void @objc_release(i8* %call) [[NUW]], !clang.imprecise_release !0
 ; CHECK: }
 define void @test11() {
 entry:
@@ -820,3 +820,6 @@ entry:
   call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0
   ret void
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { nonlazybind }
diff --git a/test/Transforms/ObjCARC/retain-block-alloca.ll b/test/Transforms/ObjCARC/retain-block-alloca.ll
index 01f2087..f40be23 100644
--- a/test/Transforms/ObjCARC/retain-block-alloca.ll
+++ b/test/Transforms/ObjCARC/retain-block-alloca.ll
@@ -9,7 +9,7 @@
 @"\01L_OBJC_SELECTOR_REFERENCES_" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
 
 ; CHECK: define void @test(
-; CHECK: %3 = call i8* @objc_retainBlock(i8* %2) nounwind
+; CHECK: %3 = call i8* @objc_retainBlock(i8* %2) [[NUW:#[0-9]+]]
 ; CHECK: @objc_msgSend
 ; CHECK-NEXT: @objc_release(i8* %3)
 define void @test(%0* %array) uwtable {
@@ -87,4 +87,8 @@ declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
 
 declare void @objc_release(i8*)
 
+; CHECK: attributes #0 = { uwtable }
+; CHECK: attributes #1 = { nonlazybind }
+; CHECK: attributes [[NUW]] = { nounwind }
+
 !0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block-side-effects.ll b/test/Transforms/ObjCARC/retain-block-side-effects.ll
index e84d48f..7fa73cb 100644
--- a/test/Transforms/ObjCARC/retain-block-side-effects.ll
+++ b/test/Transforms/ObjCARC/retain-block-side-effects.ll
@@ -4,7 +4,7 @@
 ; objc_retainBlock stores into %repeater so the load from after the
 ; call isn't forwardable from the store before the call.
 
-; CHECK: %tmp16 = call i8* @objc_retainBlock(i8* %tmp15) nounwind
+; CHECK: %tmp16 = call i8* @objc_retainBlock(i8* %tmp15) [[NUW:#[0-9]+]]
 ; CHECK: %tmp17 = bitcast i8* %tmp16 to void ()*
 ; CHECK: %tmp18 = load %struct.__block_byref_repeater** %byref.forwarding, align 8
 ; CHECK: %repeater12 = getelementptr inbounds %struct.__block_byref_repeater* %tmp18, i64 0, i32 6
@@ -37,3 +37,6 @@ entry:
 }
 
 declare i8* @objc_retainBlock(i8*)
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/retain-block.ll b/test/Transforms/ObjCARC/retain-block.ll
index b3b62d3..ee57049 100644
--- a/test/Transforms/ObjCARC/retain-block.ll
+++ b/test/Transforms/ObjCARC/retain-block.ll
@@ -28,8 +28,8 @@ entry:
 ; optimization possible.
 
 ; CHECK: define void @test0_no_metadata(i8* %tmp) {
-; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
-; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW:#[0-9]+]]
+; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
 ; CHECK: }
 define void @test0_no_metadata(i8* %tmp) {
 entry:
@@ -43,8 +43,8 @@ entry:
 ; optimization possible.
 
 ; CHECK: define void @test0_escape(i8* %tmp, i8** %z) {
-; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0
+; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
 ; CHECK: }
 define void @test0_escape(i8* %tmp, i8** %z) {
 entry:
@@ -58,8 +58,8 @@ entry:
 ; Same as test0_escape, but there's no intervening call.
 
 ; CHECK: define void @test0_just_escape(i8* %tmp, i8** %z) {
-; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0
+; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
 ; CHECK: }
 define void @test0_just_escape(i8* %tmp, i8** %z) {
 entry:
@@ -73,9 +73,9 @@ entry:
 
 ; CHECK: define void @test1(i8* %tmp) {
 ; CHECK-NOT: @objc
-; CHECK: tail call i8* @objc_retain(i8* %tmp) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %tmp) [[NUW]]
 ; CHECK-NOT: @objc
-; CHECK: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK: tail call void @objc_release(i8* %tmp) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NOT: @objc
 ; CHECK: }
 define void @test1(i8* %tmp) {
@@ -95,10 +95,10 @@ entry:
 
 ; CHECK: define void @test1_no_metadata(i8* %tmp) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+; CHECK-NEXT: tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]]
 ; CHECK-NEXT: @use_pointer(i8* %tmp2)
 ; CHECK-NEXT: @use_pointer(i8* %tmp2)
-; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NOT: @objc
 ; CHECK: }
 define void @test1_no_metadata(i8* %tmp) {
@@ -118,11 +118,11 @@ entry:
 
 ; CHECK: define void @test1_escape(i8* %tmp, i8** %z) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+; CHECK-NEXT: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0
 ; CHECK-NEXT: store i8* %tmp2, i8** %z
 ; CHECK-NEXT: @use_pointer(i8* %tmp2)
 ; CHECK-NEXT: @use_pointer(i8* %tmp2)
-; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NOT: @objc
 ; CHECK: }
 define void @test1_escape(i8* %tmp, i8** %z) {
@@ -136,3 +136,5 @@ entry:
   tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
   ret void
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/retain-not-declared.ll b/test/Transforms/ObjCARC/retain-not-declared.ll
index f876e51..e834179 100644
--- a/test/Transforms/ObjCARC/retain-not-declared.ll
+++ b/test/Transforms/ObjCARC/retain-not-declared.ll
@@ -13,7 +13,7 @@ declare void @objc_release(i8*)
 
 ; CHECK:      define i8* @test0(i8* %p) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %0 = tail call i8* @objc_retainAutoreleaseReturnValue(i8* %p) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retainAutoreleaseReturnValue(i8* %p) [[NUW:#[0-9]+]]
 ; CHECK-NEXT:   ret i8* %0
 ; CHECK-NEXT: }
 
@@ -65,3 +65,5 @@ lpad100:                                          ; preds = %invoke.cont93
 declare i32 @__gxx_personality_v0(...)
 
 !0 = metadata !{}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/rle-s2l.ll b/test/Transforms/ObjCARC/rle-s2l.ll
index 8f8d5c0..2865c94 100644
--- a/test/Transforms/ObjCARC/rle-s2l.ll
+++ b/test/Transforms/ObjCARC/rle-s2l.ll
@@ -57,7 +57,7 @@ define void @test2(i8** %p) {
 
 ; CHECK:      define void @test3(i8** %p) {
 ; CHECK-NEXT:   %x = call i8* @objc_loadWeak(i8** %p)
-; CHECK-NEXT:   call void @use_pointer(i8* %x) readonly
+; CHECK-NEXT:   call void @use_pointer(i8* %x) [[RO:#[0-9]+]]
 ; CHECK-NEXT:   %1 = tail call i8* @objc_retain(i8* %x)
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
 ; CHECK-NEXT:   ret void
@@ -74,7 +74,7 @@ define void @test3(i8** %p) {
 
 ; CHECK:      define void @test4(i8** %p) {
 ; CHECK-NEXT:   %x = call i8* @objc_loadWeak(i8** %p)
-; CHECK-NEXT:   call void @use_pointer(i8* %x) readonly
+; CHECK-NEXT:   call void @use_pointer(i8* %x) [[RO]]
 ; CHECK-NEXT:   call void @callee()
 ; CHECK-NEXT:   %y = call i8* @objc_loadWeak(i8** %p)
 ; CHECK-NEXT:   call void @use_pointer(i8* %y)
@@ -133,3 +133,6 @@ define void @test7(i8** %p, i8* %n, i8** %q, i8* %m) {
   call void @use_pointer(i8* %y)
   ret void
 }
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes [[RO]] = { readonly }
diff --git a/test/Transforms/ObjCARC/rv.ll b/test/Transforms/ObjCARC/rv.ll
index 9353a19..a2fef96 100644
--- a/test/Transforms/ObjCARC/rv.ll
+++ b/test/Transforms/ObjCARC/rv.ll
@@ -29,7 +29,7 @@ declare i8* @returner()
 ; CHECK:      define void @test0(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %x = call i8* @returner
-; CHECK-NEXT:   %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %x) nounwind
+; CHECK-NEXT:   %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %x) [[NUW:#[0-9]+]]
 ; CHECK: t:
 ; CHECK-NOT: @objc_
 ; CHECK: return:
@@ -150,7 +150,7 @@ define void @test8() {
 ; Don't apply the RV optimization to autorelease if there's no retain.
 
 ; CHECK: define i8* @test9(i8* %p)
-; CHECK: tail call i8* @objc_autorelease(i8* %p)
+; CHECK: call i8* @objc_autorelease(i8* %p)
 define i8* @test9(i8* %p) {
   call i8* @objc_autorelease(i8* %p)
   ret i8* %p
@@ -159,8 +159,8 @@ define i8* @test9(i8* %p) {
 ; Apply the RV optimization.
 
 ; CHECK: define i8* @test10(i8* %p)
-; CHECK: tail call i8* @objc_retain(i8* %p) nounwind
-; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %p) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %p) [[NUW]]
+; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret i8* %p
 define i8* @test10(i8* %p) {
   %1 = call i8* @objc_retain(i8* %p)
@@ -174,7 +174,7 @@ define i8* @test10(i8* %p) {
 ; CHECK: define i8* @test11(i8* %p)
 ; CHECK: tail call i8* @objc_retain(i8* %p)
 ; CHECK-NEXT: call void @use_pointer(i8* %p)
-; CHECK: tail call i8* @objc_autorelease(i8* %p)
+; CHECK: call i8* @objc_autorelease(i8* %p)
 ; CHECK-NEXT: ret i8* %p
 define i8* @test11(i8* %p) {
   %1 = call i8* @objc_retain(i8* %p)
@@ -201,7 +201,7 @@ define i8* @test12(i8* %p) {
 
 ; CHECK: define i8* @test13(
 ; CHECK: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
-; CHECK: tail call i8* @objc_autorelease(i8* %p)
+; CHECK: call i8* @objc_autorelease(i8* %p)
 ; CHECK: ret i8* %p
 define i8* @test13() {
   %p = call i8* @returner()
@@ -215,7 +215,7 @@ define i8* @test13() {
 ; argument is not a return value.
 
 ; CHECK: define void @test14(
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test14(i8* %p) {
   call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
@@ -227,7 +227,7 @@ define void @test14(i8* %p) {
 
 ; CHECK: define void @test15(
 ; CHECK-NEXT: %y = call i8* @returner()
-; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) nounwind
+; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test15() {
   %y = call i8* @returner()
@@ -240,7 +240,7 @@ define void @test15() {
 
 ; CHECK: define void @test16(
 ; CHECK-NEXT: %y = call i8* @returner()
-; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) nounwind
+; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test16() {
   %y = call i8* @returner()
@@ -252,7 +252,7 @@ define void @test16() {
 ; argument is not a return value.
 
 ; CHECK: define void @test17(
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) nounwind
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test17(i8* %y) {
   call i8* @objc_retain(i8* %y)
@@ -265,7 +265,7 @@ define void @test17(i8* %y) {
 ; CHECK: define void @test18(
 ; CHECK-NEXT: %y = call i8* @returner()
 ; CHECK-NEXT: call void @callee()
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) nounwind
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test18() {
   %y = call i8* @returner()
@@ -323,7 +323,7 @@ define i8* @test22(i8* %p) {
 ; Convert autoreleaseRV to autorelease.
 
 ; CHECK: define void @test23(
-; CHECK: tail call i8* @objc_autorelease(i8* %p) nounwind
+; CHECK: call i8* @objc_autorelease(i8* %p) [[NUW]]
 define void @test23(i8* %p) {
   store i8 0, i8* %p
   call i8* @objc_autoreleaseReturnValue(i8* %p)
@@ -340,3 +340,5 @@ define {}* @test24(i8* %p) {
   %s = bitcast i8* %p to {}*
   ret {}* %s
 }
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/split-backedge.ll b/test/Transforms/ObjCARC/split-backedge.ll
index 08e2dce..5ac278a 100644
--- a/test/Transforms/ObjCARC/split-backedge.ll
+++ b/test/Transforms/ObjCARC/split-backedge.ll
@@ -4,12 +4,12 @@
 ; rdar://11256239
 
 ; CHECK: define void @test0
-; CHECK: call i8* @objc_retain(i8* %call) nounwind
-; CHECK: call i8* @objc_retain(i8* %call) nounwind
-; CHECK: call i8* @objc_retain(i8* %cond) nounwind
-; CHECK: call void @objc_release(i8* %call) nounwind
-; CHECK: call void @objc_release(i8* %call) nounwind
-; CHECK: call void @objc_release(i8* %cond) nounwind
+; CHECK: call i8* @objc_retain(i8* %call) [[NUW:#[0-9]+]]
+; CHECK: call i8* @objc_retain(i8* %call) [[NUW]]
+; CHECK: call i8* @objc_retain(i8* %cond) [[NUW]]
+; CHECK: call void @objc_release(i8* %call) [[NUW]]
+; CHECK: call void @objc_release(i8* %call) [[NUW]]
+; CHECK: call void @objc_release(i8* %cond) [[NUW]]
 define void @test0() {
 entry:
   br label %while.body
@@ -46,3 +46,5 @@ declare i8* @objc_retain(i8*)
 declare void @use_pointer(i8*)
 
 !0 = metadata !{}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll b/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll
new file mode 100644
index 0000000..74ac97c
--- /dev/null
+++ b/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll
@@ -0,0 +1,84 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+declare i8* @objc_release(i8* %x)
+declare i8* @objc_retain(i8* %x)
+declare i8* @objc_autorelease(i8* %x)
+declare i8* @objc_autoreleaseReturnValue(i8* %x)
+declare i8* @objc_retainAutoreleasedReturnValue(i8* %x)
+
+; Never tail call objc_autorelease.
+define i8* @test0(i8* %x) {
+entry:
+  ; CHECK: %tmp0 = call i8* @objc_autorelease(i8* %x)
+  %tmp0 = call i8* @objc_autorelease(i8* %x)
+  ; CHECK: %tmp1 = call i8* @objc_autorelease(i8* %x)
+  %tmp1 = tail call i8* @objc_autorelease(i8* %x)
+
+  ret i8* %x
+}
+
+; Always tail call objc_autoreleaseReturnValue.
+define i8* @test1(i8* %x) {
+entry:
+  ; CHECK: %tmp0 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
+  %tmp0 = call i8* @objc_autoreleaseReturnValue(i8* %x)
+  ; CHECK: %tmp1 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
+  %tmp1 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
+  ret i8* %x
+}
+
+; Always tail call objc_retain.
+define i8* @test2(i8* %x) {
+entry:
+  ; CHECK: %tmp0 = tail call i8* @objc_retain(i8* %x)
+  %tmp0 = call i8* @objc_retain(i8* %x)
+  ; CHECK: %tmp1 = tail call i8* @objc_retain(i8* %x)
+  %tmp1 = tail call i8* @objc_retain(i8* %x)
+  ret i8* %x
+}
+
+define i8* @tmp(i8* %x) {
+  ret i8* %x
+}
+
+; Always tail call objc_retainAutoreleasedReturnValue.
+define i8* @test3(i8* %x) {
+entry:
+  %y = call i8* @tmp(i8* %x)
+  ; CHECK: %tmp0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y)
+  %tmp0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %y)
+  %z = call i8* @tmp(i8* %x)
+  ; CHECK: %tmp1 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %z)
+  %tmp1 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %z)
+  ret i8* %x
+}
+
+; By itself, we should never change whether or not objc_release is tail called.
+define i8* @test4(i8* %x) {
+entry:
+  ; CHECK: %tmp0 = call i8* @objc_release(i8* %x)
+  %tmp0 = call i8* @objc_release(i8* %x)
+  ; CHECK: %tmp1 = tail call i8* @objc_release(i8* %x)
+  %tmp1 = tail call i8* @objc_release(i8* %x)
+  ret i8* %x
+}
+
+; If we convert a tail called @objc_autoreleaseReturnValue to an
+; @objc_autorelease, ensure that the tail marker is removed.
+define i8* @test5(i8* %x) {
+entry:
+  ; CHECK: %tmp0 = call i8* @objc_autorelease(i8* %x)
+  %tmp0 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
+  ret i8* %tmp0
+}
+
+; If we convert a non-tail-called @objc_autorelease to an
+; @objc_autoreleaseReturnValue, ensure that the tail marker is added.
+define i8* @test6(i8* %x) {
+entry:
+  ; CHECK: %tmp0 = tail call i8* @objc_retain(i8* %x)
+  %tmp0 = tail call i8* @objc_retain(i8* %x)
+  ; CHECK: %tmp1 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
+  %tmp1 = call i8* @objc_autorelease(i8* %x)
+  ret i8* %x
+}
diff --git a/test/Transforms/ObjCARC/weak-copies.ll b/test/Transforms/ObjCARC/weak-copies.ll
index e1a94bb..5dab4e0 100644
--- a/test/Transforms/ObjCARC/weak-copies.ll
+++ b/test/Transforms/ObjCARC/weak-copies.ll
@@ -19,7 +19,7 @@ target triple = "x86_64-apple-darwin11.0.0"
 ; CHECK:      define void @foo() {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %call = call i8* @bar()
-; CHECK-NEXT:   call void @use(i8* %call) nounwind
+; CHECK-NEXT:   call void @use(i8* %call) [[NUW:#[0-9]+]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @foo() {
@@ -39,7 +39,7 @@ entry:
 
 ; Eliminate unnecessary weak pointer copies in a block initialization.
 
-; CHECK:      define void @qux(i8* %me) nounwind {
+; CHECK:      define void @qux(i8* %me) #0 {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %block = alloca %1, align 8
 ; CHECK-NOT:    alloca
@@ -84,4 +84,6 @@ declare i8* @objc_loadWeak(i8**)
 declare void @use(i8*) nounwind
 declare void @objc_destroyWeak(i8**)
 
+; CHECK: attributes [[NUW]] = { nounwind }
+
 !0 = metadata !{}
diff --git a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll b/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
index 0b5e415..3f28cb1 100644
--- a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
+++ b/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
@@ -1,7 +1,6 @@
 ; Scalar replacement was incorrectly promoting this alloca!!
 ;
-; RUN: opt < %s -scalarrepl -S | \
-; RUN:   sed "s/;.*//g" | grep "\["
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
 
 define i8* @test() {
 	%A = alloca [30 x i8]		; <[30 x i8]*> [#uses=1]
@@ -10,4 +9,4 @@ define i8* @test() {
 	store i8 0, i8* %B
 	ret i8* %C
 }
-
+; CHECK: alloca [
diff --git a/test/Transforms/ScalarRepl/phi-cycle.ll b/test/Transforms/ScalarRepl/phi-cycle.ll
index cb5101c..05d9382 100644
--- a/test/Transforms/ScalarRepl/phi-cycle.ll
+++ b/test/Transforms/ScalarRepl/phi-cycle.ll
@@ -67,7 +67,7 @@ while.cond.backedge.i:                            ; preds = %if.end.i, %while.bo
 
 ; CHECK: func.exit:
 ; CHECK-NOT: load
-; CHECK: %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp) nounwind
+; CHECK: %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp) [[NUW:#[0-9]+]]
 func.exit:                                        ; preds = %while.body.i.func.exit_crit_edge, %while.cond.i.func.exit_crit_edge
   %tmp3 = load i32* %x.i, align 4
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp3) nounwind
@@ -75,3 +75,6 @@ func.exit:                                        ; preds = %while.body.i.func.e
 }
 
 declare i32 @printf(i8* nocapture, ...) nounwind
+
+; CHECK: attributes #0 = { nounwind uwtable }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ScalarRepl/volatile.ll b/test/Transforms/ScalarRepl/volatile.ll
index 056526c..d506cdf 100644
--- a/test/Transforms/ScalarRepl/volatile.ll
+++ b/test/Transforms/ScalarRepl/volatile.ll
@@ -1,12 +1,13 @@
-; RUN: opt < %s -scalarrepl -S | grep "load volatile"
-; RUN: opt < %s -scalarrepl -S | grep "store volatile"
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
 
 define i32 @voltest(i32 %T) {
 	%A = alloca {i32, i32}
 	%B = getelementptr {i32,i32}* %A, i32 0, i32 0
 	store volatile i32 %T, i32* %B
+; CHECK: store volatile
 
 	%C = getelementptr {i32,i32}* %A, i32 0, i32 1
 	%X = load volatile i32* %C
+; CHECK: load volatile
 	ret i32 %X
 }
diff --git a/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll b/test/Transforms/SimplifyCFG/EmptyBlockMerge.ll
index feffb4e..aba08dc 100644
--- a/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll
+++ b/test/Transforms/SimplifyCFG/EmptyBlockMerge.ll
@@ -1,8 +1,6 @@
 ; Basic block #2 should not be merged into BB #3!
 ;
-; RUN: opt < %s -simplifycfg -S | \
-; RUN:   grep "br label"
-;
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
 declare void @foo()
 
@@ -13,6 +11,7 @@ bb0:
 	br i1 %cond218, label %bb3, label %bb2
 bb2:		; preds = %bb0
 	call void @foo( )
+; CHECK: br label %bb3
 	br label %bb3
 bb3:		; preds = %bb2, %bb0
 	%reg117 = phi i32 [ 110, %bb2 ], [ %reg108, %bb0 ]		; <i32> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/2002-06-24-PHINode.ll b/test/Transforms/SimplifyCFG/PHINode.ll
index 88f32bc..25a242a 100644
--- a/test/Transforms/SimplifyCFG/2002-06-24-PHINode.ll
+++ b/test/Transforms/SimplifyCFG/PHINode.ll
@@ -1,10 +1,11 @@
 ; -simplifycfg is not folding blocks if there is a PHI node involved.  This 
 ; should be fixed eventually
 
-; RUN: opt < %s -simplifycfg -S | not grep br
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
 define i32 @main(i32 %argc) {
 ; <label>:0
+; CHECK-NOT: br label %InlinedFunctionReturnNode
 	br label %InlinedFunctionReturnNode
 InlinedFunctionReturnNode:		; preds = %0
 	%X = phi i32 [ 7, %0 ]		; <i32> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/SpeculativeExec.ll b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
index a61867f..dd2e5d1 100644
--- a/test/Transforms/SimplifyCFG/SpeculativeExec.ll
+++ b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
@@ -44,3 +44,44 @@ join:
   ret i8 %c
 }
 
+define i8* @test4(i1* %dummy, i8* %a, i8* %b) {
+; Test that we don't speculate an arbitrarily large number of unfolded constant
+; expressions.
+; CHECK: @test4
+
+entry:
+  %cond1 = load volatile i1* %dummy
+  br i1 %cond1, label %if, label %end
+
+if:
+  %cond2 = load volatile i1* %dummy
+  br i1 %cond2, label %then, label %end
+
+then:
+  br label %end
+
+end:
+  %x1 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 1 to i8*), %then ]
+  %x2 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 2 to i8*), %then ]
+  %x3 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 3 to i8*), %then ]
+  %x4 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 4 to i8*), %then ]
+  %x5 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 5 to i8*), %then ]
+  %x6 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 6 to i8*), %then ]
+  %x7 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 7 to i8*), %then ]
+  %x8 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 8 to i8*), %then ]
+  %x9 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 9 to i8*), %then ]
+  %x10 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 10 to i8*), %then ]
+; CHECK-NOT: select
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+
+  ret i8* %x10
+}
diff --git a/test/Transforms/SimplifyCFG/switch-on-const-select.ll b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
index 673a62b..9cd709f 100644
--- a/test/Transforms/SimplifyCFG/switch-on-const-select.ll
+++ b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
@@ -35,7 +35,7 @@ define i32 @bar(i64 %x, i64 %y) nounwind {
 ; CHECK: @bar
 entry:
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @bees.a() nounwind
+; CHECK-NEXT: tail call void @bees.a() [[NUW:#[0-9]+]]
 ; CHECK-NEXT: ret i32 0
     %lt = icmp slt i64 %x, %y
     %qux = select i1 %lt, i32 0, i32 2
@@ -61,7 +61,7 @@ define void @bazz(i64 %x, i64 %y) nounwind {
 ; CHECK: @bazz
 entry:
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @bees.b() nounwind
+; CHECK-NEXT: tail call void @bees.b() [[NUW]]
 ; CHECK-NEXT: ret void
     %lt = icmp slt i64 %x, %y
     %qux = select i1 %lt, i32 10, i32 12
@@ -86,7 +86,7 @@ define void @quux(i64 %x, i64 %y) nounwind {
 ; CHECK: @quux
 entry:
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @bees.a() nounwind
+; CHECK-NEXT: tail call void @bees.a() [[NUW]]
 ; CHECK-NEXT: ret void
     %lt = icmp slt i64 %x, %y
     %qux = select i1 %lt, i32 0, i32 0
@@ -136,3 +136,6 @@ bees:
 declare void @llvm.trap() nounwind noreturn
 declare void @bees.a() nounwind
 declare void @bees.b() nounwind
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { noreturn nounwind }
diff --git a/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll b/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll
index 73eb05b..16791e2 100644
--- a/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll
+++ b/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll
@@ -1,12 +1,21 @@
-; RUN: opt < %s -simplify-libcalls -S > %t
-; RUN: grep noalias %t | count 2
-; RUN: grep nocapture %t | count 3
-; RUN: grep nounwind %t | count 3
-; RUN: grep readonly %t | count 1
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
 
+; CHECK: declare noalias i8* @fopen(i8* nocapture, i8* nocapture) #0
 declare i8* @fopen(i8*, i8*)
+
+; CHECK: declare i8 @strlen(i8* nocapture) #1
 declare i8 @strlen(i8*)
+
+; CHECK: declare noalias i32* @realloc(i32* nocapture, i32) #0
 declare i32* @realloc(i32*, i32)
 
 ; Test deliberately wrong declaration
 declare i32 @strcpy(...)
+
+; CHECK-NOT: strcpy{{.*}}noalias
+; CHECK-NOT: strcpy{{.*}}nocapture
+; CHECK-NOT: strcpy{{.*}}nounwind
+; CHECK-NOT: strcpy{{.*}}readonly
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes #1 = { nounwind readonly }
diff --git a/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll b/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
index e4f8b48..97e67b2 100644
--- a/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
+++ b/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -tailcallelim -S | \
-; RUN:    grep "call i32 @foo"
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
 
 declare void @bar(i32*)
 
@@ -7,6 +6,7 @@ define i32 @foo(i32 %N) {
 	%A = alloca i32, i32 %N		; <i32*> [#uses=2]
 	store i32 17, i32* %A
 	call void @bar( i32* %A )
+; CHECK: tail call i32 @foo
 	%X = tail call i32 @foo( i32 %N )		; <i32> [#uses=1]
 	ret i32 %X
 }
diff --git a/test/Transforms/TailCallElim/intervening-inst.ll b/test/Transforms/TailCallElim/intervening-inst.ll
index 0c40bd5..10dffbd 100644
--- a/test/Transforms/TailCallElim/intervening-inst.ll
+++ b/test/Transforms/TailCallElim/intervening-inst.ll
@@ -1,5 +1,5 @@
 ; This function contains intervening instructions which should be moved out of the way
-; RUN: opt < %s -tailcallelim -S | not grep call
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
 
 define i32 @Test(i32 %X) {
 entry:
@@ -10,6 +10,7 @@ then.0:		; preds = %entry
 	ret i32 %tmp.4
 endif.0:		; preds = %entry
 	%tmp.10 = add i32 %X, -1		; <i32> [#uses=1]
+; CHECK-NOT: call
 	%tmp.8 = call i32 @Test( i32 %tmp.10 )		; <i32> [#uses=1]
 	%DUMMY = add i32 %X, 1		; <i32> [#uses=0]
 	ret i32 %tmp.8
diff --git a/test/Transforms/TailCallElim/reorder_load.ll b/test/Transforms/TailCallElim/reorder_load.ll
index 7f5c36e..53c65da 100644
--- a/test/Transforms/TailCallElim/reorder_load.ll
+++ b/test/Transforms/TailCallElim/reorder_load.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -tailcallelim -S | not grep call
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
 ; PR4323
 
 ; Several cases where tail call elimination should move the load above the call,
@@ -21,6 +21,7 @@ if:		; preds = %entry
 
 else:		; preds = %entry
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
+; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
 	%tmp9 = load i32* %a_arg		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
@@ -47,6 +48,7 @@ unwind:		; preds = %else
 
 recurse:		; preds = %else
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
+; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
 	%tmp9 = load i32* @global		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
@@ -66,6 +68,7 @@ if:		; preds = %entry
 
 else:		; preds = %entry
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
+; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
 	%tmp9 = load i32* @extern_weak_global		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
@@ -94,6 +97,7 @@ unwind:		; preds = %else
 recurse:		; preds = %else
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
 	%first = load i32* %a_arg		; <i32> [#uses=1]
+; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_4(i32* %a_arg, i32 %first, i32 %tmp7)		; <i32> [#uses=1]
 	%second = load i32* %a_arg		; <i32> [#uses=1]
 	%tmp10 = add i32 %second, %tmp8		; <i32> [#uses=1]
diff --git a/test/Transforms/TailCallElim/return_constant.ll b/test/Transforms/TailCallElim/return_constant.ll
index 48e5641..e99e57e 100644
--- a/test/Transforms/TailCallElim/return_constant.ll
+++ b/test/Transforms/TailCallElim/return_constant.ll
@@ -1,7 +1,7 @@
 ; Though this case seems to be fairly unlikely to occur in the wild, someone
 ; plunked it into the demo script, so maybe they care about it.
 ;
-; RUN: opt < %s -tailcallelim -S | not grep call
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
 
 define i32 @aaa(i32 %c) {
 entry:
@@ -9,6 +9,7 @@ entry:
 	br i1 %tmp.1, label %return, label %else
 else:		; preds = %entry
 	%tmp.5 = add i32 %c, -1		; <i32> [#uses=1]
+; CHECK-NOT: call
 	%tmp.3 = call i32 @aaa( i32 %tmp.5 )		; <i32> [#uses=0]
 	ret i32 0
 return:		; preds = %entry
diff --git a/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll b/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
index 3d01d17..7049e4d 100644
--- a/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
+++ b/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
@@ -1,11 +1,11 @@
-; RUN: opt < %s -tailcallelim -S | \
-; RUN:    grep "tail call void @foo"
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
 
 
 declare void @foo()
 
 define void @bar() {
-	call void @foo( )
+; CHECK: tail call void @foo()
+	call void @foo()
 	ret void
 }
 
diff --git a/test/Verifier/module-flags-1.ll b/test/Verifier/module-flags-1.ll
new file mode 100644
index 0000000..e5feaf3
--- /dev/null
+++ b/test/Verifier/module-flags-1.ll
@@ -0,0 +1,60 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+; Check that module flags are structurally correct.
+;
+; CHECK: incorrect number of operands in module flag
+; CHECK: metadata !0
+!0 = metadata !{ i32 1 }
+; CHECK: invalid behavior operand in module flag (expected constant integer)
+; CHECK: metadata !"foo"
+!1 = metadata !{ metadata !"foo", metadata !"foo", i32 42 }
+; CHECK: invalid behavior operand in module flag (unexpected constant)
+; CHECK: i32 999
+!2 = metadata !{ i32 999, metadata !"foo", i32 43 }
+; CHECK: invalid ID operand in module flag (expected metadata string)
+; CHECK: i32 1
+!3 = metadata !{ i32 1, i32 1, i32 44 }
+; CHECK: invalid value for 'require' module flag (expected metadata pair)
+; CHECK: i32 45
+!4 = metadata !{ i32 3, metadata !"bla", i32 45 }
+; CHECK: invalid value for 'require' module flag (expected metadata pair)
+; CHECK: metadata !
+!5 = metadata !{ i32 3, metadata !"bla", metadata !{ i32 46 } }
+; CHECK: invalid value for 'require' module flag (first value operand should be a string)
+; CHECK: i32 47
+!6 = metadata !{ i32 3, metadata !"bla", metadata !{ i32 47, i32 48 } }
+
+; Check that module flags only have unique IDs.
+;
+; CHECK: module flag identifiers must be unique (or of 'require' type)
+!7 = metadata !{ i32 1, metadata !"foo", i32 49 }
+!8 = metadata !{ i32 2, metadata !"foo", i32 50 }
+; CHECK-NOT: module flag identifiers must be unique
+!9 = metadata !{ i32 2, metadata !"bar", i32 51 }
+!10 = metadata !{ i32 3, metadata !"bar", metadata !{ metadata !"bar", i32 51 } }
+
+; Check that any 'append'-type module flags are valid.
+; CHECK: invalid value for 'append'-type module flag (expected a metadata node)
+!16 = metadata !{ i32 5, metadata !"flag-2", i32 56 }
+; CHECK: invalid value for 'append'-type module flag (expected a metadata node)
+!17 = metadata !{ i32 5, metadata !"flag-3", i32 57 }
+; CHECK-NOT: invalid value for 'append'-type module flag (expected a metadata node)
+!18 = metadata !{ i32 5, metadata !"flag-4", metadata !{ i32 57 } }
+
+; Check that any 'require' module flags are valid.
+; CHECK: invalid requirement on flag, flag is not present in module
+!11 = metadata !{ i32 3, metadata !"bar",
+     metadata !{ metadata !"no-such-flag", i32 52 } }
+; CHECK: invalid requirement on flag, flag does not have the required value
+!12 = metadata !{ i32 1, metadata !"flag-0", i32 53 }
+!13 = metadata !{ i32 3, metadata !"bar",
+     metadata !{ metadata !"flag-0", i32 54 } }
+; CHECK-NOT: invalid requirement on flag, flag is not present in module
+; CHECK-NOT: invalid requirement on flag, flag does not have the required value
+!14 = metadata !{ i32 1, metadata !"flag-1", i32 55 }
+!15 = metadata !{ i32 3, metadata !"bar",
+     metadata !{ metadata !"flag-1", i32 55 } }
+
+!llvm.module.flags = !{
+  !0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15,
+  !16, !17, !18 }
diff --git a/test/lit.cfg b/test/lit.cfg
index 5a4cced..128bbe9 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -140,12 +140,16 @@ if config.test_exec_root is None:
 
 ###
 
-# Provide a target triple for mcjit tests
-mcjit_triple = config.target_triple
-# Force ELF format on Windows
-if re.search(r'cygwin|mingw32|win32', mcjit_triple):
-  mcjit_triple += "-elf"
-config.substitutions.append( ('%mcjit_triple', mcjit_triple) )
+# Provide a command line for mcjit tests
+lli_mcjit = 'lli -use-mcjit'
+# The target triple used by default by lli is the process target triple (some
+# triple appropriate for generating code for the current process) but because
+# we don't support COFF in MCJIT well enough for the tests, force ELF format on
+# Windows.  FIXME: the process target triple should be used here, but this is
+# difficult to obtain on Windows.
+if re.search(r'cygwin|mingw32|win32', config.host_triple):
+  lli_mcjit += ' -mtriple='+config.host_triple+'-elf'
+config.substitutions.append( ('%lli_mcjit', lli_mcjit) )
 
 # Provide a substition for those tests that need to run the jit to obtain data
 # but simply want use the currently considered most reliable jit for platform
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index 7a328f0..bfd901a 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -1,5 +1,6 @@
 ## Autogenerated by LLVM/Clang configuration.
 # Do not edit!
+config.host_triple = "@LLVM_HOSTTRIPLE@"
 config.target_triple = "@TARGET_TRIPLE@"
 config.llvm_src_root = "@LLVM_SOURCE_DIR@"
 config.llvm_obj_root = "@LLVM_BINARY_DIR@"