61 files changed, 1549 insertions, 380 deletions
diff --git a/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll b/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
index 4da33a0..73ee522 100644
--- a/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
+++ b/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
@@ -16,7 +16,7 @@ for.body:                                         ; preds = %for.body, %entry
   %add53 = add nsw i64 %n1, 0, !dbg !52
   %add55 = add nsw i64 %n1, 0, !dbg !53
   %mul63 = mul nsw i64 %add53, -20995, !dbg !54
-  tail call void @llvm.dbg.value(metadata !{i64 %mul63}, i64 0, metadata !30, metadata !{metadata !"0x102"}), !dbg !55
+  tail call void @llvm.dbg.value(metadata i64 %mul63, i64 0, metadata !30, metadata !{!"0x102"}), !dbg !55
   %mul65 = mul nsw i64 %add55, -3196, !dbg !56
   %add67 = add nsw i64 0, %mul65, !dbg !57
   %add80 = add i64 0, 1024, !dbg !58
@@ -44,63 +44,63 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!36, !37}
 !llvm.ident = !{!38}
 
-!0 = metadata !{metadata !"0x11\0012\00clang version 3.6.0 \001\00\000\00\001", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [] [] []
-!1 = metadata !{metadata !"test.c", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !"0x2e\00\00\00\00140\000\001\000\006\00256\001\00141", metadata !1, metadata !5, metadata !6, null, void ()* @test, null, null, metadata !12} ; [ DW_TAG_subprogram ] [] [] [def] [scope 141] []
-!5 = metadata !{metadata !"0x29", metadata !1} ; [ DW_TAG_file_type ] [] []
-!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !7, null, null, null} ; [ DW_TAG_subroutine_type ] [] [] [from ]
-!7 = metadata !{null, metadata !8}
-!8 = metadata !{metadata !"0xf\00\000\0064\0064\000\000", null, null, metadata !9} ; [ DW_TAG_pointer_type ] [] [] []
-!9 = metadata !{metadata !"0x16\00\0030\000\000\000\000", metadata !10, null, metadata !11} ; [ DW_TAG_typedef ] [] [] [] [from int]
-!10 = metadata !{metadata !"", metadata !""}
-!11 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [] [int] []
-!12 = metadata !{metadata !13, metadata !14, metadata !18, metadata !19, metadata !20, metadata !21, metadata !22, metadata !23, metadata !24, metadata !25, metadata !26, metadata !27, metadata !28, metadata !29, metadata !30, metadata !31, metadata !32, metadata !33, metadata !34, metadata !35}
-!13 = metadata !{metadata !"0x101\00\0016777356\000", metadata !4, metadata !5, metadata !8} ; [ DW_TAG_arg_variable ] [] [data] []
-!14 = metadata !{metadata !"0x100\00\00142\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [] [] []
-!15 = metadata !{metadata !"0x16\00\00183\000\000\000\000", metadata !16, null, metadata !17} ; [ DW_TAG_typedef ] [] [INT32] [] [from long int]
-!16 = metadata !{metadata !"", metadata !""}
-!17 = metadata !{metadata !"0x24\00\000\0064\0064\000\000\005", null, null} ; [ DW_TAG_base_type ] [] [long int] []
-!18 = metadata !{metadata !"0x100\00\00142\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [] [] []
-!19 = metadata !{metadata !"0x100\00\00142\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [] [] []
-!20 = metadata !{metadata !"0x100\00\00142\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [] [] []
-!21 = metadata !{metadata !"0x100\00\00142\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [] [] []
-!22 = metadata !{metadata !"0x100\00\00142\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [] [] []
-!23 = metadata !{metadata !"0x100\00\00142\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [] [] []
-!24 = metadata !{metadata !"0x100\00\00142\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [  ] [] []
-!25 = metadata !{metadata !"0x100\00\00143\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [  ] [] []
-!26 = metadata !{metadata !"0x100\00\00143\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [  ] [] []
-!27 = metadata !{metadata !"0x100\00\00143\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [  ] [] []
-!28 = metadata !{metadata !"0x100\00\00143\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [  ] [] []
-!29 = metadata !{metadata !"0x100\00\00144\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [  ] [] []
-!30 = metadata !{metadata !"0x100\00\00144\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [  ] [] []
-!31 = metadata !{metadata !"0x100\00\00144\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [  ] [] []
-!32 = metadata !{metadata !"0x100\00\00144\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [ ] [] []
-!33 = metadata !{metadata !"0x100\00\00144\000", metadata !4, metadata !5, metadata !15} ; [ DW_TAG_auto_variable ] [  ] [] []
-!34 = metadata !{metadata !"0x100\00\00145\000", metadata !4, metadata !5, metadata !8} ; [ DW_TAG_auto_variable ] [  ] [] []
-!35 = metadata !{metadata !"0x100\00\00146\000", metadata !4, metadata !5, metadata !11} ; [ DW_TAG_auto_variable ] [  ] [] []
-!36 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!37 = metadata !{i32 2, metadata !"Debug Info Version", i32 2}
-!38 = metadata !{metadata !"clang version 3.6.0 "}
-!39 = metadata !{i32 154, i32 8, metadata !40, null}
-!40 = metadata !{metadata !"0xb\00154\008\002", metadata !1, metadata !41} ; [ DW_TAG_lexical_block ] [  ] []
-!41 = metadata !{metadata !"0xb\00154\008\001", metadata !1, metadata !42} ; [ DW_TAG_lexical_block ] [  ] []
-!42 = metadata !{metadata !"0xb\00154\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] [  ] []
-!43 = metadata !{i32 157, i32 5, metadata !44, null}
-!44 = metadata !{metadata !"0xb\00154\0042\000", metadata !1, metadata !42} ; [ DW_TAG_lexical_block ] [  ] []
-!45 = metadata !{i32 159, i32 5, metadata !44, null}
-!46 = metadata !{metadata !47, metadata !47, i64 0}
-!47 = metadata !{metadata !"int", metadata !48, i64 0}
-!48 = metadata !{metadata !"omnipotent char", metadata !49, i64 0}
-!49 = metadata !{metadata !"Simple C/C++ TBAA"}
-!50 = metadata !{i32 160, i32 5, metadata !44, null}
-!51 = metadata !{i32 161, i32 5, metadata !44, null}
-!52 = metadata !{i32 188, i32 5, metadata !44, null}
-!53 = metadata !{i32 190, i32 5, metadata !44, null}
-!54 = metadata !{i32 198, i32 5, metadata !44, null}
-!55 = metadata !{i32 144, i32 13, metadata !4, null}
-!56 = metadata !{i32 200, i32 5, metadata !44, null}
-!57 = metadata !{i32 203, i32 5, metadata !44, null}
-!58 = metadata !{i32 207, i32 5, metadata !44, null}
-!59 = metadata !{i32 208, i32 5, metadata !44, null}
+!0 = !{!"0x11\0012\00clang version 3.6.0 \001\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [] [] []
+!1 = !{!"test.c", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00\00\00\00140\000\001\000\006\00256\001\00141", !1, !5, !6, null, void ()* @test, null, null, !12} ; [ DW_TAG_subprogram ] [] [] [def] [scope 141] []
+!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [] []
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [] [] [from ]
+!7 = !{null, !8}
+!8 = !{!"0xf\00\000\0064\0064\000\000", null, null, !9} ; [ DW_TAG_pointer_type ] [] [] []
+!9 = !{!"0x16\00\0030\000\000\000\000", !10, null, !11} ; [ DW_TAG_typedef ] [] [] [] [from int]
+!10 = !{!"", !""}
+!11 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [] [int] []
+!12 = !{!13, !14, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35}
+!13 = !{!"0x101\00\0016777356\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [] [data] []
+!14 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
+!15 = !{!"0x16\00\00183\000\000\000\000", !16, null, !17} ; [ DW_TAG_typedef ] [] [INT32] [] [from long int]
+!16 = !{!"", !""}
+!17 = !{!"0x24\00\000\0064\0064\000\000\005", null, null} ; [ DW_TAG_base_type ] [] [long int] []
+!18 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
+!19 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
+!20 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
+!21 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
+!22 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
+!23 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
+!24 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [  ] [] []
+!25 = !{!"0x100\00\00143\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [  ] [] []
+!26 = !{!"0x100\00\00143\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [  ] [] []
+!27 = !{!"0x100\00\00143\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [  ] [] []
+!28 = !{!"0x100\00\00143\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [  ] [] []
+!29 = !{!"0x100\00\00144\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [  ] [] []
+!30 = !{!"0x100\00\00144\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [  ] [] []
+!31 = !{!"0x100\00\00144\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [  ] [] []
+!32 = !{!"0x100\00\00144\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [ ] [] []
+!33 = !{!"0x100\00\00144\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [  ] [] []
+!34 = !{!"0x100\00\00145\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [  ] [] []
+!35 = !{!"0x100\00\00146\000", !4, !5, !11} ; [ DW_TAG_auto_variable ] [  ] [] []
+!36 = !{i32 2, !"Dwarf Version", i32 4}
+!37 = !{i32 2, !"Debug Info Version", i32 2}
+!38 = !{!"clang version 3.6.0 "}
+!39 = !MDLocation(line: 154, column: 8, scope: !40)
+!40 = !{!"0xb\00154\008\002", !1, !41} ; [ DW_TAG_lexical_block ] [  ] []
+!41 = !{!"0xb\00154\008\001", !1, !42} ; [ DW_TAG_lexical_block ] [  ] []
+!42 = !{!"0xb\00154\003\000", !1, !4} ; [ DW_TAG_lexical_block ] [  ] []
+!43 = !MDLocation(line: 157, column: 5, scope: !44)
+!44 = !{!"0xb\00154\0042\000", !1, !42} ; [ DW_TAG_lexical_block ] [  ] []
+!45 = !MDLocation(line: 159, column: 5, scope: !44)
+!46 = !{!47, !47, i64 0}
+!47 = !{!"int", !48, i64 0}
+!48 = !{!"omnipotent char", !49, i64 0}
+!49 = !{!"Simple C/C++ TBAA"}
+!50 = !MDLocation(line: 160, column: 5, scope: !44)
+!51 = !MDLocation(line: 161, column: 5, scope: !44)
+!52 = !MDLocation(line: 188, column: 5, scope: !44)
+!53 = !MDLocation(line: 190, column: 5, scope: !44)
+!54 = !MDLocation(line: 198, column: 5, scope: !44)
+!55 = !MDLocation(line: 144, column: 13, scope: !4)
+!56 = !MDLocation(line: 200, column: 5, scope: !44)
+!57 = !MDLocation(line: 203, column: 5, scope: !44)
+!58 = !MDLocation(line: 207, column: 5, scope: !44)
+!59 = !MDLocation(line: 208, column: 5, scope: !44)
diff --git a/test/CodeGen/AArch64/aarch64-2014-12-02-combine-soften.ll b/test/CodeGen/AArch64/aarch64-2014-12-02-combine-soften.ll
new file mode 100644
index 0000000..4553251
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-2014-12-02-combine-soften.ll
@@ -0,0 +1,16 @@
+;RUN: llc <%s -mattr=-neon  -mattr=-fp-armv8  | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+@t = common global i32 0, align 4
+@x = common global i32 0, align 4
+
+define void @foo() {
+entry:
+;CHECK-LABEL: foo:
+;CHECK: __floatsisf
+  %0 = load i32* @x, align 4
+  %conv = sitofp i32 %0 to float
+  store float %conv, float* bitcast (i32* @t to float*), align 4
+  ret void
+}
diff --git a/test/CodeGen/AArch64/addsub-shifted.ll b/test/CodeGen/AArch64/addsub-shifted.ll
index 0a93edd..1d963f4 100644
--- a/test/CodeGen/AArch64/addsub-shifted.ll
+++ b/test/CodeGen/AArch64/addsub-shifted.ll
@@ -190,7 +190,7 @@ define void @test_asr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
 ; CHECK: ret
 }
 
-define i32 @test_cmp(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
+define void @test_cmp(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64, i32 %v) {
 ; CHECK-LABEL: test_cmp:
 
   %shift1 = shl i32 %rhs32, 13
@@ -199,40 +199,46 @@ define i32 @test_cmp(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, lsl #13
 
 t2:
+  store volatile i32 %v, i32* @var32
   %shift2 = lshr i32 %rhs32, 20
   %tst2 = icmp ne i32 %lhs32, %shift2
   br i1 %tst2, label %t3, label %end
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, lsr #20
 
 t3:
+  store volatile i32 %v, i32* @var32
   %shift3 = ashr i32 %rhs32, 9
   %tst3 = icmp ne i32 %lhs32, %shift3
   br i1 %tst3, label %t4, label %end
 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, asr #9
 
 t4:
+  store volatile i32 %v, i32* @var32
   %shift4 = shl i64 %rhs64, 43
   %tst4 = icmp uge i64 %lhs64, %shift4
   br i1 %tst4, label %t5, label %end
 ; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, lsl #43
 
 t5:
+  store volatile i32 %v, i32* @var32
   %shift5 = lshr i64 %rhs64, 20
   %tst5 = icmp ne i64 %lhs64, %shift5
   br i1 %tst5, label %t6, label %end
 ; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, lsr #20
 
 t6:
+  store volatile i32 %v, i32* @var32
   %shift6 = ashr i64 %rhs64, 59
   %tst6 = icmp ne i64 %lhs64, %shift6
   br i1 %tst6, label %t7, label %end
 ; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, asr #59
 
 t7:
-  ret i32 1
-end:
+  store volatile i32 %v, i32* @var32
+  br label %end
 
-  ret i32 0
+end:
+  ret void
 ; CHECK: ret
 }
 
diff --git a/test/CodeGen/AArch64/analyze-branch.ll b/test/CodeGen/AArch64/analyze-branch.ll
index 6616b27..932cd75 100644
--- a/test/CodeGen/AArch64/analyze-branch.ll
+++ b/test/CodeGen/AArch64/analyze-branch.ll
@@ -7,8 +7,8 @@ declare void @test_true()
 declare void @test_false()
 
 ; !0 corresponds to a branch being taken, !1 to not being takne.
-!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
-!1 = metadata !{metadata !"branch_weights", i32 4, i32 64}
+!0 = !{!"branch_weights", i32 64, i32 4}
+!1 = !{!"branch_weights", i32 4, i32 64}
 
 define void @test_Bcc_fallthrough_taken(i32 %in) nounwind {
 ; CHECK-LABEL: test_Bcc_fallthrough_taken:
diff --git a/test/CodeGen/AArch64/analyzecmp.ll b/test/CodeGen/AArch64/analyzecmp.ll
index 8962505..0b3bcd8 100644
--- a/test/CodeGen/AArch64/analyzecmp.ll
+++ b/test/CodeGen/AArch64/analyzecmp.ll
@@ -1,9 +1,9 @@
 ; RUN: llc -O3 -mcpu=cortex-a57 < %s | FileCheck %s 
 
-; CHECK-LABLE: @test
-; CHECK: tst [[CMP:x[0-9]+]], #0x8000000000000000
-; CHECK: csel [[R0:x[0-9]+]], [[S0:x[0-9]+]], [[S1:x[0-9]+]], eq
-; CHECK: csel [[R1:x[0-9]+]], [[S2:x[0-9]+]], [[S3:x[0-9]+]], eq
+; CHECK-LABEL: @test
+; CHECK: and 
+; CHECK: csel
+; CHECK: csel
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "arm64--linux-gnueabi"
 
diff --git a/test/CodeGen/AArch64/argument-blocks.ll b/test/CodeGen/AArch64/argument-blocks.ll
new file mode 100644
index 0000000..f1dcfa6
--- /dev/null
+++ b/test/CodeGen/AArch64/argument-blocks.ll
@@ -0,0 +1,197 @@
+; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DARWINPCS
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AAPCS
+
+declare void @callee(...)
+
+define float @test_hfa_regs(float, [2 x float] %in) {
+; CHECK-LABEL: test_hfa_regs:
+; CHECK: fadd s0, s1, s2
+
+  %lhs = extractvalue [2 x float] %in, 0
+  %rhs = extractvalue [2 x float] %in, 1
+  %sum = fadd float %lhs, %rhs
+  ret float %sum
+}
+
+; Check that the array gets allocated to a contiguous block on the stack (rather
+; than the default of 2 8-byte slots).
+define float @test_hfa_block([7 x float], [2 x float] %in) {
+; CHECK-LABEL: test_hfa_block:
+; CHECK: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp]
+; CHECK: fadd s0, [[LHS]], [[RHS]]
+
+  %lhs = extractvalue [2 x float] %in, 0
+  %rhs = extractvalue [2 x float] %in, 1
+  %sum = fadd float %lhs, %rhs
+  ret float %sum
+}
+
+; Check that an HFA prevents backfilling of VFP registers (i.e. %rhs must go on
+; the stack rather than in s7).
+define float @test_hfa_block_consume([7 x float], [2 x float] %in, float %rhs) {
+; CHECK-LABEL: test_hfa_block_consume:
+; CHECK-DAG: ldr [[LHS:s[0-9]+]], [sp]
+; CHECK-DAG: ldr [[RHS:s[0-9]+]], [sp, #8]
+; CHECK: fadd s0, [[LHS]], [[RHS]]
+
+  %lhs = extractvalue [2 x float] %in, 0
+  %sum = fadd float %lhs, %rhs
+  ret float %sum
+}
+
+define float @test_hfa_stackalign([8 x float], [1 x float], [2 x float] %in) {
+; CHECK-LABEL: test_hfa_stackalign:
+; CHECK-AAPCS: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp, #8]
+; CHECK-DARWINPCS: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp, #4]
+; CHECK: fadd s0, [[LHS]], [[RHS]]
+  %lhs = extractvalue [2 x float] %in, 0
+  %rhs = extractvalue [2 x float] %in, 1
+  %sum = fadd float %lhs, %rhs
+  ret float %sum
+}
+
+; An HFA that ends up on the stack should not have any effect on where
+; integer-based arguments go.
+define i64 @test_hfa_ignores_gprs([7 x float], [2 x float] %in, i64, i64 %res) {
+; CHECK-LABEL: test_hfa_ignores_gprs:
+; CHECK: mov x0, x1
+  ret i64 %res
+}
+
+; [2 x float] should not be promoted to double by the Darwin varargs handling,
+; but should go in an 8-byte aligned slot.
+define void @test_varargs_stackalign() {
+; CHECK-LABEL: test_varargs_stackalign:
+; CHECK-DARWINPCS: stp {{w[0-9]+}}, {{w[0-9]+}}, [sp, #16]
+
+  call void(...)* @callee([3 x float] undef, [2 x float] [float 1.0, float 2.0])
+  ret void
+}
+
+define i64 @test_smallstruct_block([7 x i64], [2 x i64] %in) {
+; CHECK-LABEL: test_smallstruct_block:
+; CHECK: ldp [[LHS:x[0-9]+]], [[RHS:x[0-9]+]], [sp]
+; CHECK: add x0, [[LHS]], [[RHS]]
+  %lhs = extractvalue [2 x i64] %in, 0
+  %rhs = extractvalue [2 x i64] %in, 1
+  %sum = add i64 %lhs, %rhs
+  ret i64 %sum
+}
+
+; Check that a small struct prevents backfilling of registers (i.e. %rhs
+; must go on the stack rather than in x7).
+define i64 @test_smallstruct_block_consume([7 x i64], [2 x i64] %in, i64 %rhs) {
+; CHECK-LABEL: test_smallstruct_block_consume:
+; CHECK-DAG: ldr [[LHS:x[0-9]+]], [sp]
+; CHECK-DAG: ldr [[RHS:x[0-9]+]], [sp, #16]
+; CHECK: add x0, [[LHS]], [[RHS]]
+
+  %lhs = extractvalue [2 x i64] %in, 0
+  %sum = add i64 %lhs, %rhs
+  ret i64 %sum
+}
+
+define <1 x i64> @test_v1i64_blocked([7 x double], [2 x <1 x i64>] %in) {
+; CHECK-LABEL: test_v1i64_blocked:
+; CHECK: ldr d0, [sp]
+  %val = extractvalue [2 x <1 x i64>] %in, 0
+  ret <1 x i64> %val
+}
+
+define <1 x double> @test_v1f64_blocked([7 x double], [2 x <1 x double>] %in) {
+; CHECK-LABEL: test_v1f64_blocked:
+; CHECK: ldr d0, [sp]
+  %val = extractvalue [2 x <1 x double>] %in, 0
+  ret <1 x double> %val
+}
+
+define <2 x i32> @test_v2i32_blocked([7 x double], [2 x <2 x i32>] %in) {
+; CHECK-LABEL: test_v2i32_blocked:
+; CHECK: ldr d0, [sp]
+  %val = extractvalue [2 x <2 x i32>] %in, 0
+  ret <2 x i32> %val
+}
+
+define <2 x float> @test_v2f32_blocked([7 x double], [2 x <2 x float>] %in) {
+; CHECK-LABEL: test_v2f32_blocked:
+; CHECK: ldr d0, [sp]
+  %val = extractvalue [2 x <2 x float>] %in, 0
+  ret <2 x float> %val
+}
+
+define <4 x i16> @test_v4i16_blocked([7 x double], [2 x <4 x i16>] %in) {
+; CHECK-LABEL: test_v4i16_blocked:
+; CHECK: ldr d0, [sp]
+  %val = extractvalue [2 x <4 x i16>] %in, 0
+  ret <4 x i16> %val
+}
+
+define <4 x half> @test_v4f16_blocked([7 x double], [2 x <4 x half>] %in) {
+; CHECK-LABEL: test_v4f16_blocked:
+; CHECK: ldr d0, [sp]
+  %val = extractvalue [2 x <4 x half>] %in, 0
+  ret <4 x half> %val
+}
+
+define <8 x i8> @test_v8i8_blocked([7 x double], [2 x <8 x i8>] %in) {
+; CHECK-LABEL: test_v8i8_blocked:
+; CHECK: ldr d0, [sp]
+  %val = extractvalue [2 x <8 x i8>] %in, 0
+  ret <8 x i8> %val
+}
+
+define <2 x i64> @test_v2i64_blocked([7 x double], [2 x <2 x i64>] %in) {
+; CHECK-LABEL: test_v2i64_blocked:
+; CHECK: ldr q0, [sp]
+  %val = extractvalue [2 x <2 x i64>] %in, 0
+  ret <2 x i64> %val
+}
+
+define <2 x double> @test_v2f64_blocked([7 x double], [2 x <2 x double>] %in) {
+; CHECK-LABEL: test_v2f64_blocked:
+; CHECK: ldr q0, [sp]
+  %val = extractvalue [2 x <2 x double>] %in, 0
+  ret <2 x double> %val
+}
+
+define <4 x i32> @test_v4i32_blocked([7 x double], [2 x <4 x i32>] %in) {
+; CHECK-LABEL: test_v4i32_blocked:
+; CHECK: ldr q0, [sp]
+  %val = extractvalue [2 x <4 x i32>] %in, 0
+  ret <4 x i32> %val
+}
+
+define <4 x float> @test_v4f32_blocked([7 x double], [2 x <4 x float>] %in) {
+; CHECK-LABEL: test_v4f32_blocked:
+; CHECK: ldr q0, [sp]
+  %val = extractvalue [2 x <4 x float>] %in, 0
+  ret <4 x float> %val
+}
+
+define <8 x i16> @test_v8i16_blocked([7 x double], [2 x <8 x i16>] %in) {
+; CHECK-LABEL: test_v8i16_blocked:
+; CHECK: ldr q0, [sp]
+  %val = extractvalue [2 x <8 x i16>] %in, 0
+  ret <8 x i16> %val
+}
+
+define <8 x half> @test_v8f16_blocked([7 x double], [2 x <8 x half>] %in) {
+; CHECK-LABEL: test_v8f16_blocked:
+; CHECK: ldr q0, [sp]
+  %val = extractvalue [2 x <8 x half>] %in, 0
+  ret <8 x half> %val
+}
+
+define <16 x i8> @test_v16i8_blocked([7 x double], [2 x <16 x i8>] %in) {
+; CHECK-LABEL: test_v16i8_blocked:
+; CHECK: ldr q0, [sp]
+  %val = extractvalue [2 x <16 x i8>] %in, 0
+  ret <16 x i8> %val
+}
+
+define half @test_f16_blocked([7 x double], [2 x half] %in) {
+; CHECK-LABEL: test_f16_blocked:
+; CHECK: ldr h0, [sp]
+  %val = extractvalue [2 x half] %in, 0
+  ret half %val
+}
diff --git a/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
index e57a8c9..8b88c0b 100644
--- a/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
+++ b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
@@ -11,7 +11,7 @@ if.then24:                                        ; preds = %entry
   unreachable
 
 if.else295:                                       ; preds = %entry
-  call void @llvm.dbg.declare(metadata !{i32* %do_tab_convert}, metadata !16, metadata !{metadata !"0x102"}), !dbg !18
+  call void @llvm.dbg.declare(metadata i32* %do_tab_convert, metadata !16, metadata !{!"0x102"}), !dbg !18
   store i32 0, i32* %do_tab_convert, align 4, !dbg !19
   unreachable
 }
@@ -21,25 +21,25 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 !llvm.dbg.gv = !{!0}
 !llvm.dbg.sp = !{!1, !7, !10, !11, !12}
 
-!0 = metadata !{metadata !"0x34\00vsplive\00vsplive\00\00617\001\001", metadata !1, metadata !2, metadata !6, null, null} ; [ DW_TAG_variable ]
-!1 = metadata !{metadata !"0x2e\00drt_vsprintf\00drt_vsprintf\00\00616\000\001\000\006\00256\000\000", metadata !20, metadata !2, metadata !4, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!2 = metadata !{metadata !"0x29", metadata !20} ; [ DW_TAG_file_type ]
-!3 = metadata !{metadata !"0x11\0012\00clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)\001\00\000\00\000", metadata !20, metadata !21, metadata !21, null, null, null} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !20, metadata !2, null, metadata !5, i32 0} ; [ DW_TAG_subroutine_type ]
-!5 = metadata !{metadata !6}
-!6 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, metadata !3} ; [ DW_TAG_base_type ]
-!7 = metadata !{metadata !"0x2e\00putc_mem\00putc_mem\00\0030\001\001\000\006\00256\000\000", metadata !20, metadata !2, metadata !8, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!8 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", metadata !20, metadata !2, null, metadata !9, i32 0} ; [ DW_TAG_subroutine_type ]
-!9 = metadata !{null}
-!10 = metadata !{metadata !"0x2e\00print_double\00print_double\00\00203\001\001\000\006\00256\000\000", metadata !20, metadata !2, metadata !4, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{metadata !"0x2e\00print_number\00print_number\00\0075\001\001\000\006\00256\000\000", metadata !20, metadata !2, metadata !4, i32 0, null, null, null, null} ; [ DW_TAG_subprogram ]
-!12 = metadata !{metadata !"0x2e\00get_flags\00get_flags\00\00508\001\001\000\006\00256\000\000", metadata !20, metadata !2, metadata !8, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 653, i32 5, metadata !14, null}
-!14 = metadata !{metadata !"0xb\00652\0035\002", metadata !20, metadata !15} ; [ DW_TAG_lexical_block ]
-!15 = metadata !{metadata !"0xb\00616\001\000", metadata !20, metadata !1} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{metadata !"0x100\00do_tab_convert\00853\000", metadata !17, metadata !2, metadata !6} ; [ DW_TAG_auto_variable ]
-!17 = metadata !{metadata !"0xb\00850\0012\0033", metadata !20, metadata !14} ; [ DW_TAG_lexical_block ]
-!18 = metadata !{i32 853, i32 11, metadata !17, null}
-!19 = metadata !{i32 853, i32 29, metadata !17, null}
-!20 = metadata !{metadata !"print.i", metadata !"/Volumes/Ebi/echeng/radars/r9146594"}
-!21 = metadata !{i32 0}
+!0 = !{!"0x34\00vsplive\00vsplive\00\00617\001\001", !1, !2, !6, null, null} ; [ DW_TAG_variable ]
+!1 = !{!"0x2e\00drt_vsprintf\00drt_vsprintf\00\00616\000\001\000\006\00256\000\000", !20, !2, !4, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x29", !20} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\0012\00clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)\001\00\000\00\000", !20, !21, !21, null, null, null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !20, !2, null, !5, i32 0} ; [ DW_TAG_subroutine_type ]
+!5 = !{!6}
+!6 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !3} ; [ DW_TAG_base_type ]
+!7 = !{!"0x2e\00putc_mem\00putc_mem\00\0030\001\001\000\006\00256\000\000", !20, !2, !8, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!8 = !{!"0x15\00\000\000\000\000\000\000", !20, !2, null, !9, i32 0} ; [ DW_TAG_subroutine_type ]
+!9 = !{null}
+!10 = !{!"0x2e\00print_double\00print_double\00\00203\001\001\000\006\00256\000\000", !20, !2, !4, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!11 = !{!"0x2e\00print_number\00print_number\00\0075\001\001\000\006\00256\000\000", !20, !2, !4, i32 0, null, null, null, null} ; [ DW_TAG_subprogram ]
+!12 = !{!"0x2e\00get_flags\00get_flags\00\00508\001\001\000\006\00256\000\000", !20, !2, !8, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!13 = !MDLocation(line: 653, column: 5, scope: !14)
+!14 = !{!"0xb\00652\0035\002", !20, !15} ; [ DW_TAG_lexical_block ]
+!15 = !{!"0xb\00616\001\000", !20, !1} ; [ DW_TAG_lexical_block ]
+!16 = !{!"0x100\00do_tab_convert\00853\000", !17, !2, !6} ; [ DW_TAG_auto_variable ]
+!17 = !{!"0xb\00850\0012\0033", !20, !14} ; [ DW_TAG_lexical_block ]
+!18 = !MDLocation(line: 853, column: 11, scope: !17)
+!19 = !MDLocation(line: 853, column: 29, scope: !17)
+!20 = !{!"print.i", !"/Volumes/Ebi/echeng/radars/r9146594"}
+!21 = !{i32 0}
diff --git a/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll b/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll
index 4b037db..b5b1b70 100644
--- a/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll
+++ b/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll
@@ -43,8 +43,8 @@ entry:
 
 !llvm.module.flags = !{!0, !1, !2, !3}
 
-!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
-!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
-!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
-!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
-!4 = metadata !{}
+!0 = !{i32 1, !"Objective-C Version", i32 2}
+!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!2 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
+!4 = !{}
diff --git a/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll b/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
index 7d880f3..4db1f59 100644
--- a/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
+++ b/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
@@ -61,7 +61,7 @@ entry:
 
 !llvm.module.flags = !{!0, !1, !2, !3}
 
-!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
-!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
-!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
-!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
+!0 = !{i32 1, !"Objective-C Version", i32 2}
+!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!2 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
diff --git a/test/CodeGen/AArch64/arm64-aapcs-be.ll b/test/CodeGen/AArch64/arm64-aapcs-be.ll
index 77e2b0f..f27570a 100644
--- a/test/CodeGen/AArch64/arm64-aapcs-be.ll
+++ b/test/CodeGen/AArch64/arm64-aapcs-be.ll
@@ -21,4 +21,20 @@ entry:
 ; CHECK-DAG: strh w{{[0-9]}}, [sp, #14]
 ; CHECK-DAG: strb w{{[0-9]}}, [sp, #7]
   ret i32 %call
-}
-\ No newline at end of file
+}
+
+define float @test_block_addr([8 x float], [1 x float] %in) {
+; CHECK-LABEL: test_block_addr:
+; CHECK: ldr s0, [sp]
+  %val = extractvalue [1 x float] %in, 0
+  ret float %val
+}
+
+define void @test_block_addr_callee() {
+; CHECK-LABEL: test_block_addr_callee:
+; CHECK: str {{[a-z0-9]+}}, [sp]
+; CHECK: bl test_block_addr
+  %val = insertvalue [1 x float] undef, float 0.0, 0
+  call float @test_block_addr([8 x float] undef, [1 x float] %val)
+  ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-abi_align.ll b/test/CodeGen/AArch64/arm64-abi_align.ll
index deb740e..e03d7fa 100644
--- a/test/CodeGen/AArch64/arm64-abi_align.ll
+++ b/test/CodeGen/AArch64/arm64-abi_align.ll
@@ -527,8 +527,8 @@ attributes #3 = { nounwind "fp-contract-model"="standard" "relocation-model"="pi
 attributes #4 = { nounwind }
 attributes #5 = { nobuiltin }
 
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"short", metadata !1}
-!4 = metadata !{i64 0, i64 4, metadata !0, i64 4, i64 2, metadata !3, i64 8, i64 4, metadata !0, i64 12, i64 2, metadata !3, i64 16, i64 4, metadata !0, i64 20, i64 2, metadata !3}
+!0 = !{!"int", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"short", !1}
+!4 = !{i64 0, i64 4, !0, i64 4, i64 2, !3, i64 8, i64 4, !0, i64 12, i64 2, !3, i64 16, i64 4, !0, i64 20, i64 2, !3}
diff --git a/test/CodeGen/AArch64/arm64-atomic-128.ll b/test/CodeGen/AArch64/arm64-atomic-128.ll
index 3377849..642d72a 100644
--- a/test/CodeGen/AArch64/arm64-atomic-128.ll
+++ b/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -29,8 +29,7 @@ define void @fetch_and_nand(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
+; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw nand i128* %p, i128 %bits release
   store i128 %val, i128* @var, align 16
   ret void
@@ -45,8 +44,7 @@ define void @fetch_and_or(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
+; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw or i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -61,8 +59,7 @@ define void @fetch_and_add(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
+; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw add i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -77,8 +74,7 @@ define void @fetch_and_sub(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
+; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw sub i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -99,8 +95,7 @@ define void @fetch_and_min(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
+; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw min i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -121,8 +116,7 @@ define void @fetch_and_max(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
+; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw max i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -143,8 +137,7 @@ define void @fetch_and_umin(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
+; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw umin i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -165,8 +158,7 @@ define void @fetch_and_umax(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
+; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw umax i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
diff --git a/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
index 664a26c..b032d9c 100644
--- a/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
+++ b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
@@ -184,7 +184,7 @@ declare hidden fastcc i32 @Maze1Mech(i64, i64, i64, i64, i64, i32, i32) nounwind
 ; Materializable
 declare hidden fastcc void @CleanNet(i64) nounwind ssp
 
-!0 = metadata !{metadata !"long", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"any pointer", metadata !1}
+!0 = !{!"long", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"any pointer", !1}
diff --git a/test/CodeGen/AArch64/arm64-cse.ll b/test/CodeGen/AArch64/arm64-cse.ll
index b74ece8..508df7c 100644
--- a/test/CodeGen/AArch64/arm64-cse.ll
+++ b/test/CodeGen/AArch64/arm64-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 < %s -aarch64-atomic-cfg-tidy=0 -aarch64-gep-opt=false | FileCheck %s
+; RUN: llc -O3 < %s -aarch64-atomic-cfg-tidy=0 -aarch64-gep-opt=false -verify-machineinstrs | FileCheck %s
 target triple = "arm64-apple-ios"
 
 ; rdar://12462006
diff --git a/test/CodeGen/AArch64/arm64-fastcc-tailcall.ll b/test/CodeGen/AArch64/arm64-fastcc-tailcall.ll
index 8a744c5..a9b8024 100644
--- a/test/CodeGen/AArch64/arm64-fastcc-tailcall.ll
+++ b/test/CodeGen/AArch64/arm64-fastcc-tailcall.ll
@@ -19,6 +19,6 @@ define internal fastcc void @callee(i32* nocapture %p, i32 %a) nounwind optsize
   ret void
 }
 
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!0 = !{!"int", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll b/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
index e51c38b..e41e19e 100644
--- a/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
+++ b/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
@@ -6,7 +6,7 @@
 ; rdar://11855286
 define double @foo0(<2 x i64> %a) nounwind {
 ; CHECK:  scvtf.2d  [[REG:v[0-9]+]], v0, #9
-; CHECK-NEXT:  ins.d v0[0], [[REG]][1]
+; CHECK-NEXT:  mov  d0, [[REG]][1]
   %vecext = extractelement <2 x i64> %a, i32 1
   %fcvt_n = tail call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %vecext, i32 9)
   ret double %fcvt_n
diff --git a/test/CodeGen/AArch64/arm64-fold-address.ll b/test/CodeGen/AArch64/arm64-fold-address.ll
index 96cc3e9..1f0b918 100644
--- a/test/CodeGen/AArch64/arm64-fold-address.ll
+++ b/test/CodeGen/AArch64/arm64-fold-address.ll
@@ -72,8 +72,8 @@ entry:
 
 !llvm.module.flags = !{!0, !1, !2, !3}
 
-!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
-!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
-!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
-!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
-!4 = metadata !{}
+!0 = !{i32 1, !"Objective-C Version", i32 2}
+!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!2 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
+!4 = !{}
diff --git a/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
index c118f10..917911a 100644
--- a/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
+++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
@@ -34,7 +34,7 @@ declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) #1
 attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
-!1 = metadata !{metadata !2, metadata !2, i64 0}
-!2 = metadata !{metadata !"double", metadata !3, i64 0}
-!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
-!4 = metadata !{metadata !"Simple C/C++ TBAA"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"double", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/AArch64/arm64-ldp.ll b/test/CodeGen/AArch64/arm64-ldp.ll
index 5a98626..a9fa4ca 100644
--- a/test/CodeGen/AArch64/arm64-ldp.ll
+++ b/test/CodeGen/AArch64/arm64-ldp.ll
@@ -12,6 +12,18 @@ define i32 @ldp_int(i32* %p) nounwind {
   ret i32 %add
 }
 
+; CHECK: ldp_sext_int
+; CHECK: ldpsw
+define i64 @ldp_sext_int(i32* %p) nounwind {
+  %tmp = load i32* %p, align 4
+  %add.ptr = getelementptr inbounds i32* %p, i64 1
+  %tmp1 = load i32* %add.ptr, align 4
+  %sexttmp = sext i32 %tmp to i64
+  %sexttmp1 = sext i32 %tmp1 to i64
+  %add = add nsw i64 %sexttmp1, %sexttmp
+  ret i64 %add
+}
+
 ; CHECK: ldp_long
 ; CHECK: ldp
 define i64 @ldp_long(i64* %p) nounwind {
@@ -56,6 +68,21 @@ define i32 @ldur_int(i32* %a) nounwind {
   ret i32 %tmp3
 }
 
+define i64 @ldur_sext_int(i32* %a) nounwind {
+; LDUR_CHK: ldur_sext_int
+; LDUR_CHK: ldpsw     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
+; LDUR_CHK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
+; LDUR_CHK-NEXT: ret
+  %p1 = getelementptr inbounds i32* %a, i32 -1
+  %tmp1 = load i32* %p1, align 2
+  %p2 = getelementptr inbounds i32* %a, i32 -2
+  %tmp2 = load i32* %p2, align 2
+  %sexttmp1 = sext i32 %tmp1 to i64
+  %sexttmp2 = sext i32 %tmp2 to i64
+  %tmp3 = add i64 %sexttmp1, %sexttmp2
+  ret i64 %tmp3
+}
+
 define i64 @ldur_long(i64* %a) nounwind ssp {
 ; LDUR_CHK: ldur_long
 ; LDUR_CHK: ldp     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
@@ -110,6 +137,22 @@ define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
   ret i64 %tmp3
 }
 
+define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
+; LDUR_CHK: pairUpBarelyInSext
+; LDUR_CHK-NOT: ldur
+; LDUR_CHK: ldpsw     [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
+; LDUR_CHK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
+; LDUR_CHK-NEXT: ret
+  %p1 = getelementptr inbounds i32* %a, i64 -63
+  %tmp1 = load i32* %p1, align 2
+  %p2 = getelementptr inbounds i32* %a, i64 -64
+  %tmp2 = load i32* %p2, align 2
+  %sexttmp1 = sext i32 %tmp1 to i64
+  %sexttmp2 = sext i32 %tmp2 to i64
+  %tmp3 = add i64 %sexttmp1, %sexttmp2
+  ret i64 %tmp3
+}
+
 define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
 ; LDUR_CHK: pairUpBarelyOut
 ; LDUR_CHK-NOT: ldp
@@ -125,6 +168,23 @@ define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
   ret i64 %tmp3
 }
 
+define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
+; LDUR_CHK: pairUpBarelyOutSext
+; LDUR_CHK-NOT: ldp
+; Don't be fragile about which loads or manipulations of the base register
+; are used---just check that there isn't an ldp before the add
+; LDUR_CHK: add
+; LDUR_CHK-NEXT: ret
+  %p1 = getelementptr inbounds i32* %a, i64 -64
+  %tmp1 = load i32* %p1, align 2
+  %p2 = getelementptr inbounds i32* %a, i64 -65
+  %tmp2 = load i32* %p2, align 2
+  %sexttmp1 = sext i32 %tmp1 to i64
+  %sexttmp2 = sext i32 %tmp2 to i64
+  %tmp3 = add i64 %sexttmp1, %sexttmp2
+  ret i64 %tmp3
+}
+
 define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
 ; LDUR_CHK: pairUpNotAligned
 ; LDUR_CHK-NOT: ldp
@@ -147,3 +207,28 @@ define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
   %tmp3 = add i64 %tmp1, %tmp2
   ret i64 %tmp3
 }
+
+define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
+; LDUR_CHK: pairUpNotAlignedSext
+; LDUR_CHK-NOT: ldp
+; LDUR_CHK: ldursw
+; LDUR_CHK-NEXT: ldursw
+; LDUR_CHK-NEXT: add
+; LDUR_CHK-NEXT: ret
+  %p1 = getelementptr inbounds i32* %a, i64 -18
+  %bp1 = bitcast i32* %p1 to i8*
+  %bp1p1 = getelementptr inbounds i8* %bp1, i64 1
+  %dp1 = bitcast i8* %bp1p1 to i32*
+  %tmp1 = load i32* %dp1, align 1
+
+  %p2 = getelementptr inbounds i32* %a, i64 -17
+  %bp2 = bitcast i32* %p2 to i8*
+  %bp2p1 = getelementptr inbounds i8* %bp2, i64 1
+  %dp2 = bitcast i8* %bp2p1 to i32*
+  %tmp2 = load i32* %dp2, align 1
+
+  %sexttmp1 = sext i32 %tmp1 to i64
+  %sexttmp2 = sext i32 %tmp2 to i64
+  %tmp3 = add i64 %sexttmp1, %sexttmp2
+ ret i64 %tmp3
+}
diff --git a/test/CodeGen/AArch64/arm64-named-reg-alloc.ll b/test/CodeGen/AArch64/arm64-named-reg-alloc.ll
index d86d2e6..0c56454 100644
--- a/test/CodeGen/AArch64/arm64-named-reg-alloc.ll
+++ b/test/CodeGen/AArch64/arm64-named-reg-alloc.ll
@@ -11,4 +11,4 @@ entry:
 
 declare i32 @llvm.read_register.i32(metadata) nounwind
 
-!0 = metadata !{metadata !"x5\00"}
+!0 = !{!"x5\00"}
diff --git a/test/CodeGen/AArch64/arm64-named-reg-notareg.ll b/test/CodeGen/AArch64/arm64-named-reg-notareg.ll
index 3ca14c4..759bc15 100644
--- a/test/CodeGen/AArch64/arm64-named-reg-notareg.ll
+++ b/test/CodeGen/AArch64/arm64-named-reg-notareg.ll
@@ -10,4 +10,4 @@ entry:
 
 declare i32 @llvm.read_register.i32(metadata) nounwind
 
-!0 = metadata !{metadata !"notareg\00"}
+!0 = !{!"notareg\00"}
diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll
index 1cfba82..4a92c3d 100644
--- a/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -188,7 +188,7 @@ define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
 
 define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
 ; CHECK-LABEL: ins2f1:
-; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK: mov {{d[0-9]+}}, {{v[0-9]+}}.d[1]
   %tmp3 = extractelement <2 x double> %tmp1, i32 1
   %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
   ret <1 x double> %tmp4
diff --git a/test/CodeGen/AArch64/arm64-neon-select_cc.ll b/test/CodeGen/AArch64/arm64-neon-select_cc.ll
index 95c582a..d334c08 100644
--- a/test/CodeGen/AArch64/arm64-neon-select_cc.ll
+++ b/test/CodeGen/AArch64/arm64-neon-select_cc.ll
@@ -204,3 +204,18 @@ define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c,
   %e = select i1 %cmp31, <2 x double> %c, <2 x double> %d
   ret <2 x double> %e
 }
+
+; Special case: when the select condition is an icmp with i1 operands, don't
+; do the comparison on vectors.
+; Part of PR21549.
+define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_select_cc_v2i32_icmpi1:
+; CHECK: tst   w0, #0x1
+; CHECK: csetm [[MASK:w[0-9]+]], ne
+; CHECK: dup   [[DUPMASK:v[0-9]+]].2s, [[MASK]]
+; CHECK: bsl   [[DUPMASK]].8b, v0.8b, v1.8b
+; CHECK: mov   v0.16b, [[DUPMASK]].16b
+  %cmp = icmp ne i1 %cc, 0
+  %e = select i1 %cmp, <2 x i32> %a, <2 x i32> %b
+  ret <2 x i32> %e
+}
diff --git a/test/CodeGen/AArch64/arm64-platform-reg.ll b/test/CodeGen/AArch64/arm64-platform-reg.ll
index 651c793..b0d3ee0 100644
--- a/test/CodeGen/AArch64/arm64-platform-reg.ll
+++ b/test/CodeGen/AArch64/arm64-platform-reg.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-DARWIN
+; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18
+; RUN: llc -mtriple=arm64-freebsd-gnu -aarch64-reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18
 ; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
 
 ; x18 is reserved as a platform register on Darwin but not on other
@@ -16,11 +17,11 @@ define void @keep_live() {
 ; CHECK: ldr x18
 ; CHECK: str x18
 
-; CHECK-DARWIN-NOT: ldr fp
-; CHECK-DARWIN-NOT: ldr x18
-; CHECK-DARWIN: Spill
-; CHECK-DARWIN-NOT: ldr fp
-; CHECK-DARWIN-NOT: ldr x18
-; CHECK-DARWIN: ret
+; CHECK-RESERVE-X18-NOT: ldr fp
+; CHECK-RESERVE-X18-NOT: ldr x18
+; CHECK-RESERVE-X18: Spill
+; CHECK-RESERVE-X18-NOT: ldr fp
+; CHECK-RESERVE-X18-NOT: ldr x18
+; CHECK-RESERVE-X18: ret
   ret void
 }
diff --git a/test/CodeGen/AArch64/arm64-popcnt.ll b/test/CodeGen/AArch64/arm64-popcnt.ll
index 117ab3a..b0b529a 100644
--- a/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -4,7 +4,8 @@
 define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
   %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
   ret i32 %cnt
-; CHECK: fmov	s0, w0
+; CHECK: ubfx	x{{[0-9]+}}
+; CHECK: fmov	d0, x{{[0-9]+}}
 ; CHECK: cnt.8b	v0, v0
 ; CHECK: uaddlv.8b	h0, v0
 ; CHECK: fmov w0, s0
@@ -15,7 +16,24 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
 ; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0x33333333
 ; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0xf0f0f0f
 ; CHECK-NONEON: mul
+}
 
+define i32 @cnt32_advsimd_2(<2 x i32> %x) {
+  %1 = extractelement <2 x i32> %x, i64 0
+  %2 = tail call i32 @llvm.ctpop.i32(i32 %1)
+  ret i32 %2
+; CHECK: fmov	w0, s0
+; CHECK: fmov	d0, x0
+; CHECK: cnt.8b	v0, v0
+; CHECK: uaddlv.8b	h0, v0
+; CHECK: fmov w0, s0
+; CHECK: ret
+; CHECK-NONEON-LABEL: cnt32_advsimd_2
+; CHECK-NONEON-NOT: 8b
+; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0x55555555
+; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0x33333333
+; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0xf0f0f0f
+; CHECK-NONEON: mul
 }
 
 define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
diff --git a/test/CodeGen/AArch64/arm64-prefetch.ll b/test/CodeGen/AArch64/arm64-prefetch.ll
index 9dc6301..aac3515 100644
--- a/test/CodeGen/AArch64/arm64-prefetch.ll
+++ b/test/CodeGen/AArch64/arm64-prefetch.ll
@@ -117,7 +117,7 @@ entry:
 
 declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind
 
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"any pointer", metadata !1}
+!0 = !{!"int", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"any pointer", !1}
diff --git a/test/CodeGen/AArch64/arm64-promote-const.ll b/test/CodeGen/AArch64/arm64-promote-const.ll
index 380ff55..5dd92a7 100644
--- a/test/CodeGen/AArch64/arm64-promote-const.ll
+++ b/test/CodeGen/AArch64/arm64-promote-const.ll
@@ -41,8 +41,7 @@ entry:
 ; PROMOTED-LABEL: test2:
 ; In stress mode, constant vector are promoted
 ; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1:__PromotedConst[0-9]+]]@PAGE
-; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
-; PROMOTED: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
+; PROMOTED: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTV1]]@PAGEOFF]
 ; Destination register is defined by ABI
 ; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
 ; PROMOTED-NEXT: mla.16b v0, v0, v[[REGNUM]]
@@ -64,51 +63,23 @@ entry:
   ret <16 x i8> %add.i9
 }
 
-; Two different uses of the sane constant in two different basic blocks,
+; Two different uses of the same constant in two different basic blocks,
 ; one dominates the other
 define <16 x i8> @test3(<16 x i8> %arg, i32 %path) {
 ; PROMOTED-LABEL: test3:
 ; In stress mode, constant vector are promoted
 ; Since, the constant is the same as the previous function,
 ; the same address must be used
-; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE
-; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
-; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
-; Destination register is defined by ABI
-; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
-; PROMOTED-NEXT: cbnz w0, [[LABEL:LBB.*]]
-; Next BB
-; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV2:__PromotedConst[0-9]+]]@PAGE
-; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV2]]@PAGEOFF
-; PROMOTED-NEXT: ldr q[[REGNUM]], {{\[}}[[BASEADDR]]]
-; Next BB
-; PROMOTED-NEXT: [[LABEL]]:
-; PROMOTED-NEXT: mul.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
-; PROMOTED-NEXT: add.16b v0, v0, [[DESTV]]
-; PROMOTED-NEXT: ret
+; PROMOTED: ldr
+; PROMOTED: ldr
+; PROMOTED-NOT: ldr
+; PROMOTED: ret
 
 ; REGULAR-LABEL: test3:
-; Regular mode does not elimitate common sub expression by its own.
-; In other words, the same loads appears several times.
-; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL1:lCP.*]]@PAGE
-; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL1]]@PAGEOFF]
-; Destination register is defined by ABI
-; REGULAR-NEXT: add.16b v0, v0, v[[REGNUM]]
-; REGULAR-NEXT: cbz w0, [[LABELelse:LBB.*]]
-; Next BB
-; Redundant load
-; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL1]]@PAGE
-; REGULAR-NEXT: ldr q[[REGNUM]], {{\[}}[[PAGEADDR]], [[CSTLABEL1]]@PAGEOFF]
-; REGULAR-NEXT: b [[LABELend:LBB.*]]
-; Next BB
-; REGULAR-NEXT: [[LABELelse]]
-; REGULAR-NEXT: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL2:lCP.*]]@PAGE
-; REGULAR-NEXT: ldr q[[REGNUM]], {{\[}}[[PAGEADDR]], [[CSTLABEL2]]@PAGEOFF]
-; Next BB
-; REGULAR-NEXT: [[LABELend]]:
-; REGULAR-NEXT: mul.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
-; REGULAR-NEXT: add.16b v0, v0, [[DESTV]]
-; REGULAR-NEXT: ret
+; REGULAR: ldr
+; REGULAR: ldr
+; REGULAR-NOT: ldr
+; REGULAR: ret
 entry:
   %add.i = add <16 x i8> %arg, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
   %tobool = icmp eq i32 %path, 0
@@ -135,34 +106,14 @@ define <16 x i8> @test4(<16 x i8> %arg, i32 %path) {
 ; In stress mode, constant vector are promoted
 ; Since, the constant is the same as the previous function,
 ; the same address must be used
-; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE
-; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
-; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
-; Destination register is defined by ABI
-; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
-; PROMOTED-NEXT: cbz w0, [[LABEL:LBB.*]]
-; Next BB
-; PROMOTED: mul.16b v0, v0, v[[REGNUM]]
-; Next BB
-; PROMOTED-NEXT: [[LABEL]]:
-; PROMOTED-NEXT: ret
-
+; PROMOTED: ldr
+; PROMOTED-NOT: ldr
+; PROMOTED: ret
 
 ; REGULAR-LABEL: test4:
-; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL3:lCP.*]]@PAGE
-; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL3]]@PAGEOFF]
-; Destination register is defined by ABI
-; REGULAR-NEXT: add.16b v0, v0, v[[REGNUM]]
-; REGULAR-NEXT: cbz w0, [[LABEL:LBB.*]]
-; Next BB
-; Redundant expression
-; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL3]]@PAGE
-; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL3]]@PAGEOFF]
-; Destination register is defined by ABI
-; REGULAR-NEXT: mul.16b v0, v0, v[[REGNUM]]
-; Next BB
-; REGULAR-NEXT: [[LABEL]]:
-; REGULAR-NEXT: ret
+; REGULAR: ldr
+; REGULAR-NOT: ldr
+; REGULAR: ret
 entry:
   %add.i = add <16 x i8> %arg, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
   %tobool = icmp eq i32 %path, 0
@@ -184,40 +135,13 @@ define <16 x i8> @test5(<16 x i8> %arg, i32 %path) {
 ; In stress mode, constant vector are promoted
 ; Since, the constant is the same as the previous function,
 ; the same address must be used
-; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE
-; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
-; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
-; PROMOTED-NEXT: cbz w0, [[LABEL:LBB.*]]
-; Next BB
-; PROMOTED: add.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
-; PROMOTED-NEXT: mul.16b v[[REGNUM]], [[DESTV]], v[[REGNUM]]
-; Next BB
-; PROMOTED-NEXT: [[LABEL]]:
-; PROMOTED-NEXT: mul.16b [[TMP1:v[0-9]+]], v[[REGNUM]], v[[REGNUM]]
-; PROMOTED-NEXT: mul.16b [[TMP2:v[0-9]+]], [[TMP1]], [[TMP1]]
-; PROMOTED-NEXT: mul.16b [[TMP3:v[0-9]+]], [[TMP2]], [[TMP2]]
-; PROMOTED-NEXT: mul.16b v0, [[TMP3]], [[TMP3]]
-; PROMOTED-NEXT: ret
+; PROMOTED: ldr
+; PROMOTED-NOT: ldr
+; PROMOTED: ret
 
 ; REGULAR-LABEL: test5:
-; REGULAR: cbz w0, [[LABELelse:LBB.*]]
-; Next BB
-; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
-; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
-; REGULAR-NEXT: add.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
-; REGULAR-NEXT: mul.16b v[[DESTREGNUM:[0-9]+]], [[DESTV]], v[[REGNUM]]
-; REGULAR-NEXT: b [[LABELend:LBB.*]]
-; Next BB
-; REGULAR-NEXT: [[LABELelse]]
-; REGULAR-NEXT: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
-; REGULAR-NEXT: ldr q[[DESTREGNUM]], {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
-; Next BB
-; REGULAR-NEXT: [[LABELend]]:
-; REGULAR-NEXT: mul.16b [[TMP1:v[0-9]+]], v[[DESTREGNUM]], v[[DESTREGNUM]]
-; REGULAR-NEXT: mul.16b [[TMP2:v[0-9]+]], [[TMP1]], [[TMP1]]
-; REGULAR-NEXT: mul.16b [[TMP3:v[0-9]+]], [[TMP2]], [[TMP2]]
-; REGULAR-NEXT: mul.16b v0, [[TMP3]], [[TMP3]]
-; REGULAR-NEXT: ret
+; REGULAR: ldr
+; REGULAR: ret
 entry:
   %tobool = icmp eq i32 %path, 0
   br i1 %tobool, label %if.end, label %if.then
diff --git a/test/CodeGen/AArch64/arm64-st1.ll b/test/CodeGen/AArch64/arm64-st1.ll
index 4370484..76d52f4 100644
--- a/test/CodeGen/AArch64/arm64-st1.ll
+++ b/test/CodeGen/AArch64/arm64-st1.ll
@@ -8,6 +8,26 @@ define void @st1lane_16b(<16 x i8> %A, i8* %D) {
   ret void
 }
 
+define void @st1lane_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_16b
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.b { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr i8* %D, i64 %offset
+  %tmp = extractelement <16 x i8> %A, i32 1
+  store i8 %tmp, i8* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_16b
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.b { v0 }[0], [x[[XREG]]]
+  %ptr = getelementptr i8* %D, i64 %offset
+  %tmp = extractelement <16 x i8> %A, i32 0
+  store i8 %tmp, i8* %ptr
+  ret void
+}
+
 define void @st1lane_8h(<8 x i16> %A, i16* %D) {
 ; CHECK-LABEL: st1lane_8h
 ; CHECK: st1.h
@@ -16,6 +36,25 @@ define void @st1lane_8h(<8 x i16> %A, i16* %D) {
   ret void
 }
 
+define void @st1lane_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_8h
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.h { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr i16* %D, i64 %offset
+  %tmp = extractelement <8 x i16> %A, i32 1
+  store i16 %tmp, i16* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_8h
+; CHECK: str h0, [x0, x1, lsl #1]
+  %ptr = getelementptr i16* %D, i64 %offset
+  %tmp = extractelement <8 x i16> %A, i32 0
+  store i16 %tmp, i16* %ptr
+  ret void
+}
+
 define void @st1lane_4s(<4 x i32> %A, i32* %D) {
 ; CHECK-LABEL: st1lane_4s
 ; CHECK: st1.s
@@ -24,6 +63,25 @@ define void @st1lane_4s(<4 x i32> %A, i32* %D) {
   ret void
 }
 
+define void @st1lane_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_4s
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.s { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr i32* %D, i64 %offset
+  %tmp = extractelement <4 x i32> %A, i32 1
+  store i32 %tmp, i32* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_4s
+; CHECK: str s0, [x0, x1, lsl #2]
+  %ptr = getelementptr i32* %D, i64 %offset
+  %tmp = extractelement <4 x i32> %A, i32 0
+  store i32 %tmp, i32* %ptr
+  ret void
+}
+
 define void @st1lane_4s_float(<4 x float> %A, float* %D) {
 ; CHECK-LABEL: st1lane_4s_float
 ; CHECK: st1.s
@@ -32,6 +90,25 @@ define void @st1lane_4s_float(<4 x float> %A, float* %D) {
   ret void
 }
 
+define void @st1lane_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_4s_float
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.s { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr float* %D, i64 %offset
+  %tmp = extractelement <4 x float> %A, i32 1
+  store float %tmp, float* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_4s_float
+; CHECK: str s0, [x0, x1, lsl #2]
+  %ptr = getelementptr float* %D, i64 %offset
+  %tmp = extractelement <4 x float> %A, i32 0
+  store float %tmp, float* %ptr
+  ret void
+}
+
 define void @st1lane_2d(<2 x i64> %A, i64* %D) {
 ; CHECK-LABEL: st1lane_2d
 ; CHECK: st1.d
@@ -40,6 +117,25 @@ define void @st1lane_2d(<2 x i64> %A, i64* %D) {
   ret void
 }
 
+define void @st1lane_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_2d
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.d { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr i64* %D, i64 %offset
+  %tmp = extractelement <2 x i64> %A, i32 1
+  store i64 %tmp, i64* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_2d
+; CHECK: str d0, [x0, x1, lsl #3]
+  %ptr = getelementptr i64* %D, i64 %offset
+  %tmp = extractelement <2 x i64> %A, i32 0
+  store i64 %tmp, i64* %ptr
+  ret void
+}
+
 define void @st1lane_2d_double(<2 x double> %A, double* %D) {
 ; CHECK-LABEL: st1lane_2d_double
 ; CHECK: st1.d
@@ -48,6 +144,25 @@ define void @st1lane_2d_double(<2 x double> %A, double* %D) {
   ret void
 }
 
+define void @st1lane_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_2d_double
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.d { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr double* %D, i64 %offset
+  %tmp = extractelement <2 x double> %A, i32 1
+  store double %tmp, double* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_2d_double
+; CHECK: str d0, [x0, x1, lsl #3]
+  %ptr = getelementptr double* %D, i64 %offset
+  %tmp = extractelement <2 x double> %A, i32 0
+  store double %tmp, double* %ptr
+  ret void
+}
+
 define void @st1lane_8b(<8 x i8> %A, i8* %D) {
 ; CHECK-LABEL: st1lane_8b
 ; CHECK: st1.b
@@ -56,6 +171,26 @@ define void @st1lane_8b(<8 x i8> %A, i8* %D) {
   ret void
 }
 
+define void @st1lane_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_8b
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.b { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr i8* %D, i64 %offset
+  %tmp = extractelement <8 x i8> %A, i32 1
+  store i8 %tmp, i8* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_8b
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.b { v0 }[0], [x[[XREG]]]
+  %ptr = getelementptr i8* %D, i64 %offset
+  %tmp = extractelement <8 x i8> %A, i32 0
+  store i8 %tmp, i8* %ptr
+  ret void
+}
+
 define void @st1lane_4h(<4 x i16> %A, i16* %D) {
 ; CHECK-LABEL: st1lane_4h
 ; CHECK: st1.h
@@ -64,6 +199,25 @@ define void @st1lane_4h(<4 x i16> %A, i16* %D) {
   ret void
 }
 
+define void @st1lane_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_4h
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.h { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr i16* %D, i64 %offset
+  %tmp = extractelement <4 x i16> %A, i32 1
+  store i16 %tmp, i16* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_4h
+; CHECK: str h0, [x0, x1, lsl #1]
+  %ptr = getelementptr i16* %D, i64 %offset
+  %tmp = extractelement <4 x i16> %A, i32 0
+  store i16 %tmp, i16* %ptr
+  ret void
+}
+
 define void @st1lane_2s(<2 x i32> %A, i32* %D) {
 ; CHECK-LABEL: st1lane_2s
 ; CHECK: st1.s
@@ -72,6 +226,25 @@ define void @st1lane_2s(<2 x i32> %A, i32* %D) {
   ret void
 }
 
+define void @st1lane_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_2s
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.s { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr i32* %D, i64 %offset
+  %tmp = extractelement <2 x i32> %A, i32 1
+  store i32 %tmp, i32* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_2s
+; CHECK: str s0, [x0, x1, lsl #2]
+  %ptr = getelementptr i32* %D, i64 %offset
+  %tmp = extractelement <2 x i32> %A, i32 0
+  store i32 %tmp, i32* %ptr
+  ret void
+}
+
 define void @st1lane_2s_float(<2 x float> %A, float* %D) {
 ; CHECK-LABEL: st1lane_2s_float
 ; CHECK: st1.s
@@ -80,6 +253,25 @@ define void @st1lane_2s_float(<2 x float> %A, float* %D) {
   ret void
 }
 
+define void @st1lane_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_2s_float
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.s { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr float* %D, i64 %offset
+  %tmp = extractelement <2 x float> %A, i32 1
+  store float %tmp, float* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_2s_float
+; CHECK: str s0, [x0, x1, lsl #2]
+  %ptr = getelementptr float* %D, i64 %offset
+  %tmp = extractelement <2 x float> %A, i32 0
+  store float %tmp, float* %ptr
+  ret void
+}
+
 define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) {
 ; CHECK-LABEL: st2lane_16b
 ; CHECK: st2.b
diff --git a/test/CodeGen/AArch64/arm64-stackmap-nops.ll b/test/CodeGen/AArch64/arm64-stackmap-nops.ll
new file mode 100644
index 0000000..5915b64
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-stackmap-nops.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
+
+define void @test_shadow_optimization() {
+entry:
+; Expect 8 bytes worth of nops here rather than 16: With the shadow optimization
+; in place, 8 bytes will be consumed by the frame teardown and return instr.
+; CHECK-LABEL: test_shadow_optimization:
+; CHECK:      nop
+; CHECK-NEXT: nop
+; CHECK-NOT:  nop
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64  0, i32  16)
+  ret void
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
diff --git a/test/CodeGen/AArch64/arm64-stackpointer.ll b/test/CodeGen/AArch64/arm64-stackpointer.ll
index 581faf1..a33de8c 100644
--- a/test/CodeGen/AArch64/arm64-stackpointer.ll
+++ b/test/CodeGen/AArch64/arm64-stackpointer.ll
@@ -21,4 +21,4 @@ declare void @llvm.write_register.i64(metadata, i64) nounwind
 
 ; register unsigned long current_stack_pointer asm("sp");
 ; CHECK-NOT: .asciz  "sp"
-!0 = metadata !{metadata !"sp\00"}
+!0 = !{!"sp\00"}
diff --git a/test/CodeGen/AArch64/arm64-tls-dynamics.ll b/test/CodeGen/AArch64/arm64-tls-dynamics.ll
index e8a83fd..30ea63b 100644
--- a/test/CodeGen/AArch64/arm64-tls-dynamics.ll
+++ b/test/CodeGen/AArch64/arm64-tls-dynamics.ll
@@ -20,7 +20,7 @@ define i32 @test_generaldynamic() {
 ; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0
 ; CHECK: ldr w0, [x[[TP]], x0]
 
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
@@ -43,7 +43,7 @@ define i32* @test_generaldynamic_addr() {
 ; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0
 ; CHECK: add x0, [[TP]], x0
 
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
@@ -73,7 +73,7 @@ define i32 @test_localdynamic() {
 
 ; CHECK: ldr w0, [x[[TPIDR]], x[[TPREL]]]
 
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
@@ -101,7 +101,7 @@ define i32* @test_localdynamic_addr() {
 
 ; CHECK: add x0, [[TPIDR]], [[TPREL]]
 
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
diff --git a/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll b/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
index a7f5215..923742d 100644
--- a/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
+++ b/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
@@ -22,10 +22,10 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"clang version 3.6.0 "}
-!1 = metadata !{metadata !2, metadata !2, i64 0}
-!2 = metadata !{metadata !"any pointer", metadata !3, i64 0}
-!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
-!4 = metadata !{metadata !"Simple C/C++ TBAA"}
-!5 = metadata !{metadata !6, metadata !6, i64 0}
-!6 = metadata !{metadata !"int", metadata !3, i64 0}
+!0 = !{!"clang version 3.6.0 "}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"any pointer", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !3, i64 0}
diff --git a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
index 36a7bfd..44f2af1 100644
--- a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
+++ b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
@@ -12,6 +12,7 @@ define void @test_simple(i32 %n, ...) {
 ; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
 
 ; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
 
 ; CHECK: stp x1, x2, [sp, #[[GR_BASE:[0-9]+]]]
 ; ... omit middle ones ...
@@ -21,11 +22,10 @@ define void @test_simple(i32 %n, ...) {
 ; ... omit middle ones ...
 ; CHECK: stp q6, q7, [sp, #
 
-; CHECK: str [[STACK_TOP]], [x[[VA_LIST_HI]], :lo12:var]
+; CHECK: str [[STACK_TOP]], [x[[VA_LIST]]]
 
 ; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
 ; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #56
-; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
 ; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
 
 ; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
@@ -50,6 +50,7 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
 ; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
 
 ; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
 
 ; CHECK: stp x3, x4, [sp, #[[GR_BASE:[0-9]+]]]
 ; ... omit middle ones ...
@@ -59,11 +60,10 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
 ; ... omit middle ones ...
 ; CHECK: str q7, [sp, #
 
-; CHECK: str [[STACK_TOP]], [x[[VA_LIST_HI]], :lo12:var]
+; CHECK: str [[STACK_TOP]], [x[[VA_LIST]]]
 
 ; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
 ; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #40
-; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
 ; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
 
 ; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
@@ -89,18 +89,20 @@ define void @test_nospare([8 x i64], [8 x float], ...) {
   call void @llvm.va_start(i8* %addr)
 ; CHECK-NOT: sub sp, sp
 ; CHECK: mov [[STACK:x[0-9]+]], sp
-; CHECK: str [[STACK]], [{{x[0-9]+}}, :lo12:var]
+; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var
+; CHECK: str [[STACK]], [x[[VAR]]]
 
   ret void
 }
 
 ; If there are non-variadic arguments on the stack (here two i64s) then the
 ; __stack field should point just past them.
-define void @test_offsetstack([10 x i64], [3 x float], ...) {
+define void @test_offsetstack([8 x i64], [2 x i64], [3 x float], ...) {
 ; CHECK-LABEL: test_offsetstack:
 ; CHECK: sub sp, sp, #80
 ; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96
-; CHECK: str [[STACK_TOP]], [{{x[0-9]+}}, :lo12:var]
+; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var
+; CHECK: str [[STACK_TOP]], [x[[VAR]]]
 
   %addr = bitcast %va_list* @var to i8*
   call void @llvm.va_start(i8* %addr)
diff --git a/test/CodeGen/AArch64/arm64-vshuffle.ll b/test/CodeGen/AArch64/arm64-vshuffle.ll
index 62fd961..75e0d80 100644
--- a/test/CodeGen/AArch64/arm64-vshuffle.ll
+++ b/test/CodeGen/AArch64/arm64-vshuffle.ll
@@ -29,14 +29,14 @@ entry:
 }
 
 ; CHECK: lCPI1_0:
-; CHECK:          .byte   2                       ; 0x2
+; CHECK:          .byte   0                       ; 0x0
 ; CHECK:          .byte   255                     ; 0xff
-; CHECK:          .byte   6                       ; 0x6
+; CHECK:          .byte   2                       ; 0x2
 ; CHECK:          .byte   255                     ; 0xff
 ; CHECK:          .byte   10                      ; 0xa
 ; CHECK:          .byte   12                      ; 0xc
 ; CHECK:          .byte   14                      ; 0xe
-; CHECK:          .byte   0                       ; 0x0
+; CHECK:          .byte   7                       ; 0x7
 ; CHECK: test2
 ; CHECK: ldr     d[[REG0:[0-9]+]], [{{.*}}, lCPI1_0@PAGEOFF]
 ; CHECK: adrp    x[[REG2:[0-9]+]], lCPI1_1@PAGE
@@ -82,22 +82,22 @@ bb:
   ret <16 x i1> %Shuff
 }
 ; CHECK: lCPI3_1:
-; CHECK:         .byte   2                       ; 0x2
-; CHECK:         .byte   1                       ; 0x1
-; CHECK:         .byte   6                       ; 0x6
-; CHECK:         .byte   18                      ; 0x12
-; CHECK:         .byte   10                      ; 0xa
-; CHECK:         .byte   12                      ; 0xc
-; CHECK:         .byte   14                      ; 0xe
 ; CHECK:         .byte   0                       ; 0x0
+; CHECK:         .byte   1                       ; 0x1
 ; CHECK:         .byte   2                       ; 0x2
-; CHECK:         .byte   31                      ; 0x1f
+; CHECK:         .byte   18                      ; 0x12
+; CHECK:         .byte   4                       ; 0x4
+; CHECK:         .byte   5                       ; 0x5
 ; CHECK:         .byte   6                       ; 0x6
-; CHECK:         .byte   30                      ; 0x1e
+; CHECK:         .byte   7                       ; 0x7
+; CHECK:         .byte   8                       ; 0x8
+; CHECK:         .byte   31                      ; 0x1f
 ; CHECK:         .byte   10                      ; 0xa
+; CHECK:         .byte   30                      ; 0x1e
 ; CHECK:         .byte   12                      ; 0xc
+; CHECK:         .byte   13                      ; 0xd
 ; CHECK:         .byte   14                      ; 0xe
-; CHECK:         .byte   0                       ; 0x0
+; CHECK:         .byte   15                      ; 0xf
 ; CHECK: _test4:
 ; CHECK:         ldr     q[[REG1:[0-9]+]]
 ; CHECK:         movi.2d v[[REG0:[0-9]+]], #0000000000000000
diff --git a/test/CodeGen/AArch64/bitcast-v2i8.ll b/test/CodeGen/AArch64/bitcast-v2i8.ll
new file mode 100644
index 0000000..4bdac64
--- /dev/null
+++ b/test/CodeGen/AArch64/bitcast-v2i8.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=aarch64-apple-ios | FileCheck %s
+
+; Part of PR21549: going through the stack isn't ideal but is correct.
+
+define i16 @test_bitcast_v2i8_to_i16(<2 x i8> %a) {
+; CHECK-LABEL: test_bitcast_v2i8_to_i16
+; CHECK:      mov.s   [[WREG_HI:w[0-9]+]], v0[1]
+; CHECK-NEXT: fmov    [[WREG_LO:w[0-9]+]], s0
+; CHECK-NEXT: strb    [[WREG_HI]], [sp, #15]
+; CHECK-NEXT: strb    [[WREG_LO]], [sp, #14]
+; CHECK-NEXT: ldrh    w0, [sp, #14]
+
+  %aa = bitcast <2 x i8> %a to i16
+  ret i16 %aa
+}
diff --git a/test/CodeGen/AArch64/br-to-eh-lpad.ll b/test/CodeGen/AArch64/br-to-eh-lpad.ll
new file mode 100644
index 0000000..20bffd9
--- /dev/null
+++ b/test/CodeGen/AArch64/br-to-eh-lpad.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -mtriple=aarch64-apple-ios -verify-machineinstrs
+
+; This function tests that the machine verifier accepts an unconditional
+; branch from an invoke basic block, to its EH landing pad basic block.
+; The test is brittle and isn't ideally reduced, because in most cases the
+; branch would be removed (for instance, turned into a fallthrough), and in
+; that case, the machine verifier, which relies on analyzing branches for this
+; kind of verification, is unable to check anything, so accepts the CFG.
+
+define void @test_branch_to_landingpad() {
+entry:
+  br i1 undef, label %if.end50.thread, label %if.then6
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch %struct._objc_typeinfo.12.129.194.285.350.493.519.532.571.597.623.765* @"OBJC_EHTYPE_$_NSString"
+          catch %struct._objc_typeinfo.12.129.194.285.350.493.519.532.571.597.623.765* @OBJC_EHTYPE_id
+          catch i8* null
+  br i1 undef, label %invoke.cont33, label %catch.fallthrough
+
+catch.fallthrough:
+  %matches31 = icmp eq i32 undef, 0
+  br i1 %matches31, label %invoke.cont41, label %finally.catchall
+
+if.then6:
+  invoke void @objc_exception_throw()
+          to label %invoke.cont7 unwind label %lpad
+
+invoke.cont7:
+  unreachable
+
+if.end50.thread:
+  tail call void (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @.str1, i64 0, i64 0), i32 125)
+  tail call void (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @.str1, i64 0, i64 0), i32 128)
+  unreachable
+
+invoke.cont33:
+  tail call void (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @.str1, i64 0, i64 0), i32 119)
+  unreachable
+
+invoke.cont41:
+  invoke void @objc_exception_rethrow()
+          to label %invoke.cont43 unwind label %lpad40
+
+invoke.cont43:
+  unreachable
+
+lpad40:
+  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch i8* null
+  br label %finally.catchall
+
+finally.catchall:
+  tail call void (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @.str1, i64 0, i64 0), i32 125)
+  unreachable
+}
+
+%struct._objc_typeinfo.12.129.194.285.350.493.519.532.571.597.623.765 = type { i8**, i8*, %struct._class_t.10.127.192.283.348.491.517.530.569.595.621.764* }
+%struct._class_t.10.127.192.283.348.491.517.530.569.595.621.764 = type { %struct._class_t.10.127.192.283.348.491.517.530.569.595.621.764*, %struct._class_t.10.127.192.283.348.491.517.530.569.595.621.764*, %struct._objc_cache.0.117.182.273.338.481.507.520.559.585.611.754*, i8* (i8*, i8*)**, %struct._class_ro_t.9.126.191.282.347.490.516.529.568.594.620.763* }
+%struct._objc_cache.0.117.182.273.338.481.507.520.559.585.611.754 = type opaque
+%struct._class_ro_t.9.126.191.282.347.490.516.529.568.594.620.763 = type { i32, i32, i32, i8*, i8*, %struct.__method_list_t.2.119.184.275.340.483.509.522.561.587.613.756*, %struct._objc_protocol_list.6.123.188.279.344.487.513.526.565.591.617.760*, %struct._ivar_list_t.8.125.190.281.346.489.515.528.567.593.619.762*, i8*, %struct._prop_list_t.4.121.186.277.342.485.511.524.563.589.615.758* }
+%struct.__method_list_t.2.119.184.275.340.483.509.522.561.587.613.756 = type { i32, i32, [0 x %struct._objc_method.1.118.183.274.339.482.508.521.560.586.612.755] }
+%struct._objc_method.1.118.183.274.339.482.508.521.560.586.612.755 = type { i8*, i8*, i8* }
+%struct._objc_protocol_list.6.123.188.279.344.487.513.526.565.591.617.760 = type { i64, [0 x %struct._protocol_t.5.122.187.278.343.486.512.525.564.590.616.759*] }
+%struct._protocol_t.5.122.187.278.343.486.512.525.564.590.616.759 = type { i8*, i8*, %struct._objc_protocol_list.6.123.188.279.344.487.513.526.565.591.617.760*, %struct.__method_list_t.2.119.184.275.340.483.509.522.561.587.613.756*, %struct.__method_list_t.2.119.184.275.340.483.509.522.561.587.613.756*, %struct.__method_list_t.2.119.184.275.340.483.509.522.561.587.613.756*, %struct.__method_list_t.2.119.184.275.340.483.509.522.561.587.613.756*, %struct._prop_list_t.4.121.186.277.342.485.511.524.563.589.615.758*, i32, i32, i8** }
+%struct._ivar_list_t.8.125.190.281.346.489.515.528.567.593.619.762 = type { i32, i32, [0 x %struct._ivar_t.7.124.189.280.345.488.514.527.566.592.618.761] }
+%struct._ivar_t.7.124.189.280.345.488.514.527.566.592.618.761 = type { i32*, i8*, i8*, i32, i32 }
+%struct._prop_list_t.4.121.186.277.342.485.511.524.563.589.615.758 = type { i32, i32, [0 x %struct._prop_t.3.120.185.276.341.484.510.523.562.588.614.757] }
+%struct._prop_t.3.120.185.276.341.484.510.523.562.588.614.757 = type { i8*, i8* }
+
+@.str1 = external unnamed_addr constant [17 x i8], align 1
+@OBJC_EHTYPE_id = external global %struct._objc_typeinfo.12.129.194.285.350.493.519.532.571.597.623.765
+@"OBJC_EHTYPE_$_NSString" = external global %struct._objc_typeinfo.12.129.194.285.350.493.519.532.571.597.623.765, section "__DATA,__datacoal_nt,coalesced", align 8
+
+declare void @objc_exception_throw()
+declare void @objc_exception_rethrow()
+declare i32 @__objc_personality_v0(...)
+declare void @printf(i8* nocapture readonly, ...)
diff --git a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
index df8dc87..3686a1f 100644
--- a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
+++ b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
@@ -366,7 +366,6 @@ define i32 @fcmpri(i32 %argc, i8** nocapture readonly %argv) {
 ; CHECK-LABEL-DAG: .LBB9_3
 ; CHECK: cmp w19, #0
 ; CHECK: fcmp d8, #0.0
-; CHECK: b.gt .LBB9_5
 ; CHECK-NOT: cmp w19, #1
 ; CHECK-NOT: b.ge .LBB9_5
 
diff --git a/test/CodeGen/AArch64/compiler-ident.ll b/test/CodeGen/AArch64/compiler-ident.ll
index 0350571..217340d 100644
--- a/test/CodeGen/AArch64/compiler-ident.ll
+++ b/test/CodeGen/AArch64/compiler-ident.ll
@@ -8,5 +8,5 @@ target triple = "aarch64--linux-gnu"
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"some LLVM version"}
+!0 = !{!"some LLVM version"}
 
diff --git a/test/CodeGen/AArch64/cpus.ll b/test/CodeGen/AArch64/cpus.ll
index f0f36bd..1266842 100644
--- a/test/CodeGen/AArch64/cpus.ll
+++ b/test/CodeGen/AArch64/cpus.ll
@@ -4,6 +4,7 @@
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a53 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a57 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a72 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID
 
 ; CHECK-NOT: {{.*}}  is not a recognized processor for this target
diff --git a/test/CodeGen/AArch64/dp-3source.ll b/test/CodeGen/AArch64/dp-3source.ll
index 22bd4a8..bd96ec7 100644
--- a/test/CodeGen/AArch64/dp-3source.ll
+++ b/test/CodeGen/AArch64/dp-3source.ll
@@ -161,3 +161,18 @@ define i64 @test_umnegl(i32 %lhs, i32 %rhs) {
 ; CHECK: umnegl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
   ret i64 %res
 }
+
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+@c = common global i32 0, align 4
+
+define void @test_mneg(){
+; CHECK-LABEL: test_mneg:
+  %1 = load i32* @a, align 4
+  %2 = load i32* @b, align 4
+  %3 = sub i32 0, %1
+  %4 = mul i32 %2, %3
+  store i32 %4, i32* @c, align 4
+; CHECK: mneg {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+  ret void
+}
diff --git a/test/CodeGen/AArch64/f16-convert.ll b/test/CodeGen/AArch64/f16-convert.ll
index 12412d4..d1f49a91 100644
--- a/test/CodeGen/AArch64/f16-convert.ll
+++ b/test/CodeGen/AArch64/f16-convert.ll
@@ -133,7 +133,8 @@ define void @store0(i16* nocapture %a, float %val) nounwind {
 
 define void @store1(i16* nocapture %a, double %val) nounwind {
 ; CHECK-LABEL: store1:
-; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: fcvt s0, d0
+; CHECK-NEXT: fcvt h0, s0
 ; CHECK-NEXT: str  h0, [x0]
 ; CHECK-NEXT: ret
 
@@ -158,7 +159,8 @@ define void @store2(i16* nocapture %a, i32 %i, float %val) nounwind {
 
 define void @store3(i16* nocapture %a, i32 %i, double %val) nounwind {
 ; CHECK-LABEL: store3:
-; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: fcvt s0, d0
+; CHECK-NEXT: fcvt h0, s0
 ; CHECK-NEXT: str h0, [x0, w1, sxtw #1]
 ; CHECK-NEXT: ret
 
@@ -184,7 +186,8 @@ define void @store4(i16* nocapture %a, i64 %i, float %val) nounwind {
 
 define void @store5(i16* nocapture %a, i64 %i, double %val) nounwind {
 ; CHECK-LABEL: store5:
-; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: fcvt s0, d0
+; CHECK-NEXT: fcvt h0, s0
 ; CHECK-NEXT: str h0, [x0, x1, lsl #1]
 ; CHECK-NEXT: ret
 
@@ -209,7 +212,8 @@ define void @store6(i16* nocapture %a, float %val) nounwind {
 
 define void @store7(i16* nocapture %a, double %val) nounwind {
 ; CHECK-LABEL: store7:
-; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: fcvt s0, d0
+; CHECK-NEXT: fcvt h0, s0
 ; CHECK-NEXT: str h0, [x0, #20]
 ; CHECK-NEXT: ret
 
@@ -234,7 +238,8 @@ define void @store8(i16* nocapture %a, float %val) nounwind {
 
 define void @store9(i16* nocapture %a, double %val) nounwind {
 ; CHECK-LABEL: store9:
-; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: fcvt s0, d0
+; CHECK-NEXT: fcvt h0, s0
 ; CHECK-NEXT: stur h0, [x0, #-20]
 ; CHECK-NEXT: ret
 
diff --git a/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
new file mode 100644
index 0000000..bc4a210
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-label: test_or
+; CHECK:       cbnz w0, {{LBB[0-9]+_2}}
+; CHECK:       cbz w1, {{LBB[0-9]+_1}}
+define i64 @test_or(i32 %a, i32 %b) {
+bb1:
+  %0 = icmp eq i32 %a, 0
+  %1 = icmp eq i32 %b, 0
+  %or.cond = or i1 %0, %1
+  br i1 %or.cond, label %bb3, label %bb4, !prof !0
+
+bb3:
+  ret i64 0
+
+bb4:
+  %2 = call i64 @bar()
+  ret i64 %2
+}
+
+; CHECK-label: test_ans
+; CHECK:       cbz w0, {{LBB[0-9]+_2}}
+; CHECK:       cbnz w1, {{LBB[0-9]+_3}}
+define i64 @test_and(i32 %a, i32 %b) {
+bb1:
+  %0 = icmp ne i32 %a, 0
+  %1 = icmp ne i32 %b, 0
+  %or.cond = and i1 %0, %1
+  br i1 %or.cond, label %bb4, label %bb3, !prof !1
+
+bb3:
+  ret i64 0
+
+bb4:
+  %2 = call i64 @bar()
+  ret i64 %2
+}
+
+declare i64 @bar()
+
+!0 = !{!"branch_weights", i32 5128, i32 32}
+!1 = !{!"branch_weights", i32 1024, i32 4136}
diff --git a/test/CodeGen/AArch64/fast-isel-branch_weights.ll b/test/CodeGen/AArch64/fast-isel-branch_weights.ll
index 5b22476..70dbdf2 100644
--- a/test/CodeGen/AArch64/fast-isel-branch_weights.ll
+++ b/test/CodeGen/AArch64/fast-isel-branch_weights.ll
@@ -16,4 +16,4 @@ success:
   ret i64 0
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 0, i32 2147483647}
+!0 = !{!"branch_weights", i32 0, i32 2147483647}
diff --git a/test/CodeGen/AArch64/fast-isel-memcpy.ll b/test/CodeGen/AArch64/fast-isel-memcpy.ll
new file mode 100644
index 0000000..9161dad
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-memcpy.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+; Test that we don't segfault.
+; CHECK-LABEL: test
+; CHECK:       ldr [[REG1:x[0-9]+]], [x1]
+; CHECK-NEXT:  and [[REG2:x[0-9]+]], x0, #0x7fffffffffffffff
+; CHECK-NEXT:  str [[REG1]], {{\[}}[[REG2]]{{\]}}
+define void @test(i64 %a, i8* %b) {
+  %1 = and i64 %a, 9223372036854775807
+  %2 = inttoptr i64 %1 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %b, i64 8, i32 8, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
diff --git a/test/CodeGen/AArch64/fast-isel-tbz.ll b/test/CodeGen/AArch64/fast-isel-tbz.ll
index d7f46b2..a5f02ff 100644
--- a/test/CodeGen/AArch64/fast-isel-tbz.ll
+++ b/test/CodeGen/AArch64/fast-isel-tbz.ll
@@ -1,5 +1,5 @@
-; RUN: llc                             -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
-; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+; RUN: llc                             -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK %s
+; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK --check-prefix=FAST %s
 
 define i32 @icmp_eq_i8(i8 zeroext %a) {
 ; CHECK-LABEL: icmp_eq_i8
@@ -121,6 +121,160 @@ bb2:
   ret i32 0
 }
 
+define i32 @icmp_slt_i8(i8 zeroext %a) {
+; FAST-LABEL: icmp_slt_i8
+; FAST:       tbnz w0, #7, {{LBB.+_2}}
+  %1 = icmp slt i8 %a, 0
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_slt_i16(i16 zeroext %a) {
+; FAST-LABEL: icmp_slt_i16
+; FAST:       tbnz w0, #15, {{LBB.+_2}}
+  %1 = icmp slt i16 %a, 0
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_slt_i32(i32 %a) {
+; CHECK-LABEL: icmp_slt_i32
+; CHECK:       tbnz w0, #31, {{LBB.+_2}}
+  %1 = icmp slt i32 %a, 0
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_slt_i64(i64 %a) {
+; CHECK-LABEL: icmp_slt_i64
+; CHECK:       tbnz x0, #63, {{LBB.+_2}}
+  %1 = icmp slt i64 %a, 0
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sge_i8(i8 zeroext %a) {
+; FAST-LABEL: icmp_sge_i8
+; FAST:       tbz w0, #7, {{LBB.+_2}}
+  %1 = icmp sge i8 %a, 0
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sge_i16(i16 zeroext %a) {
+; FAST-LABEL: icmp_sge_i16
+; FAST:       tbz w0, #15, {{LBB.+_2}}
+  %1 = icmp sge i16 %a, 0
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sle_i8(i8 zeroext %a) {
+; FAST-LABEL: icmp_sle_i8
+; FAST:       tbnz w0, #7, {{LBB.+_2}}
+  %1 = icmp sle i8 %a, -1
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sle_i16(i16 zeroext %a) {
+; FAST-LABEL: icmp_sle_i16
+; FAST:       tbnz w0, #15, {{LBB.+_2}}
+  %1 = icmp sle i16 %a, -1
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sle_i32(i32 %a) {
+; CHECK-LABEL: icmp_sle_i32
+; CHECK:       tbnz w0, #31, {{LBB.+_2}}
+  %1 = icmp sle i32 %a, -1
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sle_i64(i64 %a) {
+; CHECK-LABEL: icmp_sle_i64
+; CHECK:       tbnz x0, #63, {{LBB.+_2}}
+  %1 = icmp sle i64 %a, -1
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sgt_i8(i8 zeroext %a) {
+; FAST-LABEL: icmp_sgt_i8
+; FAST:       tbz w0, #7, {{LBB.+_2}}
+  %1 = icmp sgt i8 %a, -1
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sgt_i16(i16 zeroext %a) {
+; FAST-LABEL: icmp_sgt_i16
+; FAST:       tbz w0, #15, {{LBB.+_2}}
+  %1 = icmp sgt i16 %a, -1
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sgt_i32(i32 %a) {
+; CHECK-LABEL: icmp_sgt_i32
+; CHECK:       tbz w0, #31, {{LBB.+_2}}
+  %1 = icmp sgt i32 %a, -1
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sgt_i64(i64 %a) {
+; FAST-LABEL: icmp_sgt_i64
+; FAST:       tbz x0, #63, {{LBB.+_2}}
+  %1 = icmp sgt i64 %a, -1
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
 ; Test that we don't fold the 'and' instruction into the compare.
 define i32 @icmp_eq_and_i32(i32 %a, i1 %c) {
 ; CHECK-LABEL: icmp_eq_and_i32
@@ -137,5 +291,5 @@ bb2:
   ret i32 0
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 0, i32 2147483647}
-!1 = metadata !{metadata !"branch_weights", i32 2147483647, i32 0}
+!0 = !{!"branch_weights", i32 0, i32 2147483647}
+!1 = !{!"branch_weights", i32 2147483647, i32 0}
diff --git a/test/CodeGen/AArch64/fdiv-combine.ll b/test/CodeGen/AArch64/fdiv-combine.ll
new file mode 100644
index 0000000..389eefd
--- /dev/null
+++ b/test/CodeGen/AArch64/fdiv-combine.ll
@@ -0,0 +1,94 @@
+; RUN: llc -march=aarch64 < %s | FileCheck %s
+
+; Following test cases check:
+;   a / D; b / D; c / D;
+;                =>
+;   recip = 1.0 / D; a * recip; b * recip; c * recip;
+define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
+; CHECK-LABEL: three_fdiv_float:
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+  %div = fdiv float %a, %D
+  %div1 = fdiv float %b, %D
+  %div2 = fdiv float %c, %D
+  tail call void @foo_3f(float %div, float %div1, float %div2)
+  ret void
+}
+
+define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
+; CHECK-LABEL: three_fdiv_double:
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+  %div = fdiv double %a, %D
+  %div1 = fdiv double %b, %D
+  %div2 = fdiv double %c, %D
+  tail call void @foo_3d(double %div, double %div1, double %div2)
+  ret void
+}
+
+define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
+; CHECK-LABEL: three_fdiv_4xfloat:
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+  %div = fdiv <4 x float> %a, %D
+  %div1 = fdiv <4 x float> %b, %D
+  %div2 = fdiv <4 x float> %c, %D
+  tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
+  ret void
+}
+
+define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
+; CHECK-LABEL: three_fdiv_2xdouble:
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+  %div = fdiv <2 x double> %a, %D
+  %div1 = fdiv <2 x double> %b, %D
+  %div2 = fdiv <2 x double> %c, %D
+  tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)
+  ret void
+}
+
+; Following test cases check we never combine two FDIVs if neither of them
+; calculates a reciprocal.
+define void @two_fdiv_float(float %D, float %a, float %b) #0 {
+; CHECK-LABEL: two_fdiv_float:
+; CHECK: fdiv
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fmul
+  %div = fdiv float %a, %D
+  %div1 = fdiv float %b, %D
+  tail call void @foo_2f(float %div, float %div1)
+  ret void
+}
+
+define void @two_fdiv_double(double %D, double %a, double %b) #0 {
+; CHECK-LABEL: two_fdiv_double:
+; CHECK: fdiv
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fmul
+  %div = fdiv double %a, %D
+  %div1 = fdiv double %b, %D
+  tail call void @foo_2d(double %div, double %div1)
+  ret void
+}
+
+declare void @foo_3f(float, float, float)
+declare void @foo_3d(double, double, double)
+declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
+declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
+declare void @foo_2f(float, float)
+declare void @foo_2d(double, double)
+
+attributes #0 = { "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/AArch64/fp16-v8-instructions.ll b/test/CodeGen/AArch64/fp16-v8-instructions.ll
index 9ee2296..b75f160 100644
--- a/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -188,10 +188,10 @@ define <8 x half> @s_to_h(<8 x float> %a) {
 
 define <8 x half> @d_to_h(<8 x double> %a) {
 ; CHECK-LABEL: d_to_h:
-; CHECK-DAG: ins v{{[0-9]+}}.d
-; CHECK-DAG: ins v{{[0-9]+}}.d
-; CHECK-DAG: ins v{{[0-9]+}}.d
-; CHECK-DAG: ins v{{[0-9]+}}.d
+; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
+; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
+; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
+; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
 ; CHECK-DAG: fcvt h
 ; CHECK-DAG: fcvt h
 ; CHECK-DAG: fcvt h
diff --git a/test/CodeGen/AArch64/fpimm.ll b/test/CodeGen/AArch64/fpimm.ll
index e59520c..b7db918 100644
--- a/test/CodeGen/AArch64/fpimm.ll
+++ b/test/CodeGen/AArch64/fpimm.ll
@@ -1,4 +1,6 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu                                                  -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -code-model=large                             -verify-machineinstrs < %s | FileCheck %s --check-prefix=LARGE
+; RUN: llc -mtriple=aarch64-apple-darwin -code-model=large -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s --check-prefix=LARGE
 
 @varf32 = global float 0.0
 @varf64 = global double 0.0
@@ -34,3 +36,22 @@ define void @check_double() {
 ; CHECK: ret
   ret void
 }
+
+; LARGE-LABEL: check_float2
+; LARGE:       movz [[REG:w[0-9]+]], #0x4049, lsl #16
+; LARGE-NEXT:  movk [[REG]], #0xfdb
+; LARGE-NEXT:  fmov s0, [[REG]]
+define float @check_float2() {
+  ret float 3.14159274101257324218750
+}
+
+; LARGE-LABEL: check_double2
+; LARGE:       movz [[REG:x[0-9]+]], #0x4009, lsl #48
+; LARGE-NEXT:  movk [[REG]], #0x21fb, lsl #32
+; LARGE-NEXT:  movk [[REG]], #0x5444, lsl #16
+; LARGE-NEXT:  movk [[REG]], #0x2d18
+; LARGE-NEXT:  fmov d0, [[REG]]
+define double @check_double2() {
+  ret double 3.1415926535897931159979634685441851615905761718750
+}
+
diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll
index abb732c..9fc9a5f 100644
--- a/test/CodeGen/AArch64/func-argpassing.ll
+++ b/test/CodeGen/AArch64/func-argpassing.ll
@@ -96,10 +96,8 @@ define [2 x i64] @return_struct() {
     %addr = bitcast %myStruct* @varstruct to [2 x i64]*
     %val = load [2 x i64]* %addr
     ret [2 x i64] %val
-; CHECK-DAG: ldr x0, [{{x[0-9]+}}, {{#?}}:lo12:varstruct]
-    ; Odd register regex below disallows x0 which we want to be live now.
-; CHECK-DAG: add {{x[1-9][0-9]*}}, {{x[1-9][0-9]*}}, {{#?}}:lo12:varstruct
-; CHECK: ldr x1, [{{x[1-9][0-9]*}}, #8]
+; CHECK: add x[[VARSTRUCT:[0-9]+]], {{x[0-9]+}}, :lo12:varstruct
+; CHECK: ldp x0, x1, [x[[VARSTRUCT]]]
     ; Make sure epilogue immediately follows
 ; CHECK-NEXT: ret
 }
@@ -166,8 +164,8 @@ define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
 define i64 @check_i128_regalign(i32 %val0, i128 %val1, i64 %val2) {
 ; CHECK-LABEL: check_i128_regalign
     store i128 %val1, i128* @var128
-; CHECK-DAG: str x2, [{{x[0-9]+}}, {{#?}}:lo12:var128]
-; CHECK-DAG: str x3, [{{x[0-9]+}}, #8]
+; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
+; CHECK-DAG: stp x2, x3, [x[[VAR128]]]
 
     ret i64 %val2
 ; CHECK: mov x0, x4
diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll
index 51979f0..16157f8 100644
--- a/test/CodeGen/AArch64/func-calls.ll
+++ b/test/CodeGen/AArch64/func-calls.ll
@@ -62,8 +62,8 @@ define void @simple_rets() {
   %arr = call [2 x i64] @return_smallstruct()
   store [2 x i64] %arr, [2 x i64]* @varsmallstruct
 ; CHECK: bl return_smallstruct
-; CHECK: str x1, [{{x[0-9]+}}, #8]
-; CHECK: str x0, [{{x[0-9]+}}, {{#?}}:lo12:varsmallstruct]
+; CHECK: add x[[VARSMALLSTRUCT:[0-9]+]], {{x[0-9]+}}, :lo12:varsmallstruct
+; CHECK: stp x0, x1, [x[[VARSMALLSTRUCT]]]
 
   call void @return_large_struct(%myStruct* sret @varstruct)
 ; CHECK: add x8, {{x[0-9]+}}, {{#?}}:lo12:varstruct
@@ -128,12 +128,12 @@ define void @check_i128_align() {
   call void @check_i128_stackalign(i32 0, i32 1, i32 2, i32 3,
                                    i32 4, i32 5, i32 6, i32 7,
                                    i32 42, i128 %val)
-; CHECK: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:var128]
-; CHECK: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8]
+; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
+; CHECK: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]]
 ; CHECK: stp [[I128LO]], [[I128HI]], [sp, #16]
 
-; CHECK-NONEON: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, :lo12:var128]
-; CHECK-NONEON: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8]
+; CHECK-NONEON: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
+; CHECK-NONEON: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]]
 ; CHECK-NONEON: stp [[I128LO]], [[I128HI]], [sp, #16]
 ; CHECK: bl check_i128_stackalign
 
diff --git a/test/CodeGen/AArch64/ghc-cc.ll b/test/CodeGen/AArch64/ghc-cc.ll
new file mode 100644
index 0000000..505bd5f
--- /dev/null
+++ b/test/CodeGen/AArch64/ghc-cc.ll
@@ -0,0 +1,89 @@
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+; Check the GHC call convention works (aarch64)
+
+@base  = external global i64 ; assigned to register: r19
+@sp    = external global i64 ; assigned to register: r20
+@hp    = external global i64 ; assigned to register: r21
+@r1    = external global i64 ; assigned to register: r22
+@r2    = external global i64 ; assigned to register: r23
+@r3    = external global i64 ; assigned to register: r24
+@r4    = external global i64 ; assigned to register: r25
+@r5    = external global i64 ; assigned to register: r26
+@r6    = external global i64 ; assigned to register: r27
+@splim = external global i64 ; assigned to register: r28
+
+@f1 = external global float  ; assigned to register: s8
+@f2 = external global float  ; assigned to register: s9
+@f3 = external global float  ; assigned to register: s10
+@f4 = external global float  ; assigned to register: s11
+
+@d1 = external global double ; assigned to register: d12
+@d2 = external global double ; assigned to register: d13
+@d3 = external global double ; assigned to register: d14
+@d4 = external global double ; assigned to register: d15
+
+define ghccc i64 @addtwo(i64 %x, i64 %y) nounwind {
+entry:
+  ; CHECK-LABEL: addtwo
+  ; CHECK:       add      x0, x19, x20
+  ; CHECK-NEXT:  ret
+  %0 = add i64 %x, %y
+  ret i64 %0
+}
+
+define void @zap(i64 %a, i64 %b) nounwind {
+entry:
+  ; CHECK-LABEL: zap
+  ; CHECK-NOT:   mov   {{x[0-9]+}}, sp
+  ; CHECK:       bl    addtwo
+  ; CHECK-NEXT:  bl    foo
+  %0 = call ghccc i64 @addtwo(i64 %a, i64 %b)
+  call void @foo() nounwind
+  ret void
+}
+
+define ghccc void @foo_i64 () nounwind {
+entry:
+  ; CHECK-LABEL: foo_i64
+  ; CHECK:       adrp    {{x[0-9]+}}, base
+  ; CHECK-NEXT:  ldr     x19, [{{x[0-9]+}}, :lo12:base]
+  ; CHECK-NEXT:  bl      bar_i64
+  ; CHECK-NEXT:  ret
+
+  %0 = load i64* @base
+  tail call ghccc void @bar_i64( i64 %0 ) nounwind
+  ret void
+}
+
+define ghccc void @foo_float () nounwind {
+entry:
+  ; CHECK-LABEL: foo_float
+  ; CHECK:       adrp    {{x[0-9]+}}, f1
+  ; CHECK-NEXT:  ldr     s8, [{{x[0-9]+}}, :lo12:f1]
+  ; CHECK-NEXT:  bl      bar_float
+  ; CHECK-NEXT:  ret
+
+  %0 = load float* @f1
+  tail call ghccc void @bar_float( float %0 ) nounwind
+  ret void
+}
+
+define ghccc void @foo_double () nounwind {
+entry:
+  ; CHECK-LABEL: foo_double
+  ; CHECK:       adrp    {{x[0-9]+}}, d1
+  ; CHECK-NEXT:  ldr     d12, [{{x[0-9]+}}, :lo12:d1]
+  ; CHECK-NEXT:  bl      bar_double
+  ; CHECK-NEXT:  ret
+
+  %0 = load double* @d1
+  tail call ghccc void @bar_double( double %0 ) nounwind
+  ret void
+}
+
+declare ghccc void @foo ()
+
+declare ghccc void @bar_i64 (i64)
+declare ghccc void @bar_float (float)
+declare ghccc void @bar_double (double)
diff --git a/test/CodeGen/AArch64/global-merge-1.ll b/test/CodeGen/AArch64/global-merge-1.ll
index 68aba5e..7dc8da1 100644
--- a/test/CodeGen/AArch64/global-merge-1.ll
+++ b/test/CodeGen/AArch64/global-merge-1.ll
@@ -11,6 +11,7 @@
 @n = internal global i32 0, align 4
 
 define void @f1(i32 %a1, i32 %a2) {
+;CHECK-APPLE-IOS-NOT: adrp
 ;CHECK-APPLE-IOS: adrp	x8, __MergedGlobals@PAGE
 ;CHECK-APPLE-IOS-NOT: adrp
 ;CHECK-APPLE-IOS: add	x8, x8, __MergedGlobals@PAGEOFF
diff --git a/test/CodeGen/AArch64/global-merge-2.ll b/test/CodeGen/AArch64/global-merge-2.ll
index a773566..70b700c 100644
--- a/test/CodeGen/AArch64/global-merge-2.ll
+++ b/test/CodeGen/AArch64/global-merge-2.ll
@@ -8,6 +8,7 @@
 
 define void @f1(i32 %a1, i32 %a2) {
 ;CHECK-APPLE-IOS-LABEL: _f1:
+;CHECK-APPLE-IOS-NOT: adrp
 ;CHECK-APPLE-IOS: adrp	x8, __MergedGlobals_x@PAGE
 ;CHECK-APPLE-IOS: add	x8, x8, __MergedGlobals_x@PAGEOFF
 ;CHECK-APPLE-IOS-NOT: adrp
diff --git a/test/CodeGen/AArch64/implicit-sret.ll b/test/CodeGen/AArch64/implicit-sret.ll
new file mode 100644
index 0000000..264d519
--- /dev/null
+++ b/test/CodeGen/AArch64/implicit-sret.ll
@@ -0,0 +1,13 @@
+; RUN: llc %s -o - -mtriple=arm64-apple-ios7.0 | FileCheck %s
+;
+; Handle implicit sret arguments that are generated on-the-fly during lowering.
+; <rdar://19792160> Null pointer assertion in AArch64TargetLowering
+
+; CHECK-LABEL: big_retval
+; ... str or stp for the first 1024 bits
+; CHECK: strb wzr, [x8, #128]
+; CHECK: ret
+define i1032 @big_retval() {
+entry:
+  ret i1032 0
+}
diff --git a/test/CodeGen/AArch64/large_shift.ll b/test/CodeGen/AArch64/large_shift.ll
new file mode 100644
index 0000000..f72c97d
--- /dev/null
+++ b/test/CodeGen/AArch64/large_shift.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=aarch64 -o - %s
+target triple = "arm64-unknown-unknown"
+
+; Make sure we don't run into an assert in the aarch64 code selection when
+; DAGCombining fails.
+
+declare void @t()
+
+define void @foo() {
+  %c = bitcast i64 270458 to i64
+  %t0 = lshr i64 %c, 422383
+  %t1 = trunc i64 %t0 to i1
+  br i1 %t1, label %BB1, label %BB0
+
+BB0:
+  call void @t()
+  br label %BB1
+
+BB1:
+  ret void
+}
diff --git a/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll b/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll
new file mode 100644
index 0000000..e77824f
--- /dev/null
+++ b/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=aarch64-apple-ios -fast-isel -verify-machineinstrs | FileCheck %s
+
+; Check that the kill flag is cleared between CSE'd instructions on their
+; imp-def'd registers.
+; The verifier would complain otherwise.
+define i64 @csed-impdef-killflag(i64 %a) {
+; CHECK-LABEL: csed-impdef-killflag
+; CHECK-DAG:  mov    [[REG0:w[0-9]+]], wzr
+; CHECK-DAG:  orr    [[REG1:w[0-9]+]], wzr, #0x1
+; CHECK-DAG:  orr    [[REG2:x[0-9]+]], xzr, #0x2
+; CHECK-DAG:  orr    [[REG3:x[0-9]+]], xzr, #0x3
+; CHECK:      cmp    x0, #0
+; CHECK-DAG:  csel   w[[SELECT_WREG_1:[0-9]+]], [[REG0]], [[REG1]], ne
+; CHECK-DAG:  csel   [[SELECT_XREG_2:x[0-9]+]], [[REG2]], [[REG3]], ne
+; CHECK:      ubfx   [[SELECT_XREG_1:x[0-9]+]], x[[SELECT_WREG_1]], #0, #32
+; CHECK-NEXT: add    x0, [[SELECT_XREG_2]], [[SELECT_XREG_1]]
+; CHECK-NEXT: ret
+
+  %1 = icmp ne i64 %a, 0
+  %2 = select i1 %1, i32 0, i32 1
+  %3 = icmp ne i64 %a, 0
+  %4 = select i1 %3, i64 2, i64 3
+  %5 = zext i32 %2 to i64
+  %6 = add i64 %4, %5
+  ret i64 %6
+}
diff --git a/test/CodeGen/AArch64/neon-scalar-copy.ll b/test/CodeGen/AArch64/neon-scalar-copy.ll
index 6afac31..3f77060 100644
--- a/test/CodeGen/AArch64/neon-scalar-copy.ll
+++ b/test/CodeGen/AArch64/neon-scalar-copy.ll
@@ -1,101 +1,145 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -asm-verbose=false < %s | FileCheck %s
 
-
-define float @test_dup_sv2S(<2 x float> %v) {
- ; CHECK-LABEL: test_dup_sv2S
- ; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+define float @test_dup_sv2S(<2 x float> %v) #0 {
+ ; CHECK-LABEL: test_dup_sv2S:
+ ; CHECK-NEXT: mov s{{[0-9]+}}, {{v[0-9]+}}.s[1]
+ ; CHECK-NEXT: ret
  %tmp1 = extractelement <2 x float> %v, i32 1
  ret float  %tmp1
 }
 
-define float @test_dup_sv2S_0(<2 x float> %v) {
- ; CHECK-LABEL: test_dup_sv2S_0
+define float @test_dup_sv2S_0(<2 x float> %v) #0 {
+ ; CHECK-LABEL: test_dup_sv2S_0:
  ; CHECK-NOT: dup {{[vsd][0-9]+}}
  ; CHECK-NOT: ins {{[vsd][0-9]+}}
- ; CHECK: ret
+ ; CHECK-NEXT: ret
  %tmp1 = extractelement <2 x float> %v, i32 0
  ret float  %tmp1
 }
 
-define float @test_dup_sv4S(<4 x float> %v) {
- ; CHECK-LABEL: test_dup_sv4S
+define float @test_dup_sv4S(<4 x float> %v) #0 {
+ ; CHECK-LABEL: test_dup_sv4S:
+ ; CHECK-NEXT: mov s{{[0-9]+}}, {{v[0-9]+}}.s[1]
+ ; CHECK-NEXT: ret
+ %tmp1 = extractelement <4 x float> %v, i32 1
+ ret float  %tmp1
+}
+
+define float @test_dup_sv4S_0(<4 x float> %v) #0 {
+ ; CHECK-LABEL: test_dup_sv4S_0:
  ; CHECK-NOT: dup {{[vsd][0-9]+}}
  ; CHECK-NOT: ins {{[vsd][0-9]+}}
- ; CHECK: ret
+ ; CHECK-NEXT: ret
  %tmp1 = extractelement <4 x float> %v, i32 0
  ret float  %tmp1
 }
 
-define double @test_dup_dvD(<1 x double> %v) {
- ; CHECK-LABEL: test_dup_dvD
+define double @test_dup_dvD(<1 x double> %v) #0 {
+ ; CHECK-LABEL: test_dup_dvD:
  ; CHECK-NOT: dup {{[vsd][0-9]+}}
  ; CHECK-NOT: ins {{[vsd][0-9]+}}
- ; CHECK: ret
+ ; CHECK-NEXT: ret
  %tmp1 = extractelement <1 x double> %v, i32 0
  ret double  %tmp1
 }
 
-define double @test_dup_dv2D(<2 x double> %v) {
- ; CHECK-LABEL: test_dup_dv2D
- ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+define double @test_dup_dv2D(<2 x double> %v) #0 {
+ ; CHECK-LABEL: test_dup_dv2D:
+ ; CHECK-NEXT: mov d{{[0-9]+}}, {{v[0-9]+}}.d[1]
+ ; CHECK-NEXT: ret
  %tmp1 = extractelement <2 x double> %v, i32 1
  ret double  %tmp1
 }
 
-define double @test_dup_dv2D_0(<2 x double> %v) {
- ; CHECK-LABEL: test_dup_dv2D_0
- ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
- ; CHECK: ret
- %tmp1 = extractelement <2 x double> %v, i32 1
+define double @test_dup_dv2D_0(<2 x double> %v) #0 {
+ ; CHECK-LABEL: test_dup_dv2D_0:
+ ; CHECK-NOT: dup {{[vsd][0-9]+}}
+ ; CHECK-NOT: ins {{[vsd][0-9]+}}
+ ; CHECK-NEXT: ret
+ %tmp1 = extractelement <2 x double> %v, i32 0
  ret double  %tmp1
 }
 
-define <1 x i8> @test_vector_dup_bv16B(<16 x i8> %v1) {
- ; CHECK-LABEL: test_vector_dup_bv16B
+define half @test_dup_hv8H(<8 x half> %v) #0 {
+ ; CHECK-LABEL: test_dup_hv8H:
+ ; CHECK-NEXT: mov h{{[0-9]+}}, {{v[0-9]+}}.h[1]
+ ; CHECK-NEXT: ret
+ %tmp1 = extractelement <8 x half> %v, i32 1
+ ret half  %tmp1
+}
+
+define half @test_dup_hv8H_0(<8 x half> %v) #0 {
+ ; CHECK-LABEL: test_dup_hv8H_0:
+ ; CHECK-NOT: dup {{[vsdh][0-9]+}}
+ ; CHECK-NOT: ins {{[vsdh][0-9]+}}
+ ; CHECK-NEXT: ret
+ %tmp1 = extractelement <8 x half> %v, i32 0
+ ret half  %tmp1
+}
+
+define <1 x i8> @test_vector_dup_bv16B(<16 x i8> %v1) #0 {
+ ; CHECK-LABEL: test_vector_dup_bv16B:
+ ; CHECK-NEXT: umov [[W:w[0-9]+]], v0.b[14]
+ ; CHECK-NEXT: fmov s0, [[W]]
+ ; CHECK-NEXT: ret
  %shuffle.i = shufflevector <16 x i8> %v1, <16 x i8> undef, <1 x i32> <i32 14> 
  ret <1 x i8> %shuffle.i
 }
 
-define <1 x i8> @test_vector_dup_bv8B(<8 x i8> %v1) {
- ; CHECK-LABEL: test_vector_dup_bv8B
+define <1 x i8> @test_vector_dup_bv8B(<8 x i8> %v1) #0 {
+ ; CHECK-LABEL: test_vector_dup_bv8B:
+ ; CHECK-NEXT: dup v0.8b, v0.b[7]
+ ; CHECK-NEXT: ret
  %shuffle.i = shufflevector <8 x i8> %v1, <8 x i8> undef, <1 x i32> <i32 7> 
  ret <1 x i8> %shuffle.i
 }
 
-define <1 x i16> @test_vector_dup_hv8H(<8 x i16> %v1) {
- ; CHECK-LABEL: test_vector_dup_hv8H
+define <1 x i16> @test_vector_dup_hv8H(<8 x i16> %v1) #0 {
+ ; CHECK-LABEL: test_vector_dup_hv8H:
+ ; CHECK-NEXT: umov [[W:w[0-9]+]], v0.h[7]
+ ; CHECK-NEXT: fmov s0, [[W]]
+ ; CHECK-NEXT: ret
  %shuffle.i = shufflevector <8 x i16> %v1, <8 x i16> undef, <1 x i32> <i32 7> 
  ret <1 x i16> %shuffle.i
 }
 
-define <1 x i16> @test_vector_dup_hv4H(<4 x i16> %v1) {
- ; CHECK-LABEL: test_vector_dup_hv4H
+define <1 x i16> @test_vector_dup_hv4H(<4 x i16> %v1) #0 {
+ ; CHECK-LABEL: test_vector_dup_hv4H:
+ ; CHECK-NEXT: dup v0.4h, v0.h[3]
+ ; CHECK-NEXT: ret
  %shuffle.i = shufflevector <4 x i16> %v1, <4 x i16> undef, <1 x i32> <i32 3> 
  ret <1 x i16> %shuffle.i
 }
 
-define <1 x i32> @test_vector_dup_sv4S(<4 x i32> %v1) {
- ; CHECK-LABEL: test_vector_dup_sv4S
+define <1 x i32> @test_vector_dup_sv4S(<4 x i32> %v1) #0 {
+ ; CHECK-LABEL: test_vector_dup_sv4S:
+ ; CHECK-NEXT: mov  [[W:w[0-9]+]], v0.s[3]
+ ; CHECK-NEXT: fmov s0, [[W]]
+ ; CHECK-NEXT: ret
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <1 x i32> <i32 3> 
  ret <1 x i32> %shuffle
 }
 
-define <1 x i32> @test_vector_dup_sv2S(<2 x i32> %v1) {
- ; CHECK-LABEL: test_vector_dup_sv2S
+define <1 x i32> @test_vector_dup_sv2S(<2 x i32> %v1) #0 {
+ ; CHECK-LABEL: test_vector_dup_sv2S:
+ ; CHECK-NEXT: dup v0.2s, v0.s[1]
+ ; CHECK-NEXT: ret
  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <1 x i32> <i32 1> 
  ret <1 x i32> %shuffle
 }
 
-define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) {
- ; CHECK-LABEL: test_vector_dup_dv2D
- ; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #8
+define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) #0 {
+ ; CHECK-LABEL: test_vector_dup_dv2D:
+ ; CHECK-NEXT: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #8
+ ; CHECK-NEXT: ret
  %shuffle.i = shufflevector <2 x i64> %v1, <2 x i64> undef, <1 x i32> <i32 1> 
  ret <1 x i64> %shuffle.i
 }
 
-define <1 x i64> @test_vector_copy_dup_dv2D(<1 x i64> %a, <2 x i64> %c) {
-  ; CHECK-LABEL: test_vector_copy_dup_dv2D
-  ; CHECK: {{dup|mov}} {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+define <1 x i64> @test_vector_copy_dup_dv2D(<1 x i64> %a, <2 x i64> %c) #0 {
+  ; CHECK-LABEL: test_vector_copy_dup_dv2D:
+  ; CHECK-NEXT: {{dup|mov}} {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  ; CHECK-NEXT: ret
   %vget_lane = extractelement <2 x i64> %c, i32 1
   %vset_lane = insertelement <1 x i64> undef, i64 %vget_lane, i32 0
   ret <1 x i64> %vset_lane
@@ -118,3 +162,5 @@ define void @test_out_of_range_insert(<4 x i32> %vec, i32 %elt) {
   insertelement <4 x i32> %vec, i32 %elt, i32 4
   ret void
 }
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/or-combine.ll b/test/CodeGen/AArch64/or-combine.ll
new file mode 100644
index 0000000..c6c343a
--- /dev/null
+++ b/test/CodeGen/AArch64/or-combine.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+define i32 @test_consts(i32 %in) {
+; CHECK-LABEL: test_consts:
+; CHECK-NOT: bfxil
+; CHECK-NOT: and
+; CHECK-NOT: orr
+; CHECK: ret
+
+  %lo = and i32 %in, 65535
+  %hi = and i32 %in, -65536
+  %res = or i32 %lo, %hi
+  ret i32 %res
+}
+
+define i32 @test_generic(i32 %in, i32 %mask1, i32 %mask2) {
+; CHECK-LABEL: test_generic:
+; CHECK: orr [[FULL_MASK:w[0-9]+]], w1, w2
+; CHECK: and w0, w0, [[FULL_MASK]]
+
+  %lo = and i32 %in, %mask1
+  %hi = and i32 %in, %mask2
+  %res = or i32 %lo, %hi
+  ret i32 %res
+}
+
+; In this case the transformation isn't profitable, since %lo and %hi
+; are used more than once.
+define [3 x i32] @test_reuse(i32 %in, i32 %mask1, i32 %mask2) {
+; CHECK-LABEL: test_reuse:
+; CHECK-DAG: and w1, w0, w1
+; CHECK-DAG: and w2, w0, w2
+; CHECK-DAG: orr w0, w1, w2
+
+  %lo = and i32 %in, %mask1
+  %hi = and i32 %in, %mask2
+  %recombine = or i32 %lo, %hi
+
+  %res.tmp0 = insertvalue [3 x i32] undef, i32 %recombine, 0
+  %res.tmp1 = insertvalue [3 x i32] %res.tmp0, i32 %lo, 1
+  %res = insertvalue [3 x i32] %res.tmp1, i32 %hi, 2
+
+  ret [3 x i32] %res
+}
diff --git a/test/CodeGen/AArch64/ragreedy-csr.ll b/test/CodeGen/AArch64/ragreedy-csr.ll
index de29b1b..31ff543 100644
--- a/test/CodeGen/AArch64/ragreedy-csr.ll
+++ b/test/CodeGen/AArch64/ragreedy-csr.ll
@@ -271,27 +271,27 @@ return:
   %retval.0 = phi i32 [ 0, %entry ], [ 1, %land.lhs.true52 ], [ 1, %land.lhs.true43 ], [ 0, %if.else123 ], [ 1, %while.cond59.preheader ], [ 1, %while.cond95.preheader ], [ 1, %while.cond130.preheader ], [ 1, %land.lhs.true28 ], [ 1, %if.then83 ], [ 0, %lor.lhs.false74 ], [ 1, %land.rhs ], [ 1, %if.then117 ], [ 0, %while.body104 ], [ 1, %land.rhs99 ], [ 1, %if.then152 ], [ 0, %while.body139 ], [ 1, %land.rhs134 ], [ 0, %while.body ]
   ret i32 %retval.0
 }
-!181 = metadata !{metadata !"branch_weights", i32 662038, i32 1}
-!988 = metadata !{metadata !"branch_weights", i32 12091450, i32 1916}
-!989 = metadata !{metadata !"branch_weights", i32 7564670, i32 4526781}
-!990 = metadata !{metadata !"branch_weights", i32 7484958, i32 13283499}
-!991 = metadata !{metadata !"branch_weights", i32 8677007, i32 4606493}
-!992 = metadata !{metadata !"branch_weights", i32 -1172426948, i32 145094705}
-!993 = metadata !{metadata !"branch_weights", i32 1468914, i32 5683688}
-!994 = metadata !{metadata !"branch_weights", i32 114025221, i32 -1217548794, i32 -1199521551, i32 87712616}
-!995 = metadata !{metadata !"branch_weights", i32 1853716452, i32 -444717951, i32 932776759}
-!996 = metadata !{metadata !"branch_weights", i32 1004870, i32 20259}
-!997 = metadata !{metadata !"branch_weights", i32 20071, i32 189}
-!998 = metadata !{metadata !"branch_weights", i32 -1020255939, i32 572177766}
-!999 = metadata !{metadata !"branch_weights", i32 2666513, i32 3466431}
-!1000 = metadata !{metadata !"branch_weights", i32 5117635, i32 1859780}
-!1001 = metadata !{metadata !"branch_weights", i32 354902465, i32 -1444604407}
-!1002 = metadata !{metadata !"branch_weights", i32 -1762419279, i32 1592770684}
-!1003 = metadata !{metadata !"branch_weights", i32 1435905930, i32 -1951930624}
-!1004 = metadata !{metadata !"branch_weights", i32 1, i32 504888}
-!1005 = metadata !{metadata !"branch_weights", i32 94662, i32 504888}
-!1006 = metadata !{metadata !"branch_weights", i32 -1897793104, i32 160196332}
-!1007 = metadata !{metadata !"branch_weights", i32 2074643678, i32 -29579071}
-!1008 = metadata !{metadata !"branch_weights", i32 1, i32 226163}
-!1009 = metadata !{metadata !"branch_weights", i32 58357, i32 226163}
-!1010 = metadata !{metadata !"branch_weights", i32 -2072848646, i32 92907517}
+!181 = !{!"branch_weights", i32 662038, i32 1}
+!988 = !{!"branch_weights", i32 12091450, i32 1916}
+!989 = !{!"branch_weights", i32 7564670, i32 4526781}
+!990 = !{!"branch_weights", i32 7484958, i32 13283499}
+!991 = !{!"branch_weights", i32 8677007, i32 4606493}
+!992 = !{!"branch_weights", i32 -1172426948, i32 145094705}
+!993 = !{!"branch_weights", i32 1468914, i32 5683688}
+!994 = !{!"branch_weights", i32 114025221, i32 -1217548794, i32 -1199521551, i32 87712616}
+!995 = !{!"branch_weights", i32 1853716452, i32 -444717951, i32 932776759}
+!996 = !{!"branch_weights", i32 1004870, i32 20259}
+!997 = !{!"branch_weights", i32 20071, i32 189}
+!998 = !{!"branch_weights", i32 -1020255939, i32 572177766}
+!999 = !{!"branch_weights", i32 2666513, i32 3466431}
+!1000 = !{!"branch_weights", i32 5117635, i32 1859780}
+!1001 = !{!"branch_weights", i32 354902465, i32 -1444604407}
+!1002 = !{!"branch_weights", i32 -1762419279, i32 1592770684}
+!1003 = !{!"branch_weights", i32 1435905930, i32 -1951930624}
+!1004 = !{!"branch_weights", i32 1, i32 504888}
+!1005 = !{!"branch_weights", i32 94662, i32 504888}
+!1006 = !{!"branch_weights", i32 -1897793104, i32 160196332}
+!1007 = !{!"branch_weights", i32 2074643678, i32 -29579071}
+!1008 = !{!"branch_weights", i32 1, i32 226163}
+!1009 = !{!"branch_weights", i32 58357, i32 226163}
+!1010 = !{!"branch_weights", i32 -2072848646, i32 92907517}
diff --git a/test/CodeGen/AArch64/remat.ll b/test/CodeGen/AArch64/remat.ll
index 32b3ed2..8b3e6dd 100644
--- a/test/CodeGen/AArch64/remat.ll
+++ b/test/CodeGen/AArch64/remat.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a57 -o - %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a53 -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a72 -o - %s | FileCheck %s
 
 %X = type { i64, i64, i64 }
 declare void @f(%X*)
diff --git a/test/CodeGen/AArch64/setcc-type-mismatch.ll b/test/CodeGen/AArch64/setcc-type-mismatch.ll
new file mode 100644
index 0000000..86817fa
--- /dev/null
+++ b/test/CodeGen/AArch64/setcc-type-mismatch.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
+
+define void @test_mismatched_setcc(<4 x i22> %l, <4 x i22> %r, <4 x i1>* %addr) {
+; CHECK-LABEL: test_mismatched_setcc:
+; CHECK: cmeq [[CMP128:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: xtn {{v[0-9]+}}.4h, [[CMP128]].4s
+
+  %tst = icmp eq <4 x i22> %l, %r
+  store <4 x i1> %tst, <4 x i1>* %addr
+  ret void
+}